Skip to main content

dms/
lib.rs

1//! DMS parser — full spec v0.14.
2
3// Re-exported so downstream code can build meta tables without its own
4// `indexmap` dependency.
5pub use indexmap::IndexMap;
6
7// Tier-1 decoder/encoder skeleton. Public types match the contract
8// in `dms/TIER1.md` §"Document types" and §"AST shape"; the lexer
9// and parser additions are not yet implemented. Tier-0 callers can
10// ignore this module — `decode_document` / `encode` are unchanged.
11pub mod tier1;
12pub mod tier1_stream;
13
// Hasher selection for the `IndexMap` behind `Value::Table` and the
// front-matter meta table. By default this is the std `RandomState`
// (siphash); enabling the `fast_hash` cargo feature substitutes
// `foldhash::fast::FixedState`. The switch mirrors toml-rs's
// `fast_hash` flag so DMS-vs-TOML comparisons stay apples-to-apples
// when both crates have it enabled.
#[cfg(not(feature = "fast_hash"))]
type DmsHasher = std::collections::hash_map::RandomState;
#[cfg(feature = "fast_hash")]
type DmsHasher = foldhash::fast::FixedState;
22
23/// `IndexMap` specialization used for body tables and front matter.
24/// Hasher is swappable via the `fast_hash` cargo feature.
25pub type DmsMap<V> = IndexMap<String, V, DmsHasher>;
26
27/// `HashMap` specialization used by **unordered** body tables, opt-in via
28/// `decode_document_unordered` / `decode_lite_document_unordered` (or the
29/// `--ignore-order` CLI flag). Iteration order is **arbitrary** — see
30/// SPEC §"Unordered tables". Reuses `DmsHasher` so the hasher is the
31/// same as the ordered side; the only difference is the lack of the
32/// `IndexMap`'s entry-order `Vec<usize>` bookkeeping.
33pub type DmsHashMap<V> = std::collections::HashMap<String, V, DmsHasher>;
34
35#[derive(Debug, Clone, PartialEq)]
36pub enum Value {
37    Bool(bool),
38    Integer(i64),
39    Float(f64),
40    String(String),
41    OffsetDateTime(String),
42    LocalDateTime(String),
43    LocalDate(String),
44    LocalTime(String),
45    /// Ordered table (insertion-order preserving). Built by the default
46    /// parser entry points.
47    Table(DmsMap<Value>),
48    /// Unordered table (arbitrary iteration order). Built only by the
49    /// `*_unordered` decoder entry points. `encode` (full mode) refuses
50    /// to round-trip a Document containing this variant; `encode_lite`
51    /// accepts it but emits keys in arbitrary order. See SPEC
52    /// §"Unordered tables".
53    UnorderedTable(DmsHashMap<Value>),
54    List(Vec<Value>),
55}
56
/// Error returned by the decode entry points: a source position
/// (`line`, `column`) plus a human-readable `message`.
#[derive(Debug, Clone)]
pub struct DecodeError {
    pub line: usize,
    pub column: usize,
    pub message: String,
}

impl std::fmt::Display for DecodeError {
    /// Renders the error as `line:column: message`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let DecodeError {
            line,
            column,
            message,
        } = self;
        f.write_fmt(format_args!("{}:{}: {}", line, column, message))
    }
}

impl std::error::Error for DecodeError {}
71
72/// Deprecated alias for [`DecodeError`]. Removed in v0.4.
73///
74/// SPEC v0.14 renamed `parse`/`to_dms` -> `decode`/`encode`; this type
75/// alias keeps `dms::ParseError` working for one release so downstream
76/// code keeps compiling (with a warning).
77#[deprecated(since = "0.3.0", note = "renamed to `DecodeError`")]
78pub type ParseError = DecodeError;
79
/// Errors raised by the encode (emitter) path.
///
/// At present there is a single failure mode: full-mode emit refusing
/// a `Document` that contains an unordered table — see SPEC
/// §"Unordered tables". Lite-mode emit (`encode_lite`) cannot fail and
/// accepts unordered Documents.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EncodeError {
    /// Full-mode `encode` was handed a Document whose body contains a
    /// `Value::UnorderedTable` (built only by the `*_unordered`
    /// decoder entry points). Round-trip emit needs a stable iteration
    /// order, which an unordered Document cannot provide. Use
    /// `encode_lite` instead; it accepts unordered Documents but emits
    /// keys in arbitrary order.
    UnorderedInFullMode,
}

impl std::fmt::Display for EncodeError {
    /// Writes the fixed, user-facing explanation for the variant.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let text = match self {
            EncodeError::UnorderedInFullMode => {
                "encode (full-mode round-trip) refuses Document with \
                 Value::UnorderedTable; unordered tables have arbitrary \
                 iteration order — use encode_lite instead. \
                 See SPEC §\"Unordered tables\"."
            }
        };
        f.write_str(text)
    }
}

impl std::error::Error for EncodeError {}
110
111/// A parsed DMS document — body value plus optional front matter table.
112#[derive(Debug, Clone, PartialEq)]
113pub struct Document {
114    /// Front matter table (user metadata only; reserved `_dms_*` keys are
115    /// consumed by the parser and do NOT appear here). `None` if the
116    /// source had no `+++` block.
117    pub meta: Option<DmsMap<Value>>,
118    /// The body — the rest of the document, parsed per the spec.
119    pub body: Value,
120    /// Comments captured during parsing, in source order, each tagged with
121    /// its attachment (path + position). Empty when the source had no
122    /// comments.
123    ///
124    /// Path-prefix convention: body-attached comments use bare paths
125    /// (rooted at the body value); front-matter-attached comments use a
126    /// path that begins with the special segment
127    /// `BreadcrumbSegment::Key("__fm__")` and continues with the path
128    /// inside the front-matter table. (We chose a single flat list with
129    /// a sentinel prefix — rather than a separate `meta_comments` field —
130    /// for simpler downstream iteration: a re-emitter that walks the
131    /// list once handles both contexts uniformly.)
132    pub comments: Vec<AttachedComment>,
133    /// Sparse `(path, OriginalLiteral)` entries — only nodes whose
134    /// surface form differs from the default (decimal-no-underscores
135    /// for ints, basic-quoted for strings) get an entry. Consulted by
136    /// `encode` to re-emit values in their decoded form. The
137    /// conformance JSON encoder ignores this field. Same `__fm__`
138    /// path-prefix convention as `comments`.
139    pub original_forms: Vec<(Vec<BreadcrumbSegment>, OriginalLiteral)>,
140}
141
/// Syntactic kind of a captured comment: a `#`/`//` line comment, or a
/// `### ###` / `/* */` block comment.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CommentKind {
    Line,
    Block,
}
149
/// Where a comment sits relative to the node it is attached to.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CommentPosition {
    /// Comment(s) on the lines directly before a sibling node, with no
    /// blank-line gap.
    Leading,
    /// `/* ... */` block comment(s) between a key's `:` and its value,
    /// or between a `+` and a list item's content. Only `/* ... */`
    /// can be `Inner`: line comments consume to EOL, and `### ###`
    /// block comments require the opener on its own line. See SPEC
    /// §Attachment rules.
    Inner,
    /// Comment(s) after a value on the same line. Several may stack in
    /// source order; if a `#`/`//` line comment is present it must be
    /// the last one.
    Trailing,
    /// Comment that did not attach to a sibling — it was separated by
    /// a blank line, or was left over at block close. It attaches to
    /// the enclosing container instead.
    Floating,
}
170
171/// One captured comment.
172#[derive(Debug, Clone, PartialEq, Eq)]
173pub struct Comment {
174    /// Raw text of the comment, **including delimiters**:
175    /// `# foo` / `// foo` / `/* foo */` / `### LABEL\n...\nLABEL`.
176    pub content: String,
177    pub kind: CommentKind,
178}
179
/// A single step of a comment-attachment breadcrumb path: either a
/// table key or a list index.
///
/// `Ord` is derived so that the emitter can sort path slices and look
/// them up by binary search (see `SortedIndex` in §encode emitter).
/// The ordering is not normative; it only has to be stable and total
/// so that two `encode` passes produce byte-identical output. As
/// derived, `Key` sorts before `Index` (variant order), and values of
/// the same variant compare by their inner value.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum BreadcrumbSegment {
    Key(String),
    Index(usize),
}
193
194/// A comment plus its attachment metadata.
195#[derive(Debug, Clone, PartialEq, Eq)]
196pub struct AttachedComment {
197    pub comment: Comment,
198    pub position: CommentPosition,
199    /// Path to the node the comment is attached to. Empty `[]` means
200    /// "the document root" (or the front-matter table root, given the
201    /// `__fm__` prefix convention).
202    pub path: Vec<BreadcrumbSegment>,
203}
204
205/// Per-node literal-form record, captured during decoding so that
206/// `encode` can re-emit the value in the same source form (integer
207/// base, string flavor, heredoc modifiers). Keyed off the node's path
208/// in `Document::original_forms`. Spec §encode.
209#[derive(Debug, Clone, PartialEq)]
210pub enum OriginalLiteral {
211    /// An integer literal, stored as its exact source lexeme:
212    /// `"0x1F40"`, `"0o755"`, `"0b1010_0110"`, `"1_000_000"`,
213    /// `"+42"`, `"-7"`, etc. Re-emitted verbatim.
214    Integer { lit: String },
215    /// A string value, stored with its original surface form (basic /
216    /// literal / heredoc-basic / heredoc-literal, plus heredoc label
217    /// and modifier list).
218    String { form: StringForm },
219}
220
221#[derive(Debug, Clone, PartialEq)]
222pub enum StringForm {
223    /// `"..."`
224    Basic,
225    /// `'...'`
226    Literal,
227    /// `"""[LABEL]...LABEL` or `'''[LABEL]...LABEL` (or unlabeled
228    /// `"""..."""` / `'''...'''`).
229    Heredoc {
230        flavor: HeredocFlavor,
231        /// `None` means unlabeled (terminator is `"""` / `'''`).
232        label: Option<String>,
233        /// Modifier calls in source order: `_trim(...)`,
234        /// `_fold_paragraphs()`, etc. Re-emitted on the opener line.
235        modifiers: Vec<HeredocModifierCall>,
236    },
237}
238
/// Which triple-quote flavor a heredoc was written with; used by
/// `StringForm::Heredoc`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HeredocFlavor {
    BasicTriple,
    LiteralTriple,
}
244
245#[derive(Debug, Clone, PartialEq)]
246pub struct HeredocModifierCall {
247    pub name: String,
248    /// Pre-evaluated args (Strings, Integers, etc.). Re-rendered
249    /// using the same default-form emit rules as ordinary values
250    /// (integers as decimal, strings as basic-quoted).
251    pub args: Vec<Value>,
252}
253
/// Parse mode selector.
///
/// `Full` preserves comments and original literal forms; `Lite` skips
/// that bookkeeping. Both modes share the same grammar and the same
/// errors — only the round-trip metadata differs. See SPEC §Parsing
/// modes — full and lite.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ParseMode {
    Full,
    Lite,
}
263
/// Capability flag: this port ships lite-mode decode and lite-mode
/// `encode`. Lite mode is optional for a port to ship, so callers can
/// probe this constant before opting in via `decode_lite_document` /
/// `encode_lite`. See SPEC §Parsing modes — full and lite.
pub const SUPPORTS_LITE_MODE: bool = true;
269
/// Capability flag: this port ships the unordered-table decode mode.
/// Unordered mode is optional for a port to ship, so callers probe
/// this constant before opting in via `decode_document_unordered` /
/// `decode_lite_document_unordered`. See SPEC §Unordered tables.
pub const SUPPORTS_IGNORE_ORDER: bool = true;
275
276/// Decode a DMS source string and return only the body. Front matter,
277/// if present, is decoded and validated (illegal declarations error),
278/// then discarded. SPEC v0.14 canonical name (replaces `parse`).
279pub fn decode(src: &str) -> Result<Value, DecodeError> {
280    decode_document(src).map(|d| d.body)
281}
282
283/// Lite-mode body-only decode. Equivalent to
284/// `decode_lite_document(src)?.body`, but returns only the body. Use
285/// when you don't need front matter either.
286pub fn decode_lite(src: &str) -> Result<Value, DecodeError> {
287    decode_document_with_mode(src, ParseMode::Lite).map(|d| d.body)
288}
289
290/// Lite-mode `Document` decode. Body + front matter, no comment AST,
291/// no `original_forms`. The returned `Document` is **not** suitable
292/// for `encode` round-trip — see SPEC §Parsing modes — full and lite.
293pub fn decode_lite_document(src: &str) -> Result<Document, DecodeError> {
294    decode_document_with_mode(src, ParseMode::Lite)
295}
296
297/// Decode a full DMS document, returning body + front-matter metadata
298/// + attached comments.
299pub fn decode_document(src: &str) -> Result<Document, DecodeError> {
300    decode_document_with_options(src, ParseMode::Full, false)
301}
302
303/// Full-mode decode with the **unordered** table backing — every
304/// `Value::Table` is replaced by `Value::UnorderedTable` (a plain
305/// `HashMap`, no insertion-order tracking). See SPEC §"Unordered
306/// tables". Front matter remains ordered (per spec — meta is small and
307/// is consumed strictly).
308///
309/// **Important:** the resulting `Document` cannot be round-tripped via
310/// `encode` (full-mode emit). Use `encode_lite` if you need to
311/// re-serialize. `encode` will panic with a clear message on an
312/// unordered Document.
313pub fn decode_document_unordered(src: &str) -> Result<Document, DecodeError> {
314    decode_document_with_options(src, ParseMode::Full, true)
315}
316
317/// Lite-mode decode with the **unordered** table backing. The
318/// `(unordered, lite)` combo is the fastest read-only path: hash-only
319/// table backing + no comment/original-form bookkeeping. See SPEC
320/// §"Unordered tables".
321pub fn decode_lite_document_unordered(src: &str) -> Result<Document, DecodeError> {
322    decode_document_with_options(src, ParseMode::Lite, true)
323}
324
325/// Decode with explicit mode selection.
326pub fn decode_document_with_mode(src: &str, mode: ParseMode) -> Result<Document, DecodeError> {
327    decode_document_with_options(src, mode, false)
328}
329
330/// Decode with explicit mode and ordering selection. When
331/// `ignore_order=true`, body tables are built as `Value::UnorderedTable`
332/// (HashMap-backed); when false, as `Value::Table` (IndexMap-backed,
333/// insertion-order preserved). See SPEC §"Unordered tables".
334pub fn decode_document_with_options(
335    src: &str,
336    mode: ParseMode,
337    ignore_order: bool,
338) -> Result<Document, DecodeError> {
339    // SPEC §"UTF-8 only, NFC-normalized": DMS source is plain UTF-8 with
340    // no byte-order mark. A leading U+FEFF is not silently consumed —
341    // reject it explicitly so encoding mistakes surface loudly. (BOMs
342    // *inside* string/heredoc bodies are fine; this only fires at offset 0.)
343    if src.starts_with('\u{FEFF}') {
344        return Err(DecodeError {
345            line: 1,
346            column: 1,
347            message: "BOM (U+FEFF) at file start is not allowed; DMS source is plain UTF-8"
348                .to_string(),
349        });
350    }
351    // U+0000 is not allowed anywhere in DMS source (see SPEC §Strings).
352    // Reject up front so deeper code can assume NUL-free bytes.
353    if let Some(off) = src.find('\0') {
354        let prefix = &src[..off];
355        let line = 1 + prefix.bytes().filter(|&b| b == b'\n').count();
356        let last_nl = prefix.rfind('\n').map(|i| i + 1).unwrap_or(0);
357        let col = off - last_nl + 1;
358        return Err(DecodeError {
359            line,
360            column: col,
361            message: "U+0000 (NUL) is not allowed in DMS source".to_string(),
362        });
363    }
364    // SPEC §Unicode normalization: NFC the source before tokenization so
365    // bare keys in decomposed form get accepted, and so equivalent
366    // spellings collide on the duplicate-key check. Idempotent for
367    // already-NFC input.
368    let normalized = nfc_normalize(src);
369    let mut p = Parser::new_with_mode(&normalized, mode);
370    p.ignore_order = ignore_order;
371    let meta = p.parse_front_matter()?;
372    let body = p.parse_body()?;
373    Ok(Document {
374        meta,
375        body,
376        comments: p.comments,
377        original_forms: p.original_forms,
378    })
379}
380
381/// Tier-1-aware decode entry point used by `tier1::decode_t1`.
382///
383/// Identical to `decode_document_with_options(src, Full, false)` except
384/// that `_dms_tier: 1` in front matter is accepted rather than rejected.
385/// Returns the decoded `Document` together with the observed tier value
386/// (`0` when no `_dms_tier` key was present or when `_dms_tier: 0`;
387/// `1` when `_dms_tier: 1` was seen). See TIER1.md §"Front matter
388/// additions" for the contract.
389pub(crate) fn decode_document_accepting_tier1(
390    src: &str,
391) -> Result<
392    (
393        Document,
394        u32,
395        Vec<(Vec<BreadcrumbSegment>, tier1::DecorationPosition, tier1::DecoratorCall)>,
396        Vec<Vec<BreadcrumbSegment>>,
397    ),
398    DecodeError,
399> {
400    if src.starts_with('\u{FEFF}') {
401        return Err(DecodeError {
402            line: 1,
403            column: 1,
404            message: "BOM (U+FEFF) at file start is not allowed; DMS source is plain UTF-8"
405                .to_string(),
406        });
407    }
408    if let Some(off) = src.find('\0') {
409        let prefix = &src[..off];
410        let line = 1 + prefix.bytes().filter(|&b| b == b'\n').count();
411        let last_nl = prefix.rfind('\n').map(|i| i + 1).unwrap_or(0);
412        let col = off - last_nl + 1;
413        return Err(DecodeError {
414            line,
415            column: col,
416            message: "U+0000 (NUL) is not allowed in DMS source".to_string(),
417        });
418    }
419    let normalized = nfc_normalize(src);
420    let mut p = Parser::new_with_mode(&normalized, ParseMode::Full);
421    p.accept_tier1 = true;
422    let meta = p.parse_front_matter()?;
423    let tier = p.observed_tier;
424    let body = p.parse_body()?;
425    let raw_decorations = p.decorations_raw;
426    let decoration_only_paths = p.decoration_only_paths;
427    Ok((
428        Document {
429            meta,
430            body,
431            comments: p.comments,
432            original_forms: p.original_forms,
433        },
434        tier,
435        raw_decorations,
436        decoration_only_paths,
437    ))
438}
439
440/// Parse a bare value string with tier-1 decoration capture active.
441///
442/// Used by `tier1::parse_param_group` to parse the interior of a
443/// param-group bracket (`{...}` for named, `[...]` for positional)
444/// while capturing any nested decorator calls on the values.
445///
446/// `src` must be a complete value expression (table literal, list
447/// literal, etc.) with no surrounding front matter. Decorations found
448/// inside are returned in the second element of the tuple; paths are
449/// rooted at the parsed value's own shape (e.g. `[Key("label")]` for
450/// `{label: |inner() "v"}`).
451///
452/// Errors if there is trailing content after the value.
453pub(crate) fn parse_value_t1(
454    src: &str,
455) -> Result<
456    (
457        Value,
458        Vec<(Vec<BreadcrumbSegment>, tier1::DecorationPosition, tier1::DecoratorCall)>,
459    ),
460    DecodeError,
461> {
462    let mut p = Parser::new_with_mode(src, ParseMode::Full);
463    p.accept_tier1 = true;
464    p.observed_tier = 1;
465    let v = p.parse_body()?;
466    // parse_body already checks for trailing content.
467    Ok((v, p.decorations_raw))
468}
469
470/// Front-matter-only decode. Scans leading trivia, the opening `+++`,
471/// the front-matter contents, and the closing `+++`, then stops — body
472/// bytes are NOT tokenized. Required at SPEC tier 0; see SPEC
473/// §"Front-matter-only decode" for the full contract.
474///
475/// Return value:
476///
477/// - `Ok(None)` — the document has no front matter at all (no opening
478///   `+++` after leading trivia).
479/// - `Ok(Some(Value::Table(map)))` — front matter was present. `map` is
480///   empty when the source carried `+++\n+++\n` with no inner keys, so
481///   callers can distinguish "no front matter" from "present-but-empty".
482/// - `Err(DecodeError)` — every front-matter rule still applies:
483///   open/close on their own lines, unterminated front matter is an
484///   error, the `_`-prefix namespace is enforced (`_dms_tier` is
485///   type-checked, unknown reserved keys rejected). Diagnostics inside
486///   the `+++ ... +++` block are byte-identical to a full decode.
487///
488/// Errors that only manifest in the body (duplicate body keys,
489/// unterminated body heredoc, …) are NOT surfaced — that's the entire
490/// point of this entry point. Callers needing whole-document validation
491/// must use [`decode_document`].
492///
493/// Runs in lite mode (no comment AST, no `original_forms`).
494pub fn decode_front_matter(src: &str) -> Result<Option<Value>, DecodeError> {
495    // Source preprocessing — same as `decode_document_with_options` so
496    // diagnostics stay byte-identical between front-matter-only and
497    // full decode for sources whose only error is in the FM block.
498    if src.starts_with('\u{FEFF}') {
499        return Err(DecodeError {
500            line: 1,
501            column: 1,
502            message: "BOM (U+FEFF) at file start is not allowed; DMS source is plain UTF-8"
503                .to_string(),
504        });
505    }
506    if let Some(off) = src.find('\0') {
507        let prefix = &src[..off];
508        let line = 1 + prefix.bytes().filter(|&b| b == b'\n').count();
509        let last_nl = prefix.rfind('\n').map(|i| i + 1).unwrap_or(0);
510        let col = off - last_nl + 1;
511        return Err(DecodeError {
512            line,
513            column: col,
514            message: "U+0000 (NUL) is not allowed in DMS source".to_string(),
515        });
516    }
517    let normalized = nfc_normalize(src);
518    let mut p = Parser::new_with_mode(&normalized, ParseMode::Lite);
519    let meta = p.parse_front_matter()?;
520    // Note: we deliberately do NOT call p.parse_body() — body bytes
521    // remain untokenized, per SPEC §"Front-matter-only decode".
522    Ok(meta.map(Value::Table))
523}
524
525// ---------- deprecated `parse*` aliases (SPEC v0.14 rename) ----------
526//
527// SPEC v0.14 renamed the canonical entry points from `parse`/`to_dms`
528// to `decode`/`encode`. The old names live on for one release as
529// thin deprecated wrappers so existing downstream code keeps compiling
530// (with a warning) — slated for removal in v0.15. See PORTING.md
531// §"Migration from the `parse`/`to_dms` era".
532
533/// Deprecated alias for [`decode`]. Removed in v0.15.
534#[deprecated(since = "0.3.0", note = "use `decode` instead")]
535pub fn parse(src: &str) -> Result<Value, DecodeError> {
536    decode(src)
537}
538
539/// Deprecated alias for [`decode_lite`]. Removed in v0.15.
540#[deprecated(since = "0.3.0", note = "use `decode_lite` instead")]
541pub fn parse_lite(src: &str) -> Result<Value, DecodeError> {
542    decode_lite(src)
543}
544
545/// Deprecated alias for [`decode_lite_document`]. Removed in v0.15.
546#[deprecated(since = "0.3.0", note = "use `decode_lite_document` instead")]
547pub fn parse_lite_document(src: &str) -> Result<Document, DecodeError> {
548    decode_lite_document(src)
549}
550
551/// Deprecated alias for [`decode_document`]. Removed in v0.15.
552#[deprecated(since = "0.3.0", note = "use `decode_document` instead")]
553pub fn parse_document(src: &str) -> Result<Document, DecodeError> {
554    decode_document(src)
555}
556
557/// Deprecated alias for [`decode_document_unordered`]. Removed in v0.15.
558#[deprecated(since = "0.3.0", note = "use `decode_document_unordered` instead")]
559pub fn parse_document_unordered(src: &str) -> Result<Document, DecodeError> {
560    decode_document_unordered(src)
561}
562
563/// Deprecated alias for [`decode_lite_document_unordered`]. Removed in v0.15.
564#[deprecated(since = "0.3.0", note = "use `decode_lite_document_unordered` instead")]
565pub fn parse_lite_document_unordered(src: &str) -> Result<Document, DecodeError> {
566    decode_lite_document_unordered(src)
567}
568
569/// Deprecated alias for [`decode_document_with_mode`]. Removed in v0.15.
570#[deprecated(since = "0.3.0", note = "use `decode_document_with_mode` instead")]
571pub fn parse_document_with_mode(src: &str, mode: ParseMode) -> Result<Document, DecodeError> {
572    decode_document_with_mode(src, mode)
573}
574
575/// Deprecated alias for [`decode_document_with_options`]. Removed in v0.15.
576#[deprecated(since = "0.3.0", note = "use `decode_document_with_options` instead")]
577pub fn parse_document_with_options(
578    src: &str,
579    mode: ParseMode,
580    ignore_order: bool,
581) -> Result<Document, DecodeError> {
582    decode_document_with_options(src, mode, ignore_order)
583}
584
585/// NFC-normalize a string. Returns the original `String` allocation when
586/// the input is already NFC (the common case for ASCII-heavy sources),
587/// otherwise allocates the normalized form.
588fn nfc_normalize(s: &str) -> String {
589    use unicode_normalization::{is_nfc_quick, IsNormalized, UnicodeNormalization};
590    match is_nfc_quick(s.chars()) {
591        IsNormalized::Yes => s.to_string(),
592        _ => s.nfc().collect(),
593    }
594}
595
596// ---------- helpers ----------
597
598// Frozen Unicode 15.1 snapshot of `XID_Continue \ Default_Ignorable_Code_Point`.
599// SPEC §"What counts as a bare key" — UAX #31 §2 default identifier syntax,
600// frozen so every conforming parser rejects/accepts the *same* code points
601// regardless of the host runtime's stdlib Unicode version. 773 inclusive
602// ranges, sorted by start; binary searched in `is_xid_continue`.
603static XID_CONTINUE_RANGES: &[(u32, u32)] = &[
604    (0x00AA, 0x00AA),
605    (0x00B5, 0x00B5),
606    (0x00B7, 0x00B7),
607    (0x00BA, 0x00BA),
608    (0x00C0, 0x00D6),
609    (0x00D8, 0x00F6),
610    (0x00F8, 0x02C1),
611    (0x02C6, 0x02D1),
612    (0x02E0, 0x02E4),
613    (0x02EC, 0x02EC),
614    (0x02EE, 0x02EE),
615    (0x0300, 0x034E),
616    (0x0350, 0x0374),
617    (0x0376, 0x0377),
618    (0x037B, 0x037D),
619    (0x037F, 0x037F),
620    (0x0386, 0x038A),
621    (0x038C, 0x038C),
622    (0x038E, 0x03A1),
623    (0x03A3, 0x03F5),
624    (0x03F7, 0x0481),
625    (0x0483, 0x0487),
626    (0x048A, 0x052F),
627    (0x0531, 0x0556),
628    (0x0559, 0x0559),
629    (0x0560, 0x0588),
630    (0x0591, 0x05BD),
631    (0x05BF, 0x05BF),
632    (0x05C1, 0x05C2),
633    (0x05C4, 0x05C5),
634    (0x05C7, 0x05C7),
635    (0x05D0, 0x05EA),
636    (0x05EF, 0x05F2),
637    (0x0610, 0x061A),
638    (0x0620, 0x0669),
639    (0x066E, 0x06D3),
640    (0x06D5, 0x06DC),
641    (0x06DF, 0x06E8),
642    (0x06EA, 0x06FC),
643    (0x06FF, 0x06FF),
644    (0x0710, 0x074A),
645    (0x074D, 0x07B1),
646    (0x07C0, 0x07F5),
647    (0x07FA, 0x07FA),
648    (0x07FD, 0x07FD),
649    (0x0800, 0x082D),
650    (0x0840, 0x085B),
651    (0x0860, 0x086A),
652    (0x0870, 0x0887),
653    (0x0889, 0x088E),
654    (0x0898, 0x08E1),
655    (0x08E3, 0x0963),
656    (0x0966, 0x096F),
657    (0x0971, 0x0983),
658    (0x0985, 0x098C),
659    (0x098F, 0x0990),
660    (0x0993, 0x09A8),
661    (0x09AA, 0x09B0),
662    (0x09B2, 0x09B2),
663    (0x09B6, 0x09B9),
664    (0x09BC, 0x09C4),
665    (0x09C7, 0x09C8),
666    (0x09CB, 0x09CE),
667    (0x09D7, 0x09D7),
668    (0x09DC, 0x09DD),
669    (0x09DF, 0x09E3),
670    (0x09E6, 0x09F1),
671    (0x09FC, 0x09FC),
672    (0x09FE, 0x09FE),
673    (0x0A01, 0x0A03),
674    (0x0A05, 0x0A0A),
675    (0x0A0F, 0x0A10),
676    (0x0A13, 0x0A28),
677    (0x0A2A, 0x0A30),
678    (0x0A32, 0x0A33),
679    (0x0A35, 0x0A36),
680    (0x0A38, 0x0A39),
681    (0x0A3C, 0x0A3C),
682    (0x0A3E, 0x0A42),
683    (0x0A47, 0x0A48),
684    (0x0A4B, 0x0A4D),
685    (0x0A51, 0x0A51),
686    (0x0A59, 0x0A5C),
687    (0x0A5E, 0x0A5E),
688    (0x0A66, 0x0A75),
689    (0x0A81, 0x0A83),
690    (0x0A85, 0x0A8D),
691    (0x0A8F, 0x0A91),
692    (0x0A93, 0x0AA8),
693    (0x0AAA, 0x0AB0),
694    (0x0AB2, 0x0AB3),
695    (0x0AB5, 0x0AB9),
696    (0x0ABC, 0x0AC5),
697    (0x0AC7, 0x0AC9),
698    (0x0ACB, 0x0ACD),
699    (0x0AD0, 0x0AD0),
700    (0x0AE0, 0x0AE3),
701    (0x0AE6, 0x0AEF),
702    (0x0AF9, 0x0AFF),
703    (0x0B01, 0x0B03),
704    (0x0B05, 0x0B0C),
705    (0x0B0F, 0x0B10),
706    (0x0B13, 0x0B28),
707    (0x0B2A, 0x0B30),
708    (0x0B32, 0x0B33),
709    (0x0B35, 0x0B39),
710    (0x0B3C, 0x0B44),
711    (0x0B47, 0x0B48),
712    (0x0B4B, 0x0B4D),
713    (0x0B55, 0x0B57),
714    (0x0B5C, 0x0B5D),
715    (0x0B5F, 0x0B63),
716    (0x0B66, 0x0B6F),
717    (0x0B71, 0x0B71),
718    (0x0B82, 0x0B83),
719    (0x0B85, 0x0B8A),
720    (0x0B8E, 0x0B90),
721    (0x0B92, 0x0B95),
722    (0x0B99, 0x0B9A),
723    (0x0B9C, 0x0B9C),
724    (0x0B9E, 0x0B9F),
725    (0x0BA3, 0x0BA4),
726    (0x0BA8, 0x0BAA),
727    (0x0BAE, 0x0BB9),
728    (0x0BBE, 0x0BC2),
729    (0x0BC6, 0x0BC8),
730    (0x0BCA, 0x0BCD),
731    (0x0BD0, 0x0BD0),
732    (0x0BD7, 0x0BD7),
733    (0x0BE6, 0x0BEF),
734    (0x0C00, 0x0C0C),
735    (0x0C0E, 0x0C10),
736    (0x0C12, 0x0C28),
737    (0x0C2A, 0x0C39),
738    (0x0C3C, 0x0C44),
739    (0x0C46, 0x0C48),
740    (0x0C4A, 0x0C4D),
741    (0x0C55, 0x0C56),
742    (0x0C58, 0x0C5A),
743    (0x0C5D, 0x0C5D),
744    (0x0C60, 0x0C63),
745    (0x0C66, 0x0C6F),
746    (0x0C80, 0x0C83),
747    (0x0C85, 0x0C8C),
748    (0x0C8E, 0x0C90),
749    (0x0C92, 0x0CA8),
750    (0x0CAA, 0x0CB3),
751    (0x0CB5, 0x0CB9),
752    (0x0CBC, 0x0CC4),
753    (0x0CC6, 0x0CC8),
754    (0x0CCA, 0x0CCD),
755    (0x0CD5, 0x0CD6),
756    (0x0CDD, 0x0CDE),
757    (0x0CE0, 0x0CE3),
758    (0x0CE6, 0x0CEF),
759    (0x0CF1, 0x0CF3),
760    (0x0D00, 0x0D0C),
761    (0x0D0E, 0x0D10),
762    (0x0D12, 0x0D44),
763    (0x0D46, 0x0D48),
764    (0x0D4A, 0x0D4E),
765    (0x0D54, 0x0D57),
766    (0x0D5F, 0x0D63),
767    (0x0D66, 0x0D6F),
768    (0x0D7A, 0x0D7F),
769    (0x0D81, 0x0D83),
770    (0x0D85, 0x0D96),
771    (0x0D9A, 0x0DB1),
772    (0x0DB3, 0x0DBB),
773    (0x0DBD, 0x0DBD),
774    (0x0DC0, 0x0DC6),
775    (0x0DCA, 0x0DCA),
776    (0x0DCF, 0x0DD4),
777    (0x0DD6, 0x0DD6),
778    (0x0DD8, 0x0DDF),
779    (0x0DE6, 0x0DEF),
780    (0x0DF2, 0x0DF3),
781    (0x0E01, 0x0E3A),
782    (0x0E40, 0x0E4E),
783    (0x0E50, 0x0E59),
784    (0x0E81, 0x0E82),
785    (0x0E84, 0x0E84),
786    (0x0E86, 0x0E8A),
787    (0x0E8C, 0x0EA3),
788    (0x0EA5, 0x0EA5),
789    (0x0EA7, 0x0EBD),
790    (0x0EC0, 0x0EC4),
791    (0x0EC6, 0x0EC6),
792    (0x0EC8, 0x0ECE),
793    (0x0ED0, 0x0ED9),
794    (0x0EDC, 0x0EDF),
795    (0x0F00, 0x0F00),
796    (0x0F18, 0x0F19),
797    (0x0F20, 0x0F29),
798    (0x0F35, 0x0F35),
799    (0x0F37, 0x0F37),
800    (0x0F39, 0x0F39),
801    (0x0F3E, 0x0F47),
802    (0x0F49, 0x0F6C),
803    (0x0F71, 0x0F84),
804    (0x0F86, 0x0F97),
805    (0x0F99, 0x0FBC),
806    (0x0FC6, 0x0FC6),
807    (0x1000, 0x1049),
808    (0x1050, 0x109D),
809    (0x10A0, 0x10C5),
810    (0x10C7, 0x10C7),
811    (0x10CD, 0x10CD),
812    (0x10D0, 0x10FA),
813    (0x10FC, 0x115E),
814    (0x1161, 0x1248),
815    (0x124A, 0x124D),
816    (0x1250, 0x1256),
817    (0x1258, 0x1258),
818    (0x125A, 0x125D),
819    (0x1260, 0x1288),
820    (0x128A, 0x128D),
821    (0x1290, 0x12B0),
822    (0x12B2, 0x12B5),
823    (0x12B8, 0x12BE),
824    (0x12C0, 0x12C0),
825    (0x12C2, 0x12C5),
826    (0x12C8, 0x12D6),
827    (0x12D8, 0x1310),
828    (0x1312, 0x1315),
829    (0x1318, 0x135A),
830    (0x135D, 0x135F),
831    (0x1369, 0x1371),
832    (0x1380, 0x138F),
833    (0x13A0, 0x13F5),
834    (0x13F8, 0x13FD),
835    (0x1401, 0x166C),
836    (0x166F, 0x167F),
837    (0x1681, 0x169A),
838    (0x16A0, 0x16EA),
839    (0x16EE, 0x16F8),
840    (0x1700, 0x1715),
841    (0x171F, 0x1734),
842    (0x1740, 0x1753),
843    (0x1760, 0x176C),
844    (0x176E, 0x1770),
845    (0x1772, 0x1773),
846    (0x1780, 0x17B3),
847    (0x17B6, 0x17D3),
848    (0x17D7, 0x17D7),
849    (0x17DC, 0x17DD),
850    (0x17E0, 0x17E9),
851    (0x1810, 0x1819),
852    (0x1820, 0x1878),
853    (0x1880, 0x18AA),
854    (0x18B0, 0x18F5),
855    (0x1900, 0x191E),
856    (0x1920, 0x192B),
857    (0x1930, 0x193B),
858    (0x1946, 0x196D),
859    (0x1970, 0x1974),
860    (0x1980, 0x19AB),
861    (0x19B0, 0x19C9),
862    (0x19D0, 0x19DA),
863    (0x1A00, 0x1A1B),
864    (0x1A20, 0x1A5E),
865    (0x1A60, 0x1A7C),
866    (0x1A7F, 0x1A89),
867    (0x1A90, 0x1A99),
868    (0x1AA7, 0x1AA7),
869    (0x1AB0, 0x1ABD),
870    (0x1ABF, 0x1ACE),
871    (0x1B00, 0x1B4C),
872    (0x1B50, 0x1B59),
873    (0x1B6B, 0x1B73),
874    (0x1B80, 0x1BF3),
875    (0x1C00, 0x1C37),
876    (0x1C40, 0x1C49),
877    (0x1C4D, 0x1C7D),
878    (0x1C80, 0x1C88),
879    (0x1C90, 0x1CBA),
880    (0x1CBD, 0x1CBF),
881    (0x1CD0, 0x1CD2),
882    (0x1CD4, 0x1CFA),
883    (0x1D00, 0x1F15),
884    (0x1F18, 0x1F1D),
885    (0x1F20, 0x1F45),
886    (0x1F48, 0x1F4D),
887    (0x1F50, 0x1F57),
888    (0x1F59, 0x1F59),
889    (0x1F5B, 0x1F5B),
890    (0x1F5D, 0x1F5D),
891    (0x1F5F, 0x1F7D),
892    (0x1F80, 0x1FB4),
893    (0x1FB6, 0x1FBC),
894    (0x1FBE, 0x1FBE),
895    (0x1FC2, 0x1FC4),
896    (0x1FC6, 0x1FCC),
897    (0x1FD0, 0x1FD3),
898    (0x1FD6, 0x1FDB),
899    (0x1FE0, 0x1FEC),
900    (0x1FF2, 0x1FF4),
901    (0x1FF6, 0x1FFC),
902    (0x203F, 0x2040),
903    (0x2054, 0x2054),
904    (0x2071, 0x2071),
905    (0x207F, 0x207F),
906    (0x2090, 0x209C),
907    (0x20D0, 0x20DC),
908    (0x20E1, 0x20E1),
909    (0x20E5, 0x20F0),
910    (0x2102, 0x2102),
911    (0x2107, 0x2107),
912    (0x210A, 0x2113),
913    (0x2115, 0x2115),
914    (0x2118, 0x211D),
915    (0x2124, 0x2124),
916    (0x2126, 0x2126),
917    (0x2128, 0x2128),
918    (0x212A, 0x2139),
919    (0x213C, 0x213F),
920    (0x2145, 0x2149),
921    (0x214E, 0x214E),
922    (0x2160, 0x2188),
923    (0x2C00, 0x2CE4),
924    (0x2CEB, 0x2CF3),
925    (0x2D00, 0x2D25),
926    (0x2D27, 0x2D27),
927    (0x2D2D, 0x2D2D),
928    (0x2D30, 0x2D67),
929    (0x2D6F, 0x2D6F),
930    (0x2D7F, 0x2D96),
931    (0x2DA0, 0x2DA6),
932    (0x2DA8, 0x2DAE),
933    (0x2DB0, 0x2DB6),
934    (0x2DB8, 0x2DBE),
935    (0x2DC0, 0x2DC6),
936    (0x2DC8, 0x2DCE),
937    (0x2DD0, 0x2DD6),
938    (0x2DD8, 0x2DDE),
939    (0x2DE0, 0x2DFF),
940    (0x3005, 0x3007),
941    (0x3021, 0x302F),
942    (0x3031, 0x3035),
943    (0x3038, 0x303C),
944    (0x3041, 0x3096),
945    (0x3099, 0x309A),
946    (0x309D, 0x309F),
947    (0x30A1, 0x30FF),
948    (0x3105, 0x312F),
949    (0x3131, 0x3163),
950    (0x3165, 0x318E),
951    (0x31A0, 0x31BF),
952    (0x31F0, 0x31FF),
953    (0x3400, 0x4DBF),
954    (0x4E00, 0xA48C),
955    (0xA4D0, 0xA4FD),
956    (0xA500, 0xA60C),
957    (0xA610, 0xA62B),
958    (0xA640, 0xA66F),
959    (0xA674, 0xA67D),
960    (0xA67F, 0xA6F1),
961    (0xA717, 0xA71F),
962    (0xA722, 0xA788),
963    (0xA78B, 0xA7CA),
964    (0xA7D0, 0xA7D1),
965    (0xA7D3, 0xA7D3),
966    (0xA7D5, 0xA7D9),
967    (0xA7F2, 0xA827),
968    (0xA82C, 0xA82C),
969    (0xA840, 0xA873),
970    (0xA880, 0xA8C5),
971    (0xA8D0, 0xA8D9),
972    (0xA8E0, 0xA8F7),
973    (0xA8FB, 0xA8FB),
974    (0xA8FD, 0xA92D),
975    (0xA930, 0xA953),
976    (0xA960, 0xA97C),
977    (0xA980, 0xA9C0),
978    (0xA9CF, 0xA9D9),
979    (0xA9E0, 0xA9FE),
980    (0xAA00, 0xAA36),
981    (0xAA40, 0xAA4D),
982    (0xAA50, 0xAA59),
983    (0xAA60, 0xAA76),
984    (0xAA7A, 0xAAC2),
985    (0xAADB, 0xAADD),
986    (0xAAE0, 0xAAEF),
987    (0xAAF2, 0xAAF6),
988    (0xAB01, 0xAB06),
989    (0xAB09, 0xAB0E),
990    (0xAB11, 0xAB16),
991    (0xAB20, 0xAB26),
992    (0xAB28, 0xAB2E),
993    (0xAB30, 0xAB5A),
994    (0xAB5C, 0xAB69),
995    (0xAB70, 0xABEA),
996    (0xABEC, 0xABED),
997    (0xABF0, 0xABF9),
998    (0xAC00, 0xD7A3),
999    (0xD7B0, 0xD7C6),
1000    (0xD7CB, 0xD7FB),
1001    (0xF900, 0xFA6D),
1002    (0xFA70, 0xFAD9),
1003    (0xFB00, 0xFB06),
1004    (0xFB13, 0xFB17),
1005    (0xFB1D, 0xFB28),
1006    (0xFB2A, 0xFB36),
1007    (0xFB38, 0xFB3C),
1008    (0xFB3E, 0xFB3E),
1009    (0xFB40, 0xFB41),
1010    (0xFB43, 0xFB44),
1011    (0xFB46, 0xFBB1),
1012    (0xFBD3, 0xFC5D),
1013    (0xFC64, 0xFD3D),
1014    (0xFD50, 0xFD8F),
1015    (0xFD92, 0xFDC7),
1016    (0xFDF0, 0xFDF9),
1017    (0xFE20, 0xFE2F),
1018    (0xFE33, 0xFE34),
1019    (0xFE4D, 0xFE4F),
1020    (0xFE71, 0xFE71),
1021    (0xFE73, 0xFE73),
1022    (0xFE77, 0xFE77),
1023    (0xFE79, 0xFE79),
1024    (0xFE7B, 0xFE7B),
1025    (0xFE7D, 0xFE7D),
1026    (0xFE7F, 0xFEFC),
1027    (0xFF10, 0xFF19),
1028    (0xFF21, 0xFF3A),
1029    (0xFF3F, 0xFF3F),
1030    (0xFF41, 0xFF5A),
1031    (0xFF65, 0xFF9F),
1032    (0xFFA1, 0xFFBE),
1033    (0xFFC2, 0xFFC7),
1034    (0xFFCA, 0xFFCF),
1035    (0xFFD2, 0xFFD7),
1036    (0xFFDA, 0xFFDC),
1037    (0x10000, 0x1000B),
1038    (0x1000D, 0x10026),
1039    (0x10028, 0x1003A),
1040    (0x1003C, 0x1003D),
1041    (0x1003F, 0x1004D),
1042    (0x10050, 0x1005D),
1043    (0x10080, 0x100FA),
1044    (0x10140, 0x10174),
1045    (0x101FD, 0x101FD),
1046    (0x10280, 0x1029C),
1047    (0x102A0, 0x102D0),
1048    (0x102E0, 0x102E0),
1049    (0x10300, 0x1031F),
1050    (0x1032D, 0x1034A),
1051    (0x10350, 0x1037A),
1052    (0x10380, 0x1039D),
1053    (0x103A0, 0x103C3),
1054    (0x103C8, 0x103CF),
1055    (0x103D1, 0x103D5),
1056    (0x10400, 0x1049D),
1057    (0x104A0, 0x104A9),
1058    (0x104B0, 0x104D3),
1059    (0x104D8, 0x104FB),
1060    (0x10500, 0x10527),
1061    (0x10530, 0x10563),
1062    (0x10570, 0x1057A),
1063    (0x1057C, 0x1058A),
1064    (0x1058C, 0x10592),
1065    (0x10594, 0x10595),
1066    (0x10597, 0x105A1),
1067    (0x105A3, 0x105B1),
1068    (0x105B3, 0x105B9),
1069    (0x105BB, 0x105BC),
1070    (0x10600, 0x10736),
1071    (0x10740, 0x10755),
1072    (0x10760, 0x10767),
1073    (0x10780, 0x10785),
1074    (0x10787, 0x107B0),
1075    (0x107B2, 0x107BA),
1076    (0x10800, 0x10805),
1077    (0x10808, 0x10808),
1078    (0x1080A, 0x10835),
1079    (0x10837, 0x10838),
1080    (0x1083C, 0x1083C),
1081    (0x1083F, 0x10855),
1082    (0x10860, 0x10876),
1083    (0x10880, 0x1089E),
1084    (0x108E0, 0x108F2),
1085    (0x108F4, 0x108F5),
1086    (0x10900, 0x10915),
1087    (0x10920, 0x10939),
1088    (0x10980, 0x109B7),
1089    (0x109BE, 0x109BF),
1090    (0x10A00, 0x10A03),
1091    (0x10A05, 0x10A06),
1092    (0x10A0C, 0x10A13),
1093    (0x10A15, 0x10A17),
1094    (0x10A19, 0x10A35),
1095    (0x10A38, 0x10A3A),
1096    (0x10A3F, 0x10A3F),
1097    (0x10A60, 0x10A7C),
1098    (0x10A80, 0x10A9C),
1099    (0x10AC0, 0x10AC7),
1100    (0x10AC9, 0x10AE6),
1101    (0x10B00, 0x10B35),
1102    (0x10B40, 0x10B55),
1103    (0x10B60, 0x10B72),
1104    (0x10B80, 0x10B91),
1105    (0x10C00, 0x10C48),
1106    (0x10C80, 0x10CB2),
1107    (0x10CC0, 0x10CF2),
1108    (0x10D00, 0x10D27),
1109    (0x10D30, 0x10D39),
1110    (0x10E80, 0x10EA9),
1111    (0x10EAB, 0x10EAC),
1112    (0x10EB0, 0x10EB1),
1113    (0x10EFD, 0x10F1C),
1114    (0x10F27, 0x10F27),
1115    (0x10F30, 0x10F50),
1116    (0x10F70, 0x10F85),
1117    (0x10FB0, 0x10FC4),
1118    (0x10FE0, 0x10FF6),
1119    (0x11000, 0x11046),
1120    (0x11066, 0x11075),
1121    (0x1107F, 0x110BA),
1122    (0x110C2, 0x110C2),
1123    (0x110D0, 0x110E8),
1124    (0x110F0, 0x110F9),
1125    (0x11100, 0x11134),
1126    (0x11136, 0x1113F),
1127    (0x11144, 0x11147),
1128    (0x11150, 0x11173),
1129    (0x11176, 0x11176),
1130    (0x11180, 0x111C4),
1131    (0x111C9, 0x111CC),
1132    (0x111CE, 0x111DA),
1133    (0x111DC, 0x111DC),
1134    (0x11200, 0x11211),
1135    (0x11213, 0x11237),
1136    (0x1123E, 0x11241),
1137    (0x11280, 0x11286),
1138    (0x11288, 0x11288),
1139    (0x1128A, 0x1128D),
1140    (0x1128F, 0x1129D),
1141    (0x1129F, 0x112A8),
1142    (0x112B0, 0x112EA),
1143    (0x112F0, 0x112F9),
1144    (0x11300, 0x11303),
1145    (0x11305, 0x1130C),
1146    (0x1130F, 0x11310),
1147    (0x11313, 0x11328),
1148    (0x1132A, 0x11330),
1149    (0x11332, 0x11333),
1150    (0x11335, 0x11339),
1151    (0x1133B, 0x11344),
1152    (0x11347, 0x11348),
1153    (0x1134B, 0x1134D),
1154    (0x11350, 0x11350),
1155    (0x11357, 0x11357),
1156    (0x1135D, 0x11363),
1157    (0x11366, 0x1136C),
1158    (0x11370, 0x11374),
1159    (0x11400, 0x1144A),
1160    (0x11450, 0x11459),
1161    (0x1145E, 0x11461),
1162    (0x11480, 0x114C5),
1163    (0x114C7, 0x114C7),
1164    (0x114D0, 0x114D9),
1165    (0x11580, 0x115B5),
1166    (0x115B8, 0x115C0),
1167    (0x115D8, 0x115DD),
1168    (0x11600, 0x11640),
1169    (0x11644, 0x11644),
1170    (0x11650, 0x11659),
1171    (0x11680, 0x116B8),
1172    (0x116C0, 0x116C9),
1173    (0x11700, 0x1171A),
1174    (0x1171D, 0x1172B),
1175    (0x11730, 0x11739),
1176    (0x11740, 0x11746),
1177    (0x11800, 0x1183A),
1178    (0x118A0, 0x118E9),
1179    (0x118FF, 0x11906),
1180    (0x11909, 0x11909),
1181    (0x1190C, 0x11913),
1182    (0x11915, 0x11916),
1183    (0x11918, 0x11935),
1184    (0x11937, 0x11938),
1185    (0x1193B, 0x11943),
1186    (0x11950, 0x11959),
1187    (0x119A0, 0x119A7),
1188    (0x119AA, 0x119D7),
1189    (0x119DA, 0x119E1),
1190    (0x119E3, 0x119E4),
1191    (0x11A00, 0x11A3E),
1192    (0x11A47, 0x11A47),
1193    (0x11A50, 0x11A99),
1194    (0x11A9D, 0x11A9D),
1195    (0x11AB0, 0x11AF8),
1196    (0x11C00, 0x11C08),
1197    (0x11C0A, 0x11C36),
1198    (0x11C38, 0x11C40),
1199    (0x11C50, 0x11C59),
1200    (0x11C72, 0x11C8F),
1201    (0x11C92, 0x11CA7),
1202    (0x11CA9, 0x11CB6),
1203    (0x11D00, 0x11D06),
1204    (0x11D08, 0x11D09),
1205    (0x11D0B, 0x11D36),
1206    (0x11D3A, 0x11D3A),
1207    (0x11D3C, 0x11D3D),
1208    (0x11D3F, 0x11D47),
1209    (0x11D50, 0x11D59),
1210    (0x11D60, 0x11D65),
1211    (0x11D67, 0x11D68),
1212    (0x11D6A, 0x11D8E),
1213    (0x11D90, 0x11D91),
1214    (0x11D93, 0x11D98),
1215    (0x11DA0, 0x11DA9),
1216    (0x11EE0, 0x11EF6),
1217    (0x11F00, 0x11F10),
1218    (0x11F12, 0x11F3A),
1219    (0x11F3E, 0x11F42),
1220    (0x11F50, 0x11F59),
1221    (0x11FB0, 0x11FB0),
1222    (0x12000, 0x12399),
1223    (0x12400, 0x1246E),
1224    (0x12480, 0x12543),
1225    (0x12F90, 0x12FF0),
1226    (0x13000, 0x1342F),
1227    (0x13440, 0x13455),
1228    (0x14400, 0x14646),
1229    (0x16800, 0x16A38),
1230    (0x16A40, 0x16A5E),
1231    (0x16A60, 0x16A69),
1232    (0x16A70, 0x16ABE),
1233    (0x16AC0, 0x16AC9),
1234    (0x16AD0, 0x16AED),
1235    (0x16AF0, 0x16AF4),
1236    (0x16B00, 0x16B36),
1237    (0x16B40, 0x16B43),
1238    (0x16B50, 0x16B59),
1239    (0x16B63, 0x16B77),
1240    (0x16B7D, 0x16B8F),
1241    (0x16E40, 0x16E7F),
1242    (0x16F00, 0x16F4A),
1243    (0x16F4F, 0x16F87),
1244    (0x16F8F, 0x16F9F),
1245    (0x16FE0, 0x16FE1),
1246    (0x16FE3, 0x16FE4),
1247    (0x16FF0, 0x16FF1),
1248    (0x17000, 0x187F7),
1249    (0x18800, 0x18CD5),
1250    (0x18D00, 0x18D08),
1251    (0x1AFF0, 0x1AFF3),
1252    (0x1AFF5, 0x1AFFB),
1253    (0x1AFFD, 0x1AFFE),
1254    (0x1B000, 0x1B122),
1255    (0x1B132, 0x1B132),
1256    (0x1B150, 0x1B152),
1257    (0x1B155, 0x1B155),
1258    (0x1B164, 0x1B167),
1259    (0x1B170, 0x1B2FB),
1260    (0x1BC00, 0x1BC6A),
1261    (0x1BC70, 0x1BC7C),
1262    (0x1BC80, 0x1BC88),
1263    (0x1BC90, 0x1BC99),
1264    (0x1BC9D, 0x1BC9E),
1265    (0x1CF00, 0x1CF2D),
1266    (0x1CF30, 0x1CF46),
1267    (0x1D165, 0x1D169),
1268    (0x1D16D, 0x1D172),
1269    (0x1D17B, 0x1D182),
1270    (0x1D185, 0x1D18B),
1271    (0x1D1AA, 0x1D1AD),
1272    (0x1D242, 0x1D244),
1273    (0x1D400, 0x1D454),
1274    (0x1D456, 0x1D49C),
1275    (0x1D49E, 0x1D49F),
1276    (0x1D4A2, 0x1D4A2),
1277    (0x1D4A5, 0x1D4A6),
1278    (0x1D4A9, 0x1D4AC),
1279    (0x1D4AE, 0x1D4B9),
1280    (0x1D4BB, 0x1D4BB),
1281    (0x1D4BD, 0x1D4C3),
1282    (0x1D4C5, 0x1D505),
1283    (0x1D507, 0x1D50A),
1284    (0x1D50D, 0x1D514),
1285    (0x1D516, 0x1D51C),
1286    (0x1D51E, 0x1D539),
1287    (0x1D53B, 0x1D53E),
1288    (0x1D540, 0x1D544),
1289    (0x1D546, 0x1D546),
1290    (0x1D54A, 0x1D550),
1291    (0x1D552, 0x1D6A5),
1292    (0x1D6A8, 0x1D6C0),
1293    (0x1D6C2, 0x1D6DA),
1294    (0x1D6DC, 0x1D6FA),
1295    (0x1D6FC, 0x1D714),
1296    (0x1D716, 0x1D734),
1297    (0x1D736, 0x1D74E),
1298    (0x1D750, 0x1D76E),
1299    (0x1D770, 0x1D788),
1300    (0x1D78A, 0x1D7A8),
1301    (0x1D7AA, 0x1D7C2),
1302    (0x1D7C4, 0x1D7CB),
1303    (0x1D7CE, 0x1D7FF),
1304    (0x1DA00, 0x1DA36),
1305    (0x1DA3B, 0x1DA6C),
1306    (0x1DA75, 0x1DA75),
1307    (0x1DA84, 0x1DA84),
1308    (0x1DA9B, 0x1DA9F),
1309    (0x1DAA1, 0x1DAAF),
1310    (0x1DF00, 0x1DF1E),
1311    (0x1DF25, 0x1DF2A),
1312    (0x1E000, 0x1E006),
1313    (0x1E008, 0x1E018),
1314    (0x1E01B, 0x1E021),
1315    (0x1E023, 0x1E024),
1316    (0x1E026, 0x1E02A),
1317    (0x1E030, 0x1E06D),
1318    (0x1E08F, 0x1E08F),
1319    (0x1E100, 0x1E12C),
1320    (0x1E130, 0x1E13D),
1321    (0x1E140, 0x1E149),
1322    (0x1E14E, 0x1E14E),
1323    (0x1E290, 0x1E2AE),
1324    (0x1E2C0, 0x1E2F9),
1325    (0x1E4D0, 0x1E4F9),
1326    (0x1E7E0, 0x1E7E6),
1327    (0x1E7E8, 0x1E7EB),
1328    (0x1E7ED, 0x1E7EE),
1329    (0x1E7F0, 0x1E7FE),
1330    (0x1E800, 0x1E8C4),
1331    (0x1E8D0, 0x1E8D6),
1332    (0x1E900, 0x1E94B),
1333    (0x1E950, 0x1E959),
1334    (0x1EE00, 0x1EE03),
1335    (0x1EE05, 0x1EE1F),
1336    (0x1EE21, 0x1EE22),
1337    (0x1EE24, 0x1EE24),
1338    (0x1EE27, 0x1EE27),
1339    (0x1EE29, 0x1EE32),
1340    (0x1EE34, 0x1EE37),
1341    (0x1EE39, 0x1EE39),
1342    (0x1EE3B, 0x1EE3B),
1343    (0x1EE42, 0x1EE42),
1344    (0x1EE47, 0x1EE47),
1345    (0x1EE49, 0x1EE49),
1346    (0x1EE4B, 0x1EE4B),
1347    (0x1EE4D, 0x1EE4F),
1348    (0x1EE51, 0x1EE52),
1349    (0x1EE54, 0x1EE54),
1350    (0x1EE57, 0x1EE57),
1351    (0x1EE59, 0x1EE59),
1352    (0x1EE5B, 0x1EE5B),
1353    (0x1EE5D, 0x1EE5D),
1354    (0x1EE5F, 0x1EE5F),
1355    (0x1EE61, 0x1EE62),
1356    (0x1EE64, 0x1EE64),
1357    (0x1EE67, 0x1EE6A),
1358    (0x1EE6C, 0x1EE72),
1359    (0x1EE74, 0x1EE77),
1360    (0x1EE79, 0x1EE7C),
1361    (0x1EE7E, 0x1EE7E),
1362    (0x1EE80, 0x1EE89),
1363    (0x1EE8B, 0x1EE9B),
1364    (0x1EEA1, 0x1EEA3),
1365    (0x1EEA5, 0x1EEA9),
1366    (0x1EEAB, 0x1EEBB),
1367    (0x1FBF0, 0x1FBF9),
1368    (0x20000, 0x2A6DF),
1369    (0x2A700, 0x2B739),
1370    (0x2B740, 0x2B81D),
1371    (0x2B820, 0x2CEA1),
1372    (0x2CEB0, 0x2EBE0),
1373    (0x2EBF0, 0x2EE5D),
1374    (0x2F800, 0x2FA1D),
1375    (0x30000, 0x3134A),
1376    (0x31350, 0x323AF),
1377];
1378
1379/// Membership test for the frozen Unicode 15.1 snapshot of
1380/// `XID_Continue \ Default_Ignorable_Code_Point` (see SPEC §"What counts
1381/// as a bare key" — UAX #31 §2). ASCII is handled by callers; this returns
1382/// `false` for `cp < 0x80` to keep the binary search out of the hot path.
1383fn is_xid_continue(cp: u32) -> bool {
1384    if cp < 0x80 {
1385        return false;
1386    }
1387    XID_CONTINUE_RANGES
1388        .binary_search_by(|&(lo, hi)| {
1389            if cp < lo {
1390                std::cmp::Ordering::Greater
1391            } else if cp > hi {
1392                std::cmp::Ordering::Less
1393            } else {
1394                std::cmp::Ordering::Equal
1395            }
1396        })
1397        .is_ok()
1398}
1399
1400pub(crate) fn is_bare_key_char(c: char) -> bool {
1401    if is_reserved_emoji_codepoint(c as u32) {
1402        return false;
1403    }
1404    c == '_'
1405        || c == '-'
1406        || c.is_ascii_alphanumeric()
1407        || (!c.is_ascii() && is_xid_continue(c as u32))
1408}
1409
// Frozen Unicode 15.1 snapshot of `Extended_Pictographic=Yes`, taken from
// emoji-data.txt (SPEC §Lexical → "Reserved emoji characters").
// Holds 78 inclusive `(first, last)` ranges, sorted by start and
// non-overlapping; `is_extended_pictographic` relies on that ordering.
static EXTENDED_PICTOGRAPHIC_RANGES: &[(u32, u32)] = &[
    (0x00A9, 0x00A9), (0x00AE, 0x00AE), (0x203C, 0x203C), (0x2049, 0x2049),
    (0x2122, 0x2122), (0x2139, 0x2139), (0x2194, 0x2199), (0x21A9, 0x21AA),
    (0x231A, 0x231B), (0x2328, 0x2328), (0x2388, 0x2388), (0x23CF, 0x23CF),
    (0x23E9, 0x23F3), (0x23F8, 0x23FA), (0x24C2, 0x24C2), (0x25AA, 0x25AB),
    (0x25B6, 0x25B6), (0x25C0, 0x25C0), (0x25FB, 0x25FE), (0x2600, 0x2605),
    (0x2607, 0x2612), (0x2614, 0x2685), (0x2690, 0x2705), (0x2708, 0x2712),
    (0x2714, 0x2714), (0x2716, 0x2716), (0x271D, 0x271D), (0x2721, 0x2721),
    (0x2728, 0x2728), (0x2733, 0x2734), (0x2744, 0x2744), (0x2747, 0x2747),
    (0x274C, 0x274C), (0x274E, 0x274E), (0x2753, 0x2755), (0x2757, 0x2757),
    (0x2763, 0x2767), (0x2795, 0x2797), (0x27A1, 0x27A1), (0x27B0, 0x27B0),
    (0x27BF, 0x27BF), (0x2934, 0x2935), (0x2B05, 0x2B07), (0x2B1B, 0x2B1C),
    (0x2B50, 0x2B50), (0x2B55, 0x2B55), (0x3030, 0x3030), (0x303D, 0x303D),
    (0x3297, 0x3297), (0x3299, 0x3299), (0x1F000, 0x1F0FF), (0x1F10D, 0x1F10F),
    (0x1F12F, 0x1F12F), (0x1F16C, 0x1F171), (0x1F17E, 0x1F17F), (0x1F18E, 0x1F18E),
    (0x1F191, 0x1F19A), (0x1F1AD, 0x1F1E5), (0x1F201, 0x1F20F), (0x1F21A, 0x1F21A),
    (0x1F22F, 0x1F22F), (0x1F232, 0x1F23A), (0x1F23C, 0x1F23F), (0x1F249, 0x1F3FA),
    (0x1F400, 0x1F53D), (0x1F546, 0x1F64F), (0x1F680, 0x1F6FF), (0x1F774, 0x1F77F),
    (0x1F7D5, 0x1F7FF), (0x1F80C, 0x1F80F), (0x1F848, 0x1F84F), (0x1F85A, 0x1F85F),
    (0x1F888, 0x1F88F), (0x1F8AE, 0x1F8FF), (0x1F90C, 0x1F93A), (0x1F93C, 0x1F945),
    (0x1F947, 0x1FAFF), (0x1FC00, 0x1FFFD),
];

/// Reports whether `cp` lies in one of the inclusive ranges of
/// `EXTENDED_PICTOGRAPHIC_RANGES`.
///
/// Anything below U+00A9 (the first table entry) is rejected without
/// touching the table.
fn is_extended_pictographic(cp: u32) -> bool {
    if cp < 0xA9 {
        return false;
    }
    // Ranges are sorted and disjoint, so the first range whose upper bound
    // is not below `cp` is the only one that can contain it.
    let candidate = EXTENDED_PICTOGRAPHIC_RANGES.partition_point(|&(_, hi)| hi < cp);
    matches!(
        EXTENDED_PICTOGRAPHIC_RANGES.get(candidate),
        Some(&(lo, _)) if lo <= cp
    )
}
1449
1450/// True if `cp` is a member of the Reserved Emoji Set defined in SPEC.md
1451/// §Lexical → "Reserved emoji characters". The set is the union of:
1452///   - `Extended_Pictographic=Yes` (UTS #51, frozen UCD 15.1)
1453///   - Regional Indicators (U+1F1E6..U+1F1FF)
1454///   - Emoji Modifiers (U+1F3FB..U+1F3FF)
1455///   - Combining Enclosing Keycap (U+20E3)
1456///
1457/// All four sub-ranges are frozen at Unicode 15.1.0 and shipped with this
1458/// port (no host-runtime delegation).
1459pub(crate) fn is_reserved_emoji_codepoint(cp: u32) -> bool {
1460    if (0x1F1E6..=0x1F1FF).contains(&cp) { return true; }   // regional indicator
1461    if (0x1F3FB..=0x1F3FF).contains(&cp) { return true; }   // skin-tone modifier
1462    if cp == 0x20E3 { return true; }                         // keycap combiner
1463    is_extended_pictographic(cp)
1464}
1465
/// Reports whether `cp` is a Regional Indicator symbol
/// (U+1F1E6..=U+1F1FF).
#[inline]
pub(crate) fn is_regional_indicator(cp: u32) -> bool {
    matches!(cp, 0x1F1E6..=0x1F1FF)
}
1470
/// Reports whether `cp` is an Emoji Modifier (skin tone,
/// U+1F3FB..=U+1F3FF).
#[inline]
pub(crate) fn is_emoji_modifier(cp: u32) -> bool {
    matches!(cp, 0x1F3FB..=0x1F3FF)
}
1475
1476/// Read one extended grapheme cluster of *reserved-emoji* shape starting
1477/// at byte offset `start` in `s`. Returns `Some(end_byte)` (exclusive end)
1478/// if the cluster begins with a Reserved-Emoji codepoint, else `None`.
1479///
1480/// Implements a minimal subset of UAX #29 grapheme-cluster boundaries
1481/// sufficient for emoji clusters: GB12/GB13 (regional-indicator pair),
1482/// GB9/GB9a (Extend, ZWJ, SpacingMark — restricted to emoji-relevant
1483/// extenders), and GB11 (E_P × Extend* ZWJ × E_P).
1484///
1485/// Frozen behavior: matches the UAX #29 algorithm against frozen UCD
1486/// 15.1 property tables. The algorithm itself is stable across Unicode
1487/// versions; only the property tables it consults need pinning.
1488pub(crate) fn read_reserved_emoji_atom(s: &str, start: usize) -> Option<usize> {
1489    let bytes = s.as_bytes();
1490    if start >= bytes.len() {
1491        return None;
1492    }
1493    let mut chars = s[start..].char_indices();
1494    let (_, c0) = chars.next()?;
1495    let cp0 = c0 as u32;
1496    if !is_reserved_emoji_codepoint(cp0) {
1497        return None;
1498    }
1499    let mut end = start + c0.len_utf8();
1500
1501    // Regional-indicator pair: per UAX #29 GB12/GB13, RI pairs cluster.
1502    if is_regional_indicator(cp0) {
1503        if let Some((_, c1)) = chars.next() {
1504            if is_regional_indicator(c1 as u32) {
1505                end += c1.len_utf8();
1506            }
1507        }
1508        return Some(end);
1509    }
1510
1511    // GB9/GB9a/GB11 loop: extend cluster across modifiers, VS-16, keycap
1512    // combiner, and ZWJ-joined Extended_Pictographic continuations.
1513    loop {
1514        let rest = &s[end..];
1515        let mut it = rest.chars();
1516        let Some(c) = it.next() else { break };
1517        let cp = c as u32;
1518        if is_emoji_modifier(cp) || cp == 0xFE0F || cp == 0x20E3 {
1519            // GB9/GB9a — Extend or SpacingMark glued to base.
1520            end += c.len_utf8();
1521            continue;
1522        }
1523        if cp == 0x200D {
1524            // GB11 — ZWJ × Extended_Pictographic continues the cluster.
1525            let after_zwj = end + c.len_utf8();
1526            let after = &s[after_zwj..];
1527            if let Some(nc) = after.chars().next() {
1528                if is_extended_pictographic(nc as u32) {
1529                    end = after_zwj + nc.len_utf8();
1530                    continue;
1531                }
1532            }
1533            // ZWJ not followed by E_P: cluster ends before the ZWJ.
1534            break;
1535        }
1536        break;
1537    }
1538    Some(end)
1539}
1540
/// Reports whether `c` may begin a label: `_` or an ASCII letter.
fn is_label_start(c: char) -> bool {
    matches!(c, '_' | 'a'..='z' | 'A'..='Z')
}
1544
/// Reports whether `c` may continue a label: `_`, an ASCII letter, or an
/// ASCII digit.
fn is_label_cont(c: char) -> bool {
    matches!(c, '_' | '0'..='9' | 'a'..='z' | 'A'..='Z')
}
1548
/// Reports whether `s` begins with ten bytes shaped like `DDDD-DD-DD`,
/// where every `D` is an ASCII digit. Only the shape is checked; the
/// digits are not validated as a calendar date, and anything after the
/// tenth byte is ignored.
fn looks_like_date_prefix(s: &str) -> bool {
    match s.as_bytes() {
        [y0, y1, y2, y3, b'-', m0, m1, b'-', d0, d1, ..] => [y0, y1, y2, y3, m0, m1, d0, d1]
            .iter()
            .all(|digit| digit.is_ascii_digit()),
        _ => false,
    }
}
1563
/// Reports whether `s` begins with eight bytes shaped like `DD:DD:DD`,
/// where every `D` is an ASCII digit. Only the shape is checked; the
/// digits are not validated as a clock time, and anything after the
/// eighth byte is ignored.
fn looks_like_time_prefix(s: &str) -> bool {
    match s.as_bytes() {
        [h0, h1, b':', m0, m1, b':', s0, s1, ..] => [h0, h1, m0, m1, s0, s1]
            .iter()
            .all(|digit| digit.is_ascii_digit()),
        _ => false,
    }
}
1576
1577// ---------- parser ----------
1578
1579struct Parser<'a> {
1580    src: &'a str,
1581    pos: usize,
1582    line: usize,
1583    line_start: usize,
1584
1585    // ---- comment-attachment state ----
1586    /// All comments that have been finalized and attached so far, in
1587    /// source order.
1588    comments: Vec<AttachedComment>,
1589    /// Comments collected from comment-only lines that have not yet been
1590    /// attached. They become `Leading` on the next sibling node, or
1591    /// `Floating` on the enclosing container if a blank line appears
1592    /// before any sibling does, or if the block closes first.
1593    pending_leading: Vec<Comment>,
1594    /// The breadcrumb path to the *currently-being-parsed* container.
1595    /// Block parsers push the relevant segment (Key or Index) before
1596    /// recursing into a child node and pop after, so that whichever node
1597    /// is "current" at the time a comment is captured/flushed has the
1598    /// right path to attach to.
1599    path: Vec<BreadcrumbSegment>,
1600    /// Original-literal records captured during parsing. Sparse: only
1601    /// non-default forms get entries (e.g. an unsigned decimal integer
1602    /// without underscores has no entry; `0xFF` does). Consulted by
1603    /// `encode` for round-trip emission.
1604    original_forms: Vec<(Vec<BreadcrumbSegment>, OriginalLiteral)>,
1605    /// When false, integer/string lexeme recording is suppressed. Used
1606    /// while parsing heredoc-modifier args (which are values used at
1607    /// parse time but otherwise discarded — they shouldn't appear in
1608    /// the host node's `original_forms` slot).
1609    record_forms: bool,
1610    /// Lite mode short-circuit: when true, skip all comment-AST and
1611    /// `original_forms` bookkeeping. The grammar still runs in full,
1612    /// errors are unchanged; only the round-trip metadata is dropped.
1613    /// SPEC §Parsing modes — full and lite.
1614    lite: bool,
1615    /// Unordered mode: when true, body tables are produced as
1616    /// `Value::UnorderedTable` (HashMap-backed) instead of
1617    /// `Value::Table` (IndexMap-backed). Iteration order is arbitrary.
1618    /// Front-matter parsing ignores this flag — meta stays ordered.
1619    /// See SPEC §"Unordered tables".
1620    ignore_order: bool,
1621    /// When true, front-matter `_dms_tier: 1` is accepted rather than
1622    /// rejected. Set by `decode_document_accepting_tier1`. Default: false.
1623    accept_tier1: bool,
1624    /// The tier value observed in front matter (`_dms_tier`). Zero when
1625    /// no `_dms_tier` key is present or when tier is 0. Set only when
1626    /// `accept_tier1` is true and a valid tier value is seen.
1627    observed_tier: u32,
1628    /// Tier-1 only: decorator calls captured at line-start positions
1629    /// before the next sibling lands. Drained by
1630    /// `flush_pending_as_leading_on_current` / `flush_pending_as_floating`
1631    /// alongside `pending_leading` comments. Source-order preserved.
1632    pending_leading_decorators: Vec<tier1::DecoratorCall>,
1633    /// Tier-1 only: flat accumulator of (path, position, call) tuples.
1634    /// Grouped into `DecoratorEntry` records by `tier1::decode_t1` after
1635    /// parsing completes. Empty in tier-0 mode.
1636    decorations_raw: Vec<(Vec<BreadcrumbSegment>, tier1::DecorationPosition, tier1::DecoratorCall)>,
1637    /// Tier-1 only: paths where the source used decoration-only form
1638    /// (inner decoration with no base_value). Hoist pass substitutes
1639    /// the family's empty_default. TIER1.md §"Decoration-only".
1640    decoration_only_paths: Vec<Vec<BreadcrumbSegment>>,
1641}
1642
1643impl<'a> Parser<'a> {
/// Test-only shorthand: builds a parser over `src` in `ParseMode::Full`.
#[cfg(test)]
fn new(src: &'a str) -> Self {
    let mode = ParseMode::Full;
    Self::new_with_mode(src, mode)
}
1648
1649    fn new_with_mode(src: &'a str, mode: ParseMode) -> Self {
1650        let src = src.strip_prefix('\u{feff}').unwrap_or(src);
1651        Self {
1652            src,
1653            pos: 0,
1654            line: 1,
1655            line_start: 0,
1656            comments: Vec::new(),
1657            pending_leading: Vec::new(),
1658            path: Vec::new(),
1659            original_forms: Vec::new(),
1660            record_forms: true,
1661            lite: matches!(mode, ParseMode::Lite),
1662            ignore_order: false,
1663            accept_tier1: false,
1664            observed_tier: 0,
1665            pending_leading_decorators: Vec::new(),
1666            decorations_raw: Vec::new(),
1667            decoration_only_paths: Vec::new(),
1668        }
1669    }
1670
1671    /// True iff the parser has observed `_dms_tier: 1` in front matter.
1672    /// Tier-1 lex/parse paths gate on this.
1673    fn is_t1_active(&self) -> bool {
1674        self.observed_tier >= 1
1675    }
1676
1677    /// Append an `OriginalLiteral` record at the current path. Skipped
1678    /// when `record_forms` is false (e.g. inside heredoc modifier args),
1679    /// when the parser is in lite mode (no round-trip metadata kept),
1680    /// or when the form is the implicit default.
1681    fn record_form(&mut self, lit: OriginalLiteral) {
1682        if self.lite || !self.record_forms {
1683            return;
1684        }
1685        self.original_forms.push((self.path.clone(), lit));
1686    }
1687
1688    // ----- position / chars -----
1689
1690    fn col(&self) -> usize {
1691        // 1-based column in bytes
1692        self.pos - self.line_start + 1
1693    }
1694
1695    fn err(&self, msg: impl Into<String>) -> DecodeError {
1696        DecodeError { line: self.line, column: self.col(), message: msg.into() }
1697    }
1698
1699    fn err_at(&self, line: usize, line_start: usize, pos: usize, msg: impl Into<String>) -> DecodeError {
1700        DecodeError { line, column: pos.saturating_sub(line_start) + 1, message: msg.into() }
1701    }
1702
1703    fn peek(&self) -> Option<char> {
1704        self.src[self.pos..].chars().next()
1705    }
1706
1707    fn rest(&self) -> &str {
1708        &self.src[self.pos..]
1709    }
1710
1711    fn bump(&mut self) -> Option<char> {
1712        let c = self.peek()?;
1713        self.pos += c.len_utf8();
1714        Some(c)
1715    }
1716
1717    fn eof(&self) -> bool {
1718        self.pos >= self.src.len()
1719    }
1720
1721    fn advance_line(&mut self) {
1722        self.line += 1;
1723        self.line_start = self.pos;
1724    }
1725
1726    // ----- whitespace and line terminators -----
1727
1728    fn skip_inline_ws(&mut self) {
1729        while matches!(self.peek(), Some(' ') | Some('\t')) {
1730            self.bump();
1731        }
1732    }
1733
1734    /// Consume a line terminator (LF or CRLF). Returns true if consumed.
1735    /// Bare CR is NOT a line terminator and is left for the caller to error on.
1736    fn consume_eol(&mut self) -> bool {
1737        if self.peek() == Some('\n') {
1738            self.bump();
1739            self.advance_line();
1740            true
1741        } else if self.rest().starts_with("\r\n") {
1742            self.pos += 2;
1743            self.advance_line();
1744            true
1745        } else {
1746            false
1747        }
1748    }
1749
1750    /// Skip blank lines, full-line comments (line + block) at the current
1751    /// position. Stops at the first content character at column 1+ that is
1752    /// not trivia.
1753    ///
1754    /// Captures full-line comments into `self.pending_leading` (they
1755    /// become `Leading` on the next sibling, or `Floating` on the
1756    /// enclosing container if a blank line intervenes). The blank-line
1757    /// rule: any blank line encountered in this loop while there are
1758    /// pending comments flushes them as `Floating` on the current path.
1759    fn skip_trivia(&mut self) -> Result<(), DecodeError> {
1760        loop {
1761            // skip indent on a line that is entirely trivia: leading ws is
1762            // okay only if the line is blank or comment.
1763            let line_start_pos = self.pos;
1764            self.skip_inline_ws();
1765            match self.peek() {
1766                Some('\n') | Some('\r') => {
1767                    if self.peek() == Some('\r') && !self.rest().starts_with("\r\n") {
1768                        return Err(self.err("bare CR is not a valid line terminator"));
1769                    }
1770                    // Blank line. If we have pending leading comments,
1771                    // they're separated from any future sibling — flush
1772                    // them as Floating on the enclosing container.
1773                    self.flush_pending_as_floating();
1774                    self.consume_eol();
1775                }
1776                Some('#') => {
1777                    if self.rest().starts_with("###") {
1778                        let raw = self.read_hash_block_comment()?;
1779                        if !self.lite {
1780                            self.pending_leading.push(Comment {
1781                                content: raw,
1782                                kind: CommentKind::Block,
1783                            });
1784                        }
1785                    } else {
1786                        let raw = self.read_line_comment_to_eol();
1787                        self.consume_eol();
1788                        if !self.lite {
1789                            self.pending_leading.push(Comment {
1790                                content: raw,
1791                                kind: CommentKind::Line,
1792                            });
1793                        }
1794                    }
1795                }
1796                Some('/') if self.rest().starts_with("//") => {
1797                    let raw = self.read_line_comment_to_eol();
1798                    self.consume_eol();
1799                    if !self.lite {
1800                        self.pending_leading.push(Comment {
1801                            content: raw,
1802                            kind: CommentKind::Line,
1803                        });
1804                    }
1805                }
1806                Some('/') if self.rest().starts_with("/*") => {
1807                    let raw = self.read_c_block_comment()?;
1808                    if !self.lite {
1809                        self.pending_leading.push(Comment {
1810                            content: raw,
1811                            kind: CommentKind::Block,
1812                        });
1813                    }
1814                    // after a c-style block, may be more on this line; loop
1815                }
1816                Some(c) if self.is_t1_active() && tier1::is_sigil_atom_start(c) => {
1817                    // Tier-1: line-start decorator call. Per TIER1.md
1818                    // §"Decoration sites" `leading_block = ( decoration NEWLINE )+`,
1819                    // each leading decoration is on its own line.
1820                    let (call, end) = tier1::parse_decorator_call(self.src, self.pos)?;
1821                    // Walk the consumed range byte-by-byte, bumping line/line_start on '\n'.
1822                    let mut walk = self.pos;
1823                    while walk < end {
1824                        if self.src.as_bytes()[walk] == b'\n' {
1825                            self.line += 1;
1826                            self.line_start = walk + 1;
1827                        }
1828                        walk += 1;
1829                    }
1830                    self.pos = end;
1831                    // Decorator call must be on its own line: skip trailing ws, then EOL/EOF.
1832                    self.skip_inline_ws();
1833                    if !(self.consume_eol() || self.eof()) {
1834                        return Err(self.err("trailing content after leading decorator (only EOL allowed in this build)"));
1835                    }
1836                    self.pending_leading_decorators.push(call);
1837                    // continue trivia loop
1838                }
1839                Some(_) => {
1840                    // not trivia — rewind to line start (preserve indent for caller)
1841                    self.pos = line_start_pos;
1842                    return Ok(());
1843                }
1844                None => return Ok(()),
1845            }
1846        }
1847    }
1848
1849    /// Flush any `pending_leading` comments as `Floating` on the current
1850    /// path (i.e. attached to the enclosing container). Called when a
1851    /// blank line appears between pending comments and the next sibling,
1852    /// and when a block closes with leftover pending comments.
1853    fn flush_pending_as_floating(&mut self) {
1854        let had_decorators = !self.pending_leading_decorators.is_empty();
1855        if !self.pending_leading.is_empty() {
1856            let drained: Vec<Comment> = self.pending_leading.drain(..).collect();
1857            for c in drained {
1858                self.comments.push(AttachedComment {
1859                    comment: c,
1860                    position: CommentPosition::Floating,
1861                    path: self.path.clone(),
1862                });
1863            }
1864        }
1865        if had_decorators {
1866            let drained: Vec<tier1::DecoratorCall> =
1867                self.pending_leading_decorators.drain(..).collect();
1868            for call in drained {
1869                self.decorations_raw.push((
1870                    self.path.clone(),
1871                    tier1::DecorationPosition::Floating,
1872                    call,
1873                ));
1874            }
1875        }
1876    }
1877
1878    /// Attach all `pending_leading` comments as `Leading` on the path
1879    /// formed by `self.path` + `last_segment`. Called by sibling-entry
1880    /// points right after pushing the new sibling's breadcrumb.
1881    fn flush_pending_as_leading_on_current(&mut self) {
1882        let had_decorators = !self.pending_leading_decorators.is_empty();
1883        if !self.pending_leading.is_empty() {
1884            let drained: Vec<Comment> = self.pending_leading.drain(..).collect();
1885            for c in drained {
1886                self.comments.push(AttachedComment {
1887                    comment: c,
1888                    position: CommentPosition::Leading,
1889                    path: self.path.clone(),
1890                });
1891            }
1892        }
1893        if had_decorators {
1894            let drained: Vec<tier1::DecoratorCall> =
1895                self.pending_leading_decorators.drain(..).collect();
1896            for call in drained {
1897                self.decorations_raw.push((
1898                    self.path.clone(),
1899                    tier1::DecorationPosition::Leading,
1900                    call,
1901                ));
1902            }
1903        }
1904    }
1905
1906    fn read_line_comment_to_eol(&mut self) -> String {
1907        let start = self.pos;
1908        while let Some(c) = self.peek() {
1909            if c == '\n' || c == '\r' {
1910                break;
1911            }
1912            self.bump();
1913        }
1914        self.src[start..self.pos].to_string()
1915    }
1916
1917    /// `/* ... */` with nesting. Caller has not yet consumed `/*`. Returns
1918    /// the raw comment text (including delimiters).
1919    fn read_c_block_comment(&mut self) -> Result<String, DecodeError> {
1920        let start_line = self.line;
1921        let start_lstart = self.line_start;
1922        let start_pos = self.pos;
1923        // consume opening /*
1924        self.pos += 2;
1925        let mut depth = 1usize;
1926        while depth > 0 {
1927            match self.peek() {
1928                None => {
1929                    return Err(self.err_at(
1930                        start_line,
1931                        start_lstart,
1932                        start_pos,
1933                        "unterminated /* block comment",
1934                    ));
1935                }
1936                Some('/') if self.rest().starts_with("/*") => {
1937                    self.pos += 2;
1938                    depth += 1;
1939                }
1940                Some('*') if self.rest().starts_with("*/") => {
1941                    self.pos += 2;
1942                    depth -= 1;
1943                }
1944                Some('\n') => {
1945                    self.bump();
1946                    self.advance_line();
1947                }
1948                Some('\r') if self.rest().starts_with("\r\n") => {
1949                    self.pos += 2;
1950                    self.advance_line();
1951                }
1952                Some(_) => {
1953                    self.bump();
1954                }
1955            }
1956        }
1957        Ok(self.src[start_pos..self.pos].to_string())
1958    }
1959
1960    /// `### ... ###` or `###LABEL ... LABEL`. Opener and terminator each on
1961    /// their own line. Caller has not yet consumed `###`. Returns the
1962    /// raw comment text including delimiters and the terminator line
1963    /// (but **not** the trailing EOL of the terminator).
1964    fn read_hash_block_comment(&mut self) -> Result<String, DecodeError> {
1965        let start_line = self.line;
1966        let start_lstart = self.line_start;
1967        let start_pos = self.pos;
1968        // consume ###
1969        self.pos += 3;
1970        // optional label, no whitespace between ### and label
1971        let label_start = self.pos;
1972        while let Some(c) = self.peek() {
1973            if !(c == '_' || c.is_ascii_alphanumeric()) {
1974                break;
1975            }
1976            self.bump();
1977        }
1978        let label_str: String = self.src[label_start..self.pos].to_string();
1979        if !label_str.is_empty() {
1980            // must start with letter or _
1981            if !label_str.chars().next().unwrap().is_ascii_alphabetic()
1982                && !label_str.starts_with('_')
1983            {
1984                return Err(self.err_at(
1985                    start_line,
1986                    start_lstart,
1987                    start_pos,
1988                    "block comment label must start with a letter or underscore",
1989                ));
1990            }
1991        }
1992        let terminator: String = if label_str.is_empty() {
1993            "###".to_string()
1994        } else {
1995            label_str.clone()
1996        };
1997        // require rest of opener line to be only whitespace, then EOL
1998        self.skip_inline_ws();
1999        if !(self.consume_eol() || self.eof()) {
2000            return Err(self.err(
2001                "block comment opener must be on its own line",
2002            ));
2003        }
2004        // consume body lines until we find a line whose trimmed content
2005        // equals `terminator`
2006        loop {
2007            if self.eof() {
2008                return Err(self.err_at(
2009                    start_line,
2010                    start_lstart,
2011                    start_pos,
2012                    "unterminated ### block comment",
2013                ));
2014            }
2015            // capture this line
2016            let line_begin = self.pos;
2017            while let Some(c) = self.peek() {
2018                if c == '\n' || c == '\r' {
2019                    break;
2020                }
2021                self.bump();
2022            }
2023            let line_text = &self.src[line_begin..self.pos];
2024            // The terminator line is part of the captured comment text;
2025            // capture the comment body BEFORE consuming the EOL so the
2026            // returned content does not include the terminator's EOL.
2027            let line_end = self.pos;
2028            let _ = self.consume_eol();
2029            if line_text.trim() == terminator {
2030                return Ok(self.src[start_pos..line_end].to_string());
2031            }
2032        }
2033    }
2034
2035    // ----- document entry -----
2036
2037    /// Parse optional front matter block. Leaves the parser positioned at
2038    /// the start of the body (which may be empty). Returns the user
2039    /// metadata table (Some) if a block was present, else None. Reserved
2040    /// `_dms_*` keys are consumed and validated here, not surfaced.
2041    fn parse_front_matter(&mut self) -> Result<Option<DmsMap<Value>>, DecodeError> {
2042        // Front matter must be the first *significant* line. Skip
2043        // leading trivia; if the next non-trivia line is `+++`, it's
2044        // front matter. Otherwise rewind so the body parser sees the
2045        // same position it would have seen.
2046        let save_pos = self.pos;
2047        let save_line = self.line;
2048        let save_lstart = self.line_start;
2049        let save_pending = self.pending_leading.len();
2050        let save_comments = self.comments.len();
2051        self.skip_trivia()?;
2052        // Check for +++ on its own line
2053        if !self.rest().starts_with("+++") {
2054            self.pos = save_pos;
2055            self.line = save_line;
2056            self.line_start = save_lstart;
2057            // Speculative skip_trivia may have captured comments — undo
2058            // so the body parser re-captures them with the correct
2059            // path/context.
2060            self.pending_leading.truncate(save_pending);
2061            self.comments.truncate(save_comments);
2062            return Ok(None);
2063        }
2064        // The char after +++ must be end-of-line (optionally with inline ws).
2065        // Any other trailing content on the opener line is a parse error
2066        // (SPEC §Front matter: "each `+++` must appear on its own line,
2067        // with no trailing content"). We position the parser past `+++`
2068        // (so the diagnostic points at the offending column) and let
2069        // the strict EOL check below produce the error.
2070        let opener_line = self.line;
2071        let opener_lstart = self.line_start;
2072        let opener_pos = self.pos;
2073        // consume `+++`, then trailing whitespace, then EOL
2074        self.pos += 3;
2075        self.skip_inline_ws();
2076        if !(self.consume_eol() || self.eof()) {
2077            return Err(self.err(
2078                "front matter opener must be on its own line",
2079            ));
2080        }
2081        // Collect lines into a scratch string until we hit the closer `+++`
2082        let mut inner = String::new();
2083        loop {
2084            if self.eof() {
2085                return Err(self.err_at(
2086                    opener_line, opener_lstart, opener_pos,
2087                    "unterminated front matter: missing closing '+++'",
2088                ));
2089            }
2090            // Read one line (without EOL)
2091            let line_begin = self.pos;
2092            while let Some(c) = self.peek() {
2093                if c == '\n' || c == '\r' { break; }
2094                self.bump();
2095            }
2096            let line_text = &self.src[line_begin..self.pos];
2097            // Is this line the closing `+++`?
2098            if line_text.trim() == "+++" {
2099                let _ = self.consume_eol();
2100                break;
2101            }
2102            // Otherwise, preserve this line in `inner` (including trailing EOL)
2103            inner.push_str(line_text);
2104            if self.consume_eol() {
2105                inner.push('\n');
2106            }
2107        }
2108        // Parse `inner` as a standalone DMS table block at indent 0.
2109        // Inherit lite mode from the outer parser — front matter
2110        // shouldn't preserve comments/forms if the outer doesn't.
2111        let inner_mode = if self.lite { ParseMode::Lite } else { ParseMode::Full };
2112        let mut sub = Parser::new_with_mode(&inner, inner_mode);
2113        let table = sub.parse_body_as_table()?;
2114        // Now process reserved keys
2115        let mut meta = DmsMap::default();
2116        for (k, v) in table {
2117            if k.strip_prefix('_').is_some() {
2118                match k.as_str() {
2119                    "_dms_tier" => {
2120                        let Value::Integer(n) = v else {
2121                            return Err(self.err_at(
2122                                opener_line, opener_lstart, opener_pos,
2123                                "_dms_tier must be a non-negative integer",
2124                            ));
2125                        };
2126                        if n < 0 {
2127                            return Err(self.err_at(
2128                                opener_line, opener_lstart, opener_pos,
2129                                "_dms_tier must be non-negative",
2130                            ));
2131                        }
2132                        if n >= 1 {
2133                            if self.accept_tier1 {
2134                                // Tier-1 accepted by the tier-1 entry
2135                                // point. Record the observed tier and
2136                                // consume the key silently.
2137                                self.observed_tier = n as u32;
2138                            } else {
2139                                // TIER1.md §"Behavior at the boundary":
2140                                // tier-0 decoders surface tier-1 input
2141                                // with an actionable forward-compat
2142                                // message pointing at decode_t1.
2143                                return Err(self.err_at(
2144                                    opener_line, opener_lstart, opener_pos,
2145                                    format!(
2146                                        "_dms_tier: {n} found, but this decoder only supports tier 0. Use decode_t1."
2147                                    ),
2148                                ));
2149                            }
2150                        }
2151                        // _dms_tier: 0 — accept and discard.
2152                    }
2153                    "_dms_imports" => {
2154                        if self.accept_tier1 {
2155                            // Pass through to meta so tier1::extract_imports
2156                            // can read and validate it. TIER1.md §"Front
2157                            // matter additions" (line 170).
2158                            meta.insert(k, v);
2159                        } else {
2160                            // Tier-0 decoders reject _dms_imports with an
2161                            // actionable message. TIER1.md line 179-181.
2162                            return Err(self.err_at(
2163                                opener_line, opener_lstart, opener_pos,
2164                                "_dms_imports requires _dms_tier: 1; \
2165                                 tier-0 documents must not contain _dms_imports",
2166                            ));
2167                        }
2168                    }
2169                    other => {
2170                        return Err(self.err_at(
2171                            opener_line, opener_lstart, opener_pos,
2172                            format!("unknown reserved key: {other}"),
2173                        ));
2174                    }
2175                }
2176            } else {
2177                meta.insert(k, v);
2178            }
2179        }
2180        // Hoist comments from the sub-parser, prefixing each path with
2181        // `Key("__fm__")` so callers can distinguish front-matter
2182        // comments from body comments. Comments attached to reserved
2183        // (parser-consumed) `_dms_*` keys are re-attached as `Floating`
2184        // on the front-matter table itself — the reserved-key node
2185        // doesn't appear in the final tree, but the comment was about
2186        // the document and shouldn't silently vanish.
2187        for ac in sub.comments {
2188            let attached_to_reserved = matches!(
2189                ac.path.first(),
2190                Some(BreadcrumbSegment::Key(k0)) if k0.starts_with('_')
2191            );
2192            if attached_to_reserved {
2193                self.comments.push(AttachedComment {
2194                    comment: ac.comment,
2195                    position: CommentPosition::Floating,
2196                    path: vec![BreadcrumbSegment::Key("__fm__".to_string())],
2197                });
2198                continue;
2199            }
2200            let mut new_path = Vec::with_capacity(ac.path.len() + 1);
2201            new_path.push(BreadcrumbSegment::Key("__fm__".to_string()));
2202            new_path.extend(ac.path);
2203            self.comments.push(AttachedComment {
2204                comment: ac.comment,
2205                position: ac.position,
2206                path: new_path,
2207            });
2208        }
2209        // Same hoist for original_forms: `__fm__` prefix, drop entries
2210        // attached to reserved (consumed) `_dms_*` keys.
2211        for (path, lit) in sub.original_forms {
2212            if let Some(BreadcrumbSegment::Key(k0)) = path.first() {
2213                if k0.starts_with('_') {
2214                    continue;
2215                }
2216            }
2217            let mut new_path = Vec::with_capacity(path.len() + 1);
2218            new_path.push(BreadcrumbSegment::Key("__fm__".to_string()));
2219            new_path.extend(path);
2220            self.original_forms.push((new_path, lit));
2221        }
2222        Ok(Some(meta))
2223    }
2224
2225    /// Parse the inner content of a front matter block as a table.
2226    fn parse_body_as_table(&mut self) -> Result<DmsMap<Value>, DecodeError> {
2227        self.skip_trivia()?;
2228        if self.eof() {
2229            // Empty FM body with comment-only content: flush any
2230            // pending-leading comments as floating on the (empty)
2231            // table root, so the outer hoist can re-attach them under
2232            // the `__fm__` prefix.
2233            self.flush_pending_as_floating();
2234            return Ok(DmsMap::default());
2235        }
2236        if matches!(self.peek(), Some(' ') | Some('\t')) {
2237            return Err(self.err("unexpected indentation inside front matter"));
2238        }
2239        // SPEC §Lexical: reject reserved decorator sigils at line-start.
2240        self.reject_reserved_decorator_sigil()?;
2241        if self.peek() == Some('+') && self.peek_after_plus_is_space_or_eol() {
2242            return Err(self.err("front matter block cannot have a list root"));
2243        }
2244        if !self.line_starts_kvpair() {
2245            return Err(self.err("front matter block must be a table"));
2246        }
2247        let t = self.parse_table_block(0)?;
2248        self.skip_trivia()?;
2249        if !self.eof() {
2250            return Err(self.err("trailing content inside front matter"));
2251        }
2252        // Any final pending-leading comments become floating on FM root.
2253        self.flush_pending_as_floating();
2254        Ok(t)
2255    }
2256
2257    /// Parse a complete document body.
2258    fn parse_body(&mut self) -> Result<Value, DecodeError> {
2259        self.skip_trivia()?;
2260        if self.eof() {
2261            // Empty / comment-only body: pending comments float on root.
2262            self.flush_pending_as_floating();
2263            if self.ignore_order {
2264                return Ok(Value::UnorderedTable(DmsHashMap::default()));
2265            }
2266            return Ok(Value::Table(DmsMap::default()));
2267        }
2268        // dispatch on the first significant char at column 1 (no indent allowed at root)
2269        if matches!(self.peek(), Some(' ') | Some('\t')) {
2270            return Err(self.err("unexpected indentation at document root"));
2271        }
2272        // SPEC §Lexical: reject reserved decorator sigils at line-start.
2273        self.reject_reserved_decorator_sigil()?;
2274        // Decide root type
2275        let result = if self.peek() == Some('+') && self.peek_after_plus_is_space_or_eol() {
2276            // list root
2277            let v = self.parse_list_block(0)?;
2278            self.skip_trivia()?;
2279            if !self.eof() {
2280                return Err(self.err("trailing content after list root"));
2281            }
2282            Value::List(v)
2283        } else if self.line_starts_kvpair() {
2284            let v = if self.ignore_order {
2285                Value::UnorderedTable(self.parse_table_block_unordered(0)?)
2286            } else {
2287                Value::Table(self.parse_table_block(0)?)
2288            };
2289            self.skip_trivia()?;
2290            if !self.eof() {
2291                return Err(self.err("trailing content after table root"));
2292            }
2293            v
2294        } else {
2295            // scalar root
2296            let v = self.parse_inline_value_or_heredoc()?;
2297            self.consume_after_value(true)?;
2298            self.skip_trivia()?;
2299            if !self.eof() {
2300                return Err(self.err("scalar root cannot be followed by more content"));
2301            }
2302            v
2303        };
2304        // Any post-body trivia comments float on root.
2305        self.flush_pending_as_floating();
2306        Ok(result)
2307    }
2308
2309    fn peek_after_plus_is_space_or_eol(&self) -> bool {
2310        let bytes = self.src.as_bytes();
2311        let next = bytes.get(self.pos + 1).copied();
2312        matches!(next, Some(b' ') | Some(b'\t') | Some(b'\n') | Some(b'\r') | None)
2313    }
2314
2315    /// SPEC §Lexical: the characters `! @ $ % ^ & * | ~ \`` are reserved as
2316    /// decorator sigils at line-start position. A body line whose first
2317    /// non-whitespace character is one of these is a parse error in tier 0.
2318    /// (Underscore `_` is **not** in this set — it has its own category for
2319    /// core / built-in decorators like heredoc modifiers `_trim`.)
2320    ///
2321    /// Callers position the parser at the first non-whitespace byte of a
2322    /// body line (i.e. at `line_start + indent`) and invoke this helper
2323    /// before any other dispatch. Returns `Err` if the current `peek()`
2324    /// is one of the reserved sigils, `Ok(())` otherwise.
2325    fn reject_reserved_decorator_sigil(&self) -> Result<(), DecodeError> {
2326        match self.peek() {
2327            Some(c @ ('!' | '@' | '$' | '%' | '^' | '&' | '*' | '|' | '~' | '`'
2328                | '.' | ',' | '>' | '<' | '?' | ';' | '=')) => {
2329                Err(self.err(format!(
2330                    "'{c}' is a reserved decorator sigil at line-start; \
2331                     these characters (! @ $ % ^ & * | ~ ` . , > < ? ; =) cannot begin a \
2332                     body line in tier 0"
2333                )))
2334            }
2335            Some(c) if is_reserved_emoji_codepoint(c as u32) => {
2336                Err(self.err(format!(
2337                    "'{c}' (U+{cp:04X}) is in the Reserved Emoji Set and cannot \
2338                     begin a body line in tier 0; quote it (\"{c}\": ...) or place \
2339                     it inside a string value",
2340                    cp = c as u32,
2341                )))
2342            }
2343            _ => Ok(()),
2344        }
2345    }
2346
2347    /// Heuristic: scan from current position to determine if the line is a
2348    /// kvpair (presence of `:` followed by ws/EOL after a key-shaped prefix)
2349    /// vs a scalar. We don't commit yet; we just classify.
2350    fn line_starts_kvpair(&self) -> bool {
2351        let mut p = self.pos;
2352        let bytes = self.src.as_bytes();
2353        // Try basic-quoted key
2354        if bytes.get(p).copied() == Some(b'"') {
2355            // skip until matching unescaped "
2356            p += 1;
2357            while p < bytes.len() {
2358                match bytes[p] {
2359                    b'\\' => p += 2,
2360                    b'"' => { p += 1; break; }
2361                    b'\n' | b'\r' => return false,
2362                    _ => p += 1,
2363                }
2364            }
2365        } else if bytes.get(p).copied() == Some(b'\'') {
2366            p += 1;
2367            while p < bytes.len() {
2368                match bytes[p] {
2369                    b'\'' => { p += 1; break; }
2370                    b'\n' | b'\r' => return false,
2371                    _ => p += 1,
2372                }
2373            }
2374        } else {
2375            // bare key chars
2376            let s = &self.src[p..];
2377            let mut chars = s.char_indices();
2378            let mut last_end = 0;
2379            let mut any = false;
2380            while let Some((i, c)) = chars.next() {
2381                if is_bare_key_char(c) {
2382                    last_end = i + c.len_utf8();
2383                    any = true;
2384                } else {
2385                    break;
2386                }
2387            }
2388            if !any {
2389                return false;
2390            }
2391            p += last_end;
2392        }
2393        // now expect ':'
2394        if bytes.get(p).copied() != Some(b':') {
2395            return false;
2396        }
2397        // followed by ws, EOL, or EOF — we consider it a kvpair
2398        match bytes.get(p + 1).copied() {
2399            Some(b' ') | Some(b'\t') | Some(b'\n') | Some(b'\r') | None => true,
2400            _ => false,
2401        }
2402    }
2403
2404    // ----- block parsers -----
2405
2406    /// Parses a table block: a sequence of kvpairs at exactly `indent`
2407    /// (column = indent+1). Stops at a line whose indent < indent+1 or EOF.
2408    fn parse_table_block(&mut self, indent: usize) -> Result<DmsMap<Value>, DecodeError> {
2409        let mut table: DmsMap<Value> = DmsMap::default();
2410        loop {
2411            self.skip_trivia()?;
2412            if self.eof() {
2413                break;
2414            }
2415            // measure this line's indent
2416            let line_indent = self.measure_line_indent();
2417            if line_indent < indent {
2418                break;
2419            }
2420            if line_indent != indent {
2421                return Err(self.err_at(
2422                    self.line,
2423                    self.line_start,
2424                    self.line_start + indent,
2425                    format!(
2426                        "inconsistent indent: expected {indent} spaces, got {line_indent}"
2427                    ),
2428                ));
2429            }
2430            // consume the indent
2431            self.pos = self.line_start + indent;
2432            // SPEC §Lexical: reject reserved decorator sigils at line-start.
2433            self.reject_reserved_decorator_sigil()?;
2434            let (key, value) = self.parse_kvpair(indent)?;
2435            if table.contains_key(&key) {
2436                return Err(self.err(format!("duplicate key: {key}")));
2437            }
2438            table.insert(key, value);
2439        }
2440        // Block closing: any leftover pending leading comments become
2441        // floating on the enclosing container (the table itself).
2442        self.flush_pending_as_floating();
2443        Ok(table)
2444    }
2445
2446    /// Unordered counterpart of `parse_table_block` — same grammar, same
2447    /// errors; only difference is the backing store is a `HashMap`
2448    /// (no insertion-order tracking). Called when `self.ignore_order`
2449    /// is true. See SPEC §"Unordered tables".
2450    fn parse_table_block_unordered(
2451        &mut self,
2452        indent: usize,
2453    ) -> Result<DmsHashMap<Value>, DecodeError> {
2454        let mut table: DmsHashMap<Value> = DmsHashMap::default();
2455        loop {
2456            self.skip_trivia()?;
2457            if self.eof() {
2458                break;
2459            }
2460            let line_indent = self.measure_line_indent();
2461            if line_indent < indent {
2462                break;
2463            }
2464            if line_indent != indent {
2465                return Err(self.err_at(
2466                    self.line,
2467                    self.line_start,
2468                    self.line_start + indent,
2469                    format!(
2470                        "inconsistent indent: expected {indent} spaces, got {line_indent}"
2471                    ),
2472                ));
2473            }
2474            self.pos = self.line_start + indent;
2475            // SPEC §Lexical: reject reserved decorator sigils at line-start.
2476            self.reject_reserved_decorator_sigil()?;
2477            let (key, value) = self.parse_kvpair(indent)?;
2478            if table.contains_key(&key) {
2479                return Err(self.err(format!("duplicate key: {key}")));
2480            }
2481            table.insert(key, value);
2482        }
2483        self.flush_pending_as_floating();
2484        Ok(table)
2485    }
2486
2487    /// Parses a list block: sibling `+ item` lines at exactly `indent`.
2488    fn parse_list_block(&mut self, indent: usize) -> Result<Vec<Value>, DecodeError> {
2489        let mut items: Vec<Value> = Vec::new();
2490        loop {
2491            self.skip_trivia()?;
2492            if self.eof() {
2493                break;
2494            }
2495            let line_indent = self.measure_line_indent();
2496            if line_indent < indent {
2497                break;
2498            }
2499            if line_indent != indent {
2500                return Err(self.err_at(
2501                    self.line,
2502                    self.line_start,
2503                    self.line_start + indent,
2504                    format!(
2505                        "inconsistent indent: expected {indent} spaces, got {line_indent}"
2506                    ),
2507                ));
2508            }
2509            self.pos = self.line_start + indent;
2510            // SPEC §Lexical: reject reserved decorator sigils at line-start.
2511            self.reject_reserved_decorator_sigil()?;
2512            if self.peek() != Some('+') {
2513                // not a list item — caller may treat as end of list
2514                break;
2515            }
2516            // We've committed to a new list item. Push its index, attach
2517            // pending leading comments to it, then parse the value.
2518            let idx = items.len();
2519            self.path.push(BreadcrumbSegment::Index(idx));
2520            self.flush_pending_as_leading_on_current();
2521            // consume '+'
2522            self.bump();
2523            // require whitespace OR end-of-line
2524            let item_result: Result<Value, DecodeError> = match self.peek() {
2525                Some(' ') | Some('\t') => {
2526                    self.bump();
2527                    self.skip_inline_ws();
2528                    // Capture inner /* ... */ comments before deciding
2529                    // empty-item vs scalar/kvpair. They attach to the
2530                    // current list item via path.
2531                    let dec_count_before = self.decorations_raw.len();
2532                    self.capture_inner_block_comments()?;
2533                    let inner_dec_captured = self.decorations_raw.len() > dec_count_before;
2534                    match self.peek() {
2535                        Some('\n') | Some('\r') | None => {
2536                            // Tier-1 decoration-only: `+ |decorator[EOL]` with no base
2537                            // value. The value is a placeholder (empty Table); the hoist
2538                            // pass will substitute the family's empty_default.
2539                            if self.is_t1_active() && inner_dec_captured {
2540                                self.decoration_only_paths.push(self.path.clone());
2541                                self.consume_eol();
2542                                Ok(Value::Table(DmsMap::default()))
2543                            } else {
2544                                // `+ /* ... */[EOL]` → empty item with
2545                                // inner comments, opens a nested block.
2546                                self.consume_eol();
2547                                self.skip_trivia()?;
2548                                if self.eof() {
2549                                    Err(self.err("expected indented block after empty '+' marker"))
2550                                } else {
2551                                    let inner_indent = self.measure_line_indent();
2552                                    if inner_indent <= indent {
2553                                        Err(self.err("expected indented block after empty '+' marker"))
2554                                    } else {
2555                                        self.parse_block_value(inner_indent)
2556                                    }
2557                                }
2558                            }
2559                        }
2560                        _ => self.parse_list_item_value(indent),
2561                    }
2562                }
2563                Some('\n') | Some('\r') | None => {
2564                    // empty `+` opens a nested block on next lines
2565                    self.consume_eol();
2566                    self.skip_trivia()?;
2567                    if self.eof() {
2568                        Err(self.err("expected indented block after empty '+' marker"))
2569                    } else {
2570                        let inner_indent = self.measure_line_indent();
2571                        if inner_indent <= indent {
2572                            Err(self.err("expected indented block after empty '+' marker"))
2573                        } else {
2574                            self.parse_block_value(inner_indent)
2575                        }
2576                    }
2577                }
2578                _ => Err(self.err("expected space after '+'")),
2579            };
2580            self.path.pop();
2581            let v = item_result?;
2582            items.push(v);
2583        }
2584        // Block close: leftover pending become floating on the list itself.
2585        self.flush_pending_as_floating();
2586        Ok(items)
2587    }
2588
2589    fn measure_line_indent(&self) -> usize {
2590        let b = self.src.as_bytes();
2591        let mut i = self.line_start;
2592        let mut n = 0usize;
2593        while i < b.len() && b[i] == b' ' {
2594            n += 1;
2595            i += 1;
2596        }
2597        n
2598    }
2599
2600    fn parse_block_value(&mut self, indent: usize) -> Result<Value, DecodeError> {
2601        // Decide: list or table
2602        // Consume the indent first
2603        self.pos = self.line_start + indent;
2604        // SPEC §Lexical: reject reserved decorator sigils at line-start.
2605        self.reject_reserved_decorator_sigil()?;
2606        if self.peek() == Some('+') && self.peek_after_plus_is_space_or_eol() {
2607            Ok(Value::List(self.parse_list_block(indent)?))
2608        } else if self.ignore_order {
2609            Ok(Value::UnorderedTable(self.parse_table_block_unordered(indent)?))
2610        } else {
2611            Ok(Value::Table(self.parse_table_block(indent)?))
2612        }
2613    }
2614
2615    fn parse_list_item_value(&mut self, list_indent: usize) -> Result<Value, DecodeError> {
2616        // The value sits after `+ `. It can be:
2617        //   - inline scalar (number/string/...) ending at EOL
2618        //   - the start of a kvpair (key:value), in which case the item is
2619        //     a table; subsequent keys of that table sit at the column of
2620        //     the first key (not the `+`).
2621        //   - heredoc opener
2622        if self.line_starts_kvpair() {
2623            // remember key column
2624            let key_col = self.col() - 1; // 0-based column of first key
2625            if self.ignore_order {
2626                let (k, v) = self.parse_kvpair(key_col)?;
2627                let mut t: DmsHashMap<Value> = DmsHashMap::default();
2628                t.insert(k, v);
2629                loop {
2630                    self.skip_trivia()?;
2631                    if self.eof() {
2632                        break;
2633                    }
2634                    let li = self.measure_line_indent();
2635                    if li < key_col {
2636                        break;
2637                    }
2638                    if li != key_col {
2639                        return Err(self.err_at(
2640                            self.line,
2641                            self.line_start,
2642                            self.line_start + key_col,
2643                            "list-item table sibling key must align with first key",
2644                        ));
2645                    }
2646                    self.pos = self.line_start + key_col;
2647                    // SPEC §Lexical: reject reserved decorator sigils
2648                    // at line-start.
2649                    self.reject_reserved_decorator_sigil()?;
2650                    if self.peek() == Some('+') {
2651                        return Err(self.err(
2652                            "'+' marker at sibling-key column is ambiguous",
2653                        ));
2654                    }
2655                    if !self.line_starts_kvpair() {
2656                        break;
2657                    }
2658                    let (k, v) = self.parse_kvpair(key_col)?;
2659                    if t.contains_key(&k) {
2660                        return Err(self.err(format!("duplicate key: {k}")));
2661                    }
2662                    t.insert(k, v);
2663                }
2664                self.flush_pending_as_floating();
2665                return Ok(Value::UnorderedTable(t));
2666            }
2667            let (k, v) = self.parse_kvpair(key_col)?;
2668            // additional keys at key_col are siblings of this table
2669            let mut t = DmsMap::default();
2670            t.insert(k, v);
2671            // continue reading sibling keys at key_col
2672            loop {
2673                self.skip_trivia()?;
2674                if self.eof() {
2675                    break;
2676                }
2677                let li = self.measure_line_indent();
2678                if li < key_col {
2679                    break;
2680                }
2681                if li != key_col {
2682                    return Err(self.err_at(
2683                        self.line,
2684                        self.line_start,
2685                        self.line_start + key_col,
2686                        "list-item table sibling key must align with first key",
2687                    ));
2688                }
2689                self.pos = self.line_start + key_col;
2690                // SPEC §Lexical: reject reserved decorator sigils at line-start.
2691                self.reject_reserved_decorator_sigil()?;
2692                // ensure not a `+` at key_col (would indicate next list item misaligned)
2693                if self.peek() == Some('+') {
2694                    return Err(self.err(
2695                        "'+' marker at sibling-key column is ambiguous",
2696                    ));
2697                }
2698                if !self.line_starts_kvpair() {
2699                    break;
2700                }
2701                let (k, v) = self.parse_kvpair(key_col)?;
2702                if t.contains_key(&k) {
2703                    return Err(self.err(format!("duplicate key: {k}")));
2704                }
2705                t.insert(k, v);
2706            }
2707            // End of this inline-table-in-list-item: any pending leading
2708            // comments belong to the enclosing list item itself (Floating).
2709            self.flush_pending_as_floating();
2710            Ok(Value::Table(t))
2711        } else {
2712            // inline scalar (or heredoc); same end-of-line discipline
2713            let v = self.parse_inline_value_or_heredoc()?;
2714            self.consume_after_value(false)?;
2715            // For a heredoc the parse already consumed up through the
2716            // terminator line; consume_after_value handled the EOL after
2717            // any inline scalar.
2718            // Lists do not support a child block when the item's value is a
2719            // simple scalar; we don't try to attach one here.
2720            let _ = list_indent;
2721            Ok(v)
2722        }
2723    }
2724
2725    // ----- kvpair parsing -----
2726
2727    fn parse_kvpair(&mut self, parent_indent: usize) -> Result<(String, Value), DecodeError> {
2728        let key = self.parse_key()?;
2729        if self.peek() != Some(':') {
2730            return Err(self.err("expected ':' after key"));
2731        }
2732        // We've now committed: this is a kvpair with key `key`. Push the
2733        // breadcrumb so any pending leading comments attach to this node,
2734        // and so trailing comments captured by `consume_after_value` (and
2735        // any floating comments inside the value's child block) get the
2736        // right path.
2737        self.path.push(BreadcrumbSegment::Key(key.clone()));
2738        self.flush_pending_as_leading_on_current();
2739        let result = self.parse_kvpair_after_key(parent_indent);
2740        self.path.pop();
2741        result.map(|v| (key, v))
2742    }
2743
2744    fn parse_kvpair_after_key(&mut self, parent_indent: usize) -> Result<Value, DecodeError> {
2745        // caller has consumed the key but not the ':'
2746        self.bump(); // consume ':'
2747        match self.peek() {
2748            Some(' ') | Some('\t') => {
2749                self.bump();
2750                self.skip_inline_ws();
2751                // Inner `/* ... */` comments live in this slot. Capture
2752                // them now; they attach to the current kvpair via path.
2753                let dec_count_before = self.decorations_raw.len();
2754                self.capture_inner_block_comments()?;
2755                let inner_dec_captured = self.decorations_raw.len() > dec_count_before;
2756                // After the `: <inner?>`, decide: inline value, or
2757                // (with at least one inner comment present) a child
2758                // block opened by EOL.
2759                match self.peek() {
2760                    Some('\n') | Some('\r') | None => {
2761                        // Tier-1 decoration-only: `key: |decorator[EOL]` with no base
2762                        // value. The value is a placeholder (empty Table); the hoist
2763                        // pass will substitute the family's empty_default.
2764                        if self.is_t1_active() && inner_dec_captured {
2765                            self.decoration_only_paths.push(self.path.clone());
2766                            self.consume_eol();
2767                            return Ok(Value::Table(DmsMap::default()));
2768                        }
2769                        // `key: /* ... */[EOL]` → child block. Without
2770                        // any inner comment this branch is reached only
2771                        // via the dedicated no-WS-after-colon path
2772                        // below; reaching it here means at least one
2773                        // inner comment was captured.
2774                        self.consume_eol();
2775                        self.skip_trivia()?;
2776                        if self.eof() {
2777                            return Err(self.err("expected indented child block"));
2778                        }
2779                        let child_indent = self.measure_line_indent();
2780                        if child_indent <= parent_indent {
2781                            return Err(self.err("expected indented child block"));
2782                        }
2783                        let v = self.parse_block_value(child_indent)?;
2784                        Ok(v)
2785                    }
2786                    _ => {
2787                        let v = self.parse_inline_value_or_heredoc()?;
2788                        self.consume_after_value(false)?;
2789                        Ok(v)
2790                    }
2791                }
2792            }
2793            Some('\n') | Some('\r') | None => {
2794                // `key:[EOL]` — child block, no inner-comment slot.
2795                self.consume_eol();
2796                self.skip_trivia()?;
2797                if self.eof() {
2798                    return Err(self.err("expected indented child block"));
2799                }
2800                let child_indent = self.measure_line_indent();
2801                if child_indent <= parent_indent {
2802                    return Err(self.err("expected indented child block"));
2803                }
2804                let v = self.parse_block_value(child_indent)?;
2805                Ok(v)
2806            }
2807            _ => Err(self.err("expected whitespace after ':'")),
2808        }
2809    }
2810
2811    // ----- key parsing -----
2812
2813    fn parse_key(&mut self) -> Result<String, DecodeError> {
2814        match self.peek() {
2815            Some('"') => self.parse_basic_string_key(),
2816            Some('\'') => self.parse_literal_string_key(),
2817            Some(_) => self.parse_bare_key(),
2818            None => Err(self.err("expected key")),
2819        }
2820    }
2821
2822    fn parse_bare_key(&mut self) -> Result<String, DecodeError> {
2823        let start = self.pos;
2824        while let Some(c) = self.peek() {
2825            if is_bare_key_char(c) {
2826                self.bump();
2827            } else {
2828                break;
2829            }
2830        }
2831        if self.pos == start {
2832            return Err(self.err("expected key"));
2833        }
2834        Ok(self.src[start..self.pos].to_string())
2835    }
2836
2837    fn parse_basic_string_key(&mut self) -> Result<String, DecodeError> {
2838        // disallow """ as key opener (heredoc-ish)
2839        if self.rest().starts_with("\"\"\"") {
2840            return Err(self.err("triple-quoted strings are not allowed as keys"));
2841        }
2842        // Suppress original-form recording: the key is not a value
2843        // and must not end up as an OriginalLiteral entry on the
2844        // parent container's path.
2845        let saved = self.record_forms;
2846        self.record_forms = false;
2847        let r = self.parse_basic_string_value();
2848        self.record_forms = saved;
2849        r
2850    }
2851
2852    fn parse_literal_string_key(&mut self) -> Result<String, DecodeError> {
2853        if self.rest().starts_with("'''") {
2854            return Err(self.err("triple-quoted strings are not allowed as keys"));
2855        }
2856        let saved = self.record_forms;
2857        self.record_forms = false;
2858        let r = self.parse_literal_string_value();
2859        self.record_forms = saved;
2860        r
2861    }
2862
2863    // ----- value parsing -----
2864
2865    /// Capture mid-token `/* ... */` block comments that appear between
2866    /// `:` and an inline value, or between `+` and a list item's content.
2867    /// Each one is pushed as `Inner` on the current `self.path` (the
2868    /// caller is responsible for ensuring the path already points at the
2869    /// kvpair / list-item being parsed). Inline whitespace between
2870    /// comments is consumed.
2871    ///
2872    /// In tier-1 mode, delegates to `capture_t1_decoration_run` which
2873    /// handles both block comments and decorator calls in source order.
2874    fn capture_inner_block_comments(&mut self) -> Result<(), DecodeError> {
2875        if self.is_t1_active() {
2876            return self.capture_t1_decoration_run(tier1::DecorationPosition::Inner);
2877        }
2878        loop {
2879            match self.peek() {
2880                Some('/') if self.rest().starts_with("/*") => {
2881                    let raw = self.read_c_block_comment()?;
2882                    if !self.lite {
2883                        self.comments.push(AttachedComment {
2884                            comment: Comment {
2885                                content: raw,
2886                                kind: CommentKind::Block,
2887                            },
2888                            position: CommentPosition::Inner,
2889                            path: self.path.clone(),
2890                        });
2891                    }
2892                    self.skip_inline_ws();
2893                }
2894                _ => break,
2895            }
2896        }
2897        Ok(())
2898    }
2899
2900    /// Tier-1 decoration-run consumer. Position is at start of run. Each
2901    /// recognized decoration is pushed to `decorations_raw` with the given
2902    /// position at `self.path`. Comments captured at the same position
2903    /// also use the existing `self.comments` accumulator with the matching
2904    /// `CommentPosition`. Returns when peek is non-whitespace,
2905    /// non-sigil, non-block-comment.
2906    ///
2907    /// Per TIER1.md §"Stacking and interleaving with comments", source
2908    /// order is preserved across decorators + block comments.
2909    fn capture_t1_decoration_run(
2910        &mut self,
2911        position: tier1::DecorationPosition,
2912    ) -> Result<(), DecodeError> {
2913        if !self.is_t1_active() {
2914            return Ok(());
2915        }
2916        let comment_position = match position {
2917            tier1::DecorationPosition::Inner => CommentPosition::Inner,
2918            tier1::DecorationPosition::Trailing => CommentPosition::Trailing,
2919            tier1::DecorationPosition::Leading => CommentPosition::Leading,
2920            tier1::DecorationPosition::Floating => CommentPosition::Floating,
2921        };
2922        loop {
2923            // skip inline ws between decorations
2924            self.skip_inline_ws();
2925            match self.peek() {
2926                Some(c) if tier1::is_sigil_atom_start(c) => {
2927                    let (mut call, end) = tier1::parse_decorator_call(self.src, self.pos)?;
2928                    // Bookkeeping: walk consumed bytes for line/line_start
2929                    let mut walk = self.pos;
2930                    while walk < end {
2931                        if self.src.as_bytes()[walk] == b'\n' {
2932                            self.line += 1;
2933                            self.line_start = walk + 1;
2934                        }
2935                        walk += 1;
2936                    }
2937                    self.pos = end;
2938                    call.position = position;
2939                    self.decorations_raw.push((self.path.clone(), position, call));
2940                }
2941                Some('/') if self.rest().starts_with("/*") => {
2942                    let raw = self.read_c_block_comment()?;
2943                    if !self.lite {
2944                        self.comments.push(AttachedComment {
2945                            comment: Comment {
2946                                content: raw,
2947                                kind: CommentKind::Block,
2948                            },
2949                            position: comment_position.clone(),
2950                            path: self.path.clone(),
2951                        });
2952                    }
2953                }
2954                _ => return Ok(()),
2955            }
2956        }
2957    }
2958
2959    /// Variant of [`capture_t1_decoration_run`] for use inside flow forms
2960    /// (`[...]` and `{...}`).
2961    ///
2962    /// In flow context `,`, `]`, and `}` are structural delimiters, but those
2963    /// characters also appear in the tier-0 reserved decorator sigil set.
2964    /// This wrapper peeks first: if the next non-inline-whitespace character
2965    /// is one of those three, the run is considered over and we return
2966    /// immediately without attempting a decorator parse.
2967    fn capture_t1_decoration_run_flow(
2968        &mut self,
2969        position: tier1::DecorationPosition,
2970    ) -> Result<(), DecodeError> {
2971        if !self.is_t1_active() {
2972            return Ok(());
2973        }
2974        let comment_position = match position {
2975            tier1::DecorationPosition::Inner => CommentPosition::Inner,
2976            tier1::DecorationPosition::Trailing => CommentPosition::Trailing,
2977            tier1::DecorationPosition::Leading => CommentPosition::Leading,
2978            tier1::DecorationPosition::Floating => CommentPosition::Floating,
2979        };
2980        loop {
2981            // Skip inline whitespace (spaces/tabs) between decorations.
2982            self.skip_inline_ws();
2983            match self.peek() {
2984                // Flow delimiters always terminate the run, even though
2985                // `,`, `]`, and `}` are in the reserved sigil set.
2986                Some(',') | Some(']') | Some('}') => return Ok(()),
2987                Some(c) if tier1::is_sigil_atom_start(c) => {
2988                    let (mut call, end) = tier1::parse_decorator_call(self.src, self.pos)?;
2989                    // Bookkeeping: walk consumed bytes for line/line_start
2990                    let mut walk = self.pos;
2991                    while walk < end {
2992                        if self.src.as_bytes()[walk] == b'\n' {
2993                            self.line += 1;
2994                            self.line_start = walk + 1;
2995                        }
2996                        walk += 1;
2997                    }
2998                    self.pos = end;
2999                    call.position = position;
3000                    self.decorations_raw.push((self.path.clone(), position, call));
3001                }
3002                Some('/') if self.rest().starts_with("/*") => {
3003                    let raw = self.read_c_block_comment()?;
3004                    if !self.lite {
3005                        self.comments.push(AttachedComment {
3006                            comment: Comment {
3007                                content: raw,
3008                                kind: CommentKind::Block,
3009                            },
3010                            position: comment_position.clone(),
3011                            path: self.path.clone(),
3012                        });
3013                    }
3014                }
3015                _ => return Ok(()),
3016            }
3017        }
3018    }
3019
3020    fn parse_inline_value_or_heredoc(&mut self) -> Result<Value, DecodeError> {
3021        // Inner /* ... */ block comments between `:` and value (or `+`
3022        // and item) are captured by the caller via
3023        // `capture_inner_block_comments` before this function runs, so
3024        // we don't expect a leading `/*` here.
3025        match self.peek() {
3026            Some('"') => {
3027                if self.rest().starts_with("\"\"\"") {
3028                    self.parse_heredoc_basic().map(Value::String)
3029                } else {
3030                    let r = self.parse_basic_string_value().map(Value::String);
3031                    // Basic is the emitter's default for strings, so
3032                    // we don't need to record it as an OriginalLiteral
3033                    // entry — `encode` falls back to Basic when no
3034                    // entry is found. Still walking through the parse
3035                    // path here for symmetry / future extension.
3036                    r
3037                }
3038            }
3039            Some('\'') => {
3040                if self.rest().starts_with("'''") {
3041                    self.parse_heredoc_literal().map(Value::String)
3042                } else {
3043                    let r = self.parse_literal_string_value().map(Value::String)?;
3044                    self.record_form(OriginalLiteral::String { form: StringForm::Literal });
3045                    Ok(r)
3046                }
3047            }
3048            Some('[') => Ok(Value::List(self.parse_flow_array()?)),
3049            Some('{') => {
3050                if self.ignore_order {
3051                    Ok(Value::UnorderedTable(self.parse_flow_table_unordered()?))
3052                } else {
3053                    Ok(Value::Table(self.parse_flow_table()?))
3054                }
3055            }
3056            Some('t') | Some('f') => self.parse_bool_or_keyword_value(),
3057            Some('i') => self.parse_inf_value(false),
3058            Some('n') => self.parse_nan_value(false),
3059            Some(c) if c == '+' || c == '-' || c.is_ascii_digit() => {
3060                self.parse_number_or_datetime()
3061            }
3062            Some(c) => Err(self.err(format!("unexpected character '{c}' in value"))),
3063            None => Err(self.err("expected value")),
3064        }
3065    }
3066
3067    fn parse_bool_or_keyword_value(&mut self) -> Result<Value, DecodeError> {
3068        let rest = self.rest();
3069        if let Some(after) = rest.strip_prefix("true") {
3070            if Self::is_value_terminator(after.chars().next()) {
3071                self.pos += 4;
3072                return Ok(Value::Bool(true));
3073            }
3074        }
3075        if let Some(after) = rest.strip_prefix("false") {
3076            if Self::is_value_terminator(after.chars().next()) {
3077                self.pos += 5;
3078                return Ok(Value::Bool(false));
3079            }
3080        }
3081        Err(self.err("expected value"))
3082    }
3083
3084    fn parse_inf_value(&mut self, signed: bool) -> Result<Value, DecodeError> {
3085        let after = if signed { 4 } else { 3 };
3086        let want: &'static str = if signed {
3087            // peek to see sign
3088            match self.peek() {
3089                Some('+') => "+inf",
3090                Some('-') => "-inf",
3091                _ => "inf",
3092            }
3093        } else {
3094            "inf"
3095        };
3096        let neg = want.starts_with('-');
3097        let rest_owned = self.rest().to_string();
3098        if let Some(rem) = rest_owned.strip_prefix(want) {
3099            if Self::is_value_terminator(rem.chars().next()) {
3100                self.pos += after;
3101                let v = if neg { f64::NEG_INFINITY } else { f64::INFINITY };
3102                return Ok(Value::Float(v));
3103            }
3104        }
3105        Err(self.err("expected 'inf'"))
3106    }
3107
3108    fn parse_nan_value(&mut self, signed: bool) -> Result<Value, DecodeError> {
3109        let after = if signed { 4 } else { 3 };
3110        let want: &'static str = if signed {
3111            match self.peek() { Some('+') => "+nan", Some('-') => "-nan", _ => "nan" }
3112        } else {
3113            "nan"
3114        };
3115        let rest_owned = self.rest().to_string();
3116        if let Some(rem) = rest_owned.strip_prefix(want) {
3117            if Self::is_value_terminator(rem.chars().next()) {
3118                self.pos += after;
3119                return Ok(Value::Float(f64::NAN));
3120            }
3121        }
3122        Err(self.err("expected 'nan'"))
3123    }
3124
3125    fn is_value_terminator(c: Option<char>) -> bool {
3126        match c {
3127            None => true,
3128            Some(c) => matches!(c, ' ' | '\t' | '\n' | '\r' | '#' | '/' | ',' | ']' | '}'),
3129        }
3130    }
3131
3132    // ----- numbers + datetime -----
3133
3134    fn parse_number_or_datetime(&mut self) -> Result<Value, DecodeError> {
3135        let rest = self.rest();
3136        let starts_with_sign = matches!(self.peek(), Some('+') | Some('-'));
3137        if !starts_with_sign && looks_like_date_prefix(rest) {
3138            return self.parse_datetime_value();
3139        }
3140        if !starts_with_sign && looks_like_time_prefix(rest) {
3141            return self.parse_local_time_value();
3142        }
3143        // Check inf/nan with sign
3144        if starts_with_sign {
3145            let after_sign = &rest[1..];
3146            if after_sign.starts_with("inf") {
3147                let signed_rest = &rest[..4];
3148                if Self::is_value_terminator(rest[4..].chars().next()) {
3149                    let neg = signed_rest.starts_with('-');
3150                    self.pos += 4;
3151                    return Ok(Value::Float(if neg { f64::NEG_INFINITY } else { f64::INFINITY }));
3152                }
3153            }
3154            // signed nan rejected per design — keep `nan` bare only
3155        }
3156        // determine if this is a float by scanning the token
3157        let tok = self.scan_number_token();
3158        let s = &self.src[self.pos..self.pos + tok.len];
3159        if tok.is_float {
3160            let f = parse_float(s).ok_or_else(|| self.err(format!("invalid float: {s}")))?;
3161            self.pos += tok.len;
3162            Ok(Value::Float(f))
3163        } else {
3164            let n = parse_integer(s).map_err(|e| self.err(e))?;
3165            self.pos += tok.len;
3166            // Record original lexeme only when it differs from the
3167            // canonical "decimal, no underscores, no '+' sign" form
3168            // used by the default emitter (i.e. `n.to_string()`).
3169            // Anything else — hex/oct/bin, underscore separators,
3170            // explicit `+` — gets recorded.
3171            //
3172            // Lite mode skips the full canonical-comparison alloc up
3173            // front: no `original_forms` is kept, so the lexeme is
3174            // never needed.
3175            if !self.lite && self.record_forms {
3176                let canonical = n.to_string();
3177                if s != canonical {
3178                    self.record_form(OriginalLiteral::Integer { lit: s.to_string() });
3179                }
3180            }
3181            Ok(Value::Integer(n))
3182        }
3183    }
3184
3185    fn scan_number_token(&self) -> NumTok {
3186        let s = self.rest();
3187        let bytes = s.as_bytes();
3188        let mut i = 0usize;
3189        if matches!(bytes.get(0), Some(b'+') | Some(b'-')) {
3190            i += 1;
3191        }
3192        // detect base prefix
3193        let is_prefixed = bytes.get(i) == Some(&b'0')
3194            && matches!(bytes.get(i + 1).copied(), Some(b'x') | Some(b'o') | Some(b'b'));
3195        if is_prefixed {
3196            i += 2;
3197            // mantissa digits + underscores + possible '.'
3198            let mut saw_dot = false;
3199            let mut saw_p = false;
3200            while let Some(&b) = bytes.get(i) {
3201                if b == b'_' || is_base_digit(b, bytes[i - if saw_dot { 1 } else { 0 } - 1]) {
3202                    i += 1;
3203                } else if b == b'.' && !saw_dot && !saw_p {
3204                    saw_dot = true;
3205                    i += 1;
3206                } else if b == b'p' && !saw_p {
3207                    saw_p = true;
3208                    i += 1;
3209                    if matches!(bytes.get(i), Some(b'+') | Some(b'-')) {
3210                        i += 1;
3211                    }
3212                } else if saw_p && b.is_ascii_digit() {
3213                    i += 1;
3214                } else {
3215                    break;
3216                }
3217            }
3218            return NumTok { len: i, is_float: saw_dot || saw_p };
3219        }
3220        // decimal
3221        let mut saw_dot = false;
3222        let mut saw_e = false;
3223        while let Some(&b) = bytes.get(i) {
3224            if b.is_ascii_digit() || b == b'_' {
3225                i += 1;
3226            } else if b == b'.' && !saw_dot && !saw_e {
3227                saw_dot = true;
3228                i += 1;
3229            } else if (b == b'e' || b == b'E') && !saw_e {
3230                saw_e = true;
3231                i += 1;
3232                if matches!(bytes.get(i), Some(b'+') | Some(b'-')) {
3233                    i += 1;
3234                }
3235            } else {
3236                break;
3237            }
3238        }
3239        NumTok { len: i, is_float: saw_dot || saw_e }
3240    }
3241
3242    fn parse_datetime_value(&mut self) -> Result<Value, DecodeError> {
3243        let rest: String = self.rest().to_string();
3244        let date = rest[..10].to_string();
3245        validate_date(&date).map_err(|e| self.err(e))?;
3246        let rest2 = &rest[10..];
3247        if !rest2.starts_with('T') && !rest2.starts_with(' ') {
3248            if rest2.starts_with('t') {
3249                return Err(self.err("date and time separator must be uppercase 'T' (lowercase 't' not permitted)"));
3250            }
3251            if !Self::is_value_terminator(rest2.chars().next()) {
3252                return Err(self.err("invalid character after date"));
3253            }
3254            self.pos += 10;
3255            return Ok(Value::LocalDate(date));
3256        }
3257        if rest2.starts_with(' ') {
3258            let after_ws = rest2.trim_start_matches(|c: char| c == ' ' || c == '\t');
3259            if matches!(after_ws.chars().next(), Some(c) if c.is_ascii_digit()) {
3260                return Err(self.err("date and time must be separated by 'T' (space not permitted)"));
3261            }
3262            self.pos += 10;
3263            return Ok(Value::LocalDate(date));
3264        }
3265        let after_t = &rest2[1..];
3266        if !looks_like_time_prefix(after_t) {
3267            return Err(self.err("expected HH:MM:SS after 'T'"));
3268        }
3269        let time_str = &after_t[..8];
3270        validate_time(time_str).map_err(|e| self.err(e))?;
3271        let mut consumed = 10 + 1 + 8;
3272        let after_time = &rest[consumed..];
3273        let mut frac_len = 0usize;
3274        if after_time.starts_with('.') {
3275            let bytes = after_time.as_bytes();
3276            let mut k = 1usize;
3277            while k < bytes.len() && bytes[k].is_ascii_digit() {
3278                k += 1;
3279            }
3280            let digits = k - 1;
3281            if digits == 0 {
3282                return Err(self.err("expected fractional digits after '.'"));
3283            }
3284            if digits > 9 {
3285                return Err(self.err("fractional seconds limited to 9 digits (nanosecond precision)"));
3286            }
3287            frac_len = k;
3288        }
3289        consumed += frac_len;
3290        let after_frac = &rest[consumed..];
3291        if after_frac.starts_with('Z') || after_frac.starts_with('z') {
3292            consumed += 1;
3293            let s = rest[..consumed].to_string();
3294            self.pos += consumed;
3295            return Ok(Value::OffsetDateTime(s));
3296        }
3297        if after_frac.starts_with('+') || after_frac.starts_with('-') {
3298            let bytes = after_frac.as_bytes();
3299            if bytes.len() < 6
3300                || !bytes[1].is_ascii_digit()
3301                || !bytes[2].is_ascii_digit()
3302                || bytes[3] != b':'
3303                || !bytes[4].is_ascii_digit()
3304                || !bytes[5].is_ascii_digit()
3305            {
3306                return Err(self.err("invalid offset; expected ±HH:MM"));
3307            }
3308            let oh = (bytes[1] - b'0') * 10 + (bytes[2] - b'0');
3309            let om = (bytes[4] - b'0') * 10 + (bytes[5] - b'0');
3310            if oh > 23 || om > 59 {
3311                return Err(self.err("offset out of range"));
3312            }
3313            consumed += 6;
3314            let s = rest[..consumed].to_string();
3315            self.pos += consumed;
3316            return Ok(Value::OffsetDateTime(s));
3317        }
3318        // no offset → local-datetime
3319        if !Self::is_value_terminator(after_frac.chars().next()) {
3320            return Err(self.err("invalid character after datetime"));
3321        }
3322        let s = rest[..consumed].to_string();
3323        self.pos += consumed;
3324        Ok(Value::LocalDateTime(s))
3325    }
3326
3327    fn parse_local_time_value(&mut self) -> Result<Value, DecodeError> {
3328        let rest = self.rest();
3329        let time_str = &rest[..8];
3330        validate_time(time_str).map_err(|e| self.err(e))?;
3331        let mut consumed = 8usize;
3332        let after = &rest[consumed..];
3333        if after.starts_with('.') {
3334            let bytes = after.as_bytes();
3335            let mut k = 1usize;
3336            while k < bytes.len() && bytes[k].is_ascii_digit() {
3337                k += 1;
3338            }
3339            let digits = k - 1;
3340            if digits == 0 {
3341                return Err(self.err("expected fractional digits after '.'"));
3342            }
3343            if digits > 9 {
3344                return Err(self.err("fractional seconds limited to 9 digits"));
3345            }
3346            consumed += k;
3347        }
3348        let after2 = &rest[consumed..];
3349        if !Self::is_value_terminator(after2.chars().next()) {
3350            return Err(self.err("invalid character after time"));
3351        }
3352        let s = rest[..consumed].to_string();
3353        self.pos += consumed;
3354        Ok(Value::LocalTime(s))
3355    }
3356
3357    // ----- strings -----
3358
3359    fn parse_basic_string_value(&mut self) -> Result<String, DecodeError> {
3360        let start_line = self.line;
3361        let start_lstart = self.line_start;
3362        let start_pos = self.pos;
3363        self.bump(); // consume opening "
3364        let mut out = String::new();
3365        loop {
3366            match self.peek() {
3367                None => return Err(self.err_at(start_line, start_lstart, start_pos, "unterminated string")),
3368                Some('\n') | Some('\r') => return Err(self.err("strings cannot span lines")),
3369                Some('"') => {
3370                    self.bump();
3371                    // SPEC §Unicode normalization: re-NFC after escape
3372                    // decoding — `é` resolves to U+0065 U+0301
3373                    // which the source-level NFC pass never saw.
3374                    return Ok(nfc_normalize(&out));
3375                }
3376                Some('\\') => {
3377                    self.bump();
3378                    let esc_pos = self.pos;
3379                    match self.bump() {
3380                        Some('"') => out.push('"'),
3381                        Some('\\') => out.push('\\'),
3382                        Some('n') => out.push('\n'),
3383                        Some('t') => out.push('\t'),
3384                        Some('r') => out.push('\r'),
3385                        Some('b') => out.push('\u{0008}'),
3386                        Some('f') => out.push('\u{000c}'),
3387                        Some('u') => {
3388                            let cp = self.read_hex_codepoint(4, esc_pos)?;
3389                            out.push(cp);
3390                        }
3391                        Some('U') => {
3392                            let cp = self.read_hex_codepoint(8, esc_pos)?;
3393                            out.push(cp);
3394                        }
3395                        Some(c) => return Err(self.err(format!("invalid escape '\\{c}'"))),
3396                        None => return Err(self.err("unterminated escape")),
3397                    }
3398                }
3399                Some(c) => {
3400                    self.bump();
3401                    out.push(c);
3402                }
3403            }
3404        }
3405    }
3406
3407    fn parse_literal_string_value(&mut self) -> Result<String, DecodeError> {
3408        let start_line = self.line;
3409        let start_lstart = self.line_start;
3410        let start_pos = self.pos;
3411        self.bump(); // consume '
3412        let mut out = String::new();
3413        loop {
3414            match self.peek() {
3415                None => return Err(self.err_at(start_line, start_lstart, start_pos, "unterminated string")),
3416                Some('\n') | Some('\r') => return Err(self.err("strings cannot span lines")),
3417                Some('\'') => {
3418                    self.bump();
3419                    return Ok(out);
3420                }
3421                Some(c) => {
3422                    self.bump();
3423                    out.push(c);
3424                }
3425            }
3426        }
3427    }
3428
3429    fn read_hex_codepoint(&mut self, n: usize, _esc_pos: usize) -> Result<char, DecodeError> {
3430        let s = self.rest();
3431        if s.len() < n {
3432            return Err(self.err(format!("expected {n} hex digits in unicode escape")));
3433        }
3434        let hex = &s[..n];
3435        if !hex.bytes().all(|b| b.is_ascii_hexdigit()) {
3436            return Err(self.err(format!("invalid hex in unicode escape: {hex}")));
3437        }
3438        let v = u32::from_str_radix(hex, 16)
3439            .map_err(|_| self.err("invalid unicode escape"))?;
3440        // SPEC: U+0000 is forbidden anywhere in DMS source, including via
3441        // escape decoding. `` / `\U00000000` must not slip through.
3442        if v == 0 {
3443            return Err(self.err("\\u0000 escape forbidden"));
3444        }
3445        self.pos += n;
3446        char::from_u32(v).ok_or_else(|| self.err("unicode escape is not a scalar value"))
3447    }
3448
3449    // ----- heredocs -----
3450
3451    fn parse_heredoc_basic(&mut self) -> Result<String, DecodeError> {
3452        // consume opening """
3453        self.pos += 3;
3454        let label = self.parse_heredoc_label();
3455        let modifiers = self.parse_heredoc_modifiers()?;
3456        // rest of opener line must be only whitespace, then EOL
3457        self.skip_inline_ws();
3458        if !(self.consume_eol() || self.eof()) {
3459            return Err(self.err("heredoc opener must be followed by end of line"));
3460        }
3461        let label_opt = if label.is_empty() { None } else { Some(label.clone()) };
3462        let terminator = if label.is_empty() { "\"\"\"".to_string() } else { label };
3463        let body = self.collect_heredoc_body(&terminator, true)?;
3464        // SPEC §basic-string escapes: surrogate codepoints (U+D800..U+DFFF)
3465        // are not valid Unicode scalars and are a parse error in `\uXXXX` /
3466        // `\UXXXXXXXX` escapes. Basic-heredoc bodies process the same
3467        // escapes as basic strings, so apply the same rejection here.
3468        validate_heredoc_basic_surrogates(&body)?;
3469        let stripped = strip_indent_and_continuations(&body, true)?;
3470        let processed = apply_modifiers(stripped, &modifiers).map_err(|e| self.err(e))?;
3471        let calls: Vec<HeredocModifierCall> = modifiers
3472            .into_iter()
3473            .map(|m| HeredocModifierCall { name: m.name, args: m.args })
3474            .collect();
3475        self.record_form(OriginalLiteral::String {
3476            form: StringForm::Heredoc {
3477                flavor: HeredocFlavor::BasicTriple,
3478                label: label_opt,
3479                modifiers: calls,
3480            },
3481        });
3482        // SPEC §Unicode normalization: re-NFC after escape decoding —
3483        // basic-heredoc bodies process the same `\u`/`\U` escapes as
3484        // basic strings, so escape-decoded scalars need normalizing.
3485        Ok(nfc_normalize(&processed))
3486    }
3487
3488    fn parse_heredoc_literal(&mut self) -> Result<String, DecodeError> {
3489        self.pos += 3;
3490        let label = self.parse_heredoc_label();
3491        let modifiers = self.parse_heredoc_modifiers()?;
3492        self.skip_inline_ws();
3493        if !(self.consume_eol() || self.eof()) {
3494            return Err(self.err("heredoc opener must be followed by end of line"));
3495        }
3496        let label_opt = if label.is_empty() { None } else { Some(label.clone()) };
3497        let terminator = if label.is_empty() { "'''".to_string() } else { label };
3498        let body = self.collect_heredoc_body(&terminator, false)?;
3499        let stripped = strip_indent_and_continuations(&body, false)?;
3500        let processed = apply_modifiers(stripped, &modifiers).map_err(|e| self.err(e))?;
3501        let calls: Vec<HeredocModifierCall> = modifiers
3502            .into_iter()
3503            .map(|m| HeredocModifierCall { name: m.name, args: m.args })
3504            .collect();
3505        self.record_form(OriginalLiteral::String {
3506            form: StringForm::Heredoc {
3507                flavor: HeredocFlavor::LiteralTriple,
3508                label: label_opt,
3509                modifiers: calls,
3510            },
3511        });
3512        Ok(processed)
3513    }
3514
3515    fn parse_heredoc_label(&mut self) -> String {
3516        let start = self.pos;
3517        if let Some(c) = self.peek() {
3518            if !is_label_start(c) {
3519                return String::new();
3520            }
3521        } else {
3522            return String::new();
3523        }
3524        while let Some(c) = self.peek() {
3525            if is_label_cont(c) {
3526                self.bump();
3527            } else {
3528                break;
3529            }
3530        }
3531        self.src[start..self.pos].to_string()
3532    }
3533
3534    fn parse_heredoc_modifiers(&mut self) -> Result<Vec<HMod>, DecodeError> {
3535        let mut mods = Vec::new();
3536        loop {
3537            // require whitespace before each modifier
3538            let ws_start = self.pos;
3539            self.skip_inline_ws();
3540            let had_ws = self.pos > ws_start;
3541            match self.peek() {
3542                Some(c) if is_label_start(c) => {
3543                    if !had_ws {
3544                        return Err(self.err("modifier must be preceded by whitespace"));
3545                    }
3546                    let m = self.parse_one_modifier()?;
3547                    mods.push(m);
3548                }
3549                _ => {
3550                    self.pos = ws_start;
3551                    return Ok(mods);
3552                }
3553            }
3554        }
3555    }
3556
3557    fn parse_one_modifier(&mut self) -> Result<HMod, DecodeError> {
3558        let name_start = self.pos;
3559        while let Some(c) = self.peek() {
3560            if is_label_cont(c) {
3561                self.bump();
3562            } else {
3563                break;
3564            }
3565        }
3566        let name = self.src[name_start..self.pos].to_string();
3567        if self.peek() != Some('(') {
3568            return Err(self.err("modifiers require parentheses"));
3569        }
3570        self.bump();
3571        // Suppress original-form recording while parsing modifier args:
3572        // these values are consumed at parse time (passed into the modifier
3573        // call) and otherwise discarded — they must not pollute the host
3574        // heredoc node's `original_forms` slot. e.g. `_trim("\n", ">")`
3575        // should leave the heredoc's String form intact, not push entries
3576        // for the basic-string `","` or literal-string `">"` arguments.
3577        let prev_record = self.record_forms;
3578        self.record_forms = false;
3579        // parse args: comma-separated inline values
3580        let mut args = Vec::new();
3581        let result: Result<(), DecodeError> = (|| {
3582            loop {
3583                self.skip_inline_ws();
3584                if self.peek() == Some(')') {
3585                    self.bump();
3586                    return Ok(());
3587                }
3588                let v = self.parse_inline_value_or_heredoc()?;
3589                args.push(v);
3590                self.skip_inline_ws();
3591                match self.peek() {
3592                    Some(',') => { self.bump(); }
3593                    Some(')') => { self.bump(); return Ok(()); }
3594                    _ => return Err(self.err("expected ',' or ')' in modifier args")),
3595                }
3596            }
3597        })();
3598        self.record_forms = prev_record;
3599        result?;
3600        Ok(HMod { name, args })
3601    }
3602
3603    fn collect_heredoc_body(&mut self, terminator: &str, _allow_continuation: bool) -> Result<HBody, DecodeError> {
3604        let mut lines: Vec<HLine> = Vec::new();
3605        let opener_line = self.line;
3606        let opener_lstart = self.line_start;
3607        let opener_pos = self.pos;
3608        loop {
3609            if self.eof() {
3610                return Err(self.err_at(opener_line, opener_lstart, opener_pos, "unterminated heredoc"));
3611            }
3612            let line_begin = self.pos;
3613            // read until eol
3614            while let Some(c) = self.peek() {
3615                if c == '\n' || c == '\r' {
3616                    break;
3617                }
3618                self.bump();
3619            }
3620            let line_text = &self.src[line_begin..self.pos];
3621            let raw = line_text.to_string();
3622            let this_line = self.line;
3623            let this_line_start = self.line_start;
3624            if raw.trim() == terminator {
3625                // Leave the terminator's trailing EOL for the caller's
3626                // consume_after_value() to consume; otherwise we'd skip
3627                // past it and the next line's content gets misread.
3628                let strip_depth = raw.bytes().take_while(|&b| b == b' ').count();
3629                return Ok(HBody { lines, strip_depth });
3630            }
3631            // not the terminator — consume this line's EOL and continue
3632            let _consumed_eol = self.consume_eol();
3633            lines.push(HLine { text: raw, line: this_line, line_start: this_line_start });
3634        }
3635    }
3636
3637    // ----- flow forms -----
3638
3639    fn parse_flow_array(&mut self) -> Result<Vec<Value>, DecodeError> {
3640        self.bump(); // consume [
3641        let mut items = Vec::new();
3642        loop {
3643            self.skip_flow_ws()?;
3644            if self.peek() == Some(']') {
3645                self.bump();
3646                return Ok(items);
3647            }
3648            // Push the current index onto the path so decoration captures and
3649            // OriginalLiteral records get the right breadcrumb. The path is
3650            // popped after inner + value + trailing have all been processed.
3651            let idx = items.len();
3652            self.path.push(BreadcrumbSegment::Index(idx));
3653            // Tier-1: capture any inner decoration run before the value.
3654            if let Err(e) = self.capture_t1_decoration_run_flow(tier1::DecorationPosition::Inner) {
3655                self.path.pop();
3656                return Err(e);
3657            }
3658            let parsed = self.parse_inline_value_or_heredoc_in_flow();
3659            if let Err(e) = parsed {
3660                self.path.pop();
3661                return Err(e);
3662            }
3663            let v = parsed.unwrap();
3664            // Tier-1: capture any trailing decoration run after the value.
3665            if let Err(e) = self.capture_t1_decoration_run_flow(tier1::DecorationPosition::Trailing) {
3666                self.path.pop();
3667                return Err(e);
3668            }
3669            self.path.pop();
3670            items.push(v);
3671            self.skip_flow_ws()?;
3672            match self.peek() {
3673                Some(',') => { self.bump(); }
3674                Some(']') => { self.bump(); return Ok(items); }
3675                Some(c) => return Err(self.err(format!("unexpected '{c}' in flow array; expected ',' or ']'"))),
3676                None => return Err(self.err("unterminated flow array")),
3677            }
3678        }
3679    }
3680
3681    fn parse_flow_table(&mut self) -> Result<DmsMap<Value>, DecodeError> {
3682        self.bump(); // consume {
3683        let mut t: DmsMap<Value> = DmsMap::default();
3684        loop {
3685            self.skip_flow_ws()?;
3686            if self.peek() == Some('}') {
3687                self.bump();
3688                return Ok(t);
3689            }
3690            let key = self.parse_key()?;
3691            if self.peek() != Some(':') {
3692                return Err(self.err("expected ':' after flow-table key"));
3693            }
3694            self.bump();
3695            // require at least one space or newline (whitespace is insignificant in flow)
3696            if !matches!(self.peek(), Some(' ') | Some('\t') | Some('\n') | Some('\r')) {
3697                return Err(self.err("expected whitespace after ':'"));
3698            }
3699            self.skip_flow_ws()?;
3700            // Path push so decoration captures and OriginalLiteral records on
3701            // the value get attached to the key path.
3702            self.path.push(BreadcrumbSegment::Key(key.clone()));
3703            // Tier-1: capture any inner decoration run between ':' and value.
3704            if let Err(e) = self.capture_t1_decoration_run_flow(tier1::DecorationPosition::Inner) {
3705                self.path.pop();
3706                return Err(e);
3707            }
3708            let parsed = self.parse_inline_value_or_heredoc_in_flow();
3709            if let Err(e) = parsed {
3710                self.path.pop();
3711                return Err(e);
3712            }
3713            let v = parsed.unwrap();
3714            // Tier-1: capture any trailing decoration run after the value.
3715            if let Err(e) = self.capture_t1_decoration_run_flow(tier1::DecorationPosition::Trailing) {
3716                self.path.pop();
3717                return Err(e);
3718            }
3719            self.path.pop();
3720            if t.contains_key(&key) {
3721                return Err(self.err(format!("duplicate key: {key}")));
3722            }
3723            t.insert(key, v);
3724            self.skip_flow_ws()?;
3725            match self.peek() {
3726                Some(',') => { self.bump(); }
3727                Some('}') => { self.bump(); return Ok(t); }
3728                Some(c) => return Err(self.err(format!("unexpected '{c}' in flow table; expected ',' or '}}'"))),
3729                None => return Err(self.err("unterminated flow table")),
3730            }
3731        }
3732    }
3733
3734    /// Unordered counterpart of `parse_flow_table`. Same grammar; the
3735    /// only difference is the backing `HashMap` (no insertion-order
3736    /// tracking). See SPEC §"Unordered tables".
3737    fn parse_flow_table_unordered(&mut self) -> Result<DmsHashMap<Value>, DecodeError> {
3738        self.bump(); // consume {
3739        let mut t: DmsHashMap<Value> = DmsHashMap::default();
3740        loop {
3741            self.skip_flow_ws()?;
3742            if self.peek() == Some('}') {
3743                self.bump();
3744                return Ok(t);
3745            }
3746            let key = self.parse_key()?;
3747            if self.peek() != Some(':') {
3748                return Err(self.err("expected ':' after flow-table key"));
3749            }
3750            self.bump();
3751            if !matches!(self.peek(), Some(' ') | Some('\t') | Some('\n') | Some('\r')) {
3752                return Err(self.err("expected whitespace after ':'"));
3753            }
3754            self.skip_flow_ws()?;
3755            // Path push so decoration captures and OriginalLiteral records on
3756            // the value get attached to the key path.
3757            self.path.push(BreadcrumbSegment::Key(key.clone()));
3758            // Tier-1: capture any inner decoration run between ':' and value.
3759            if let Err(e) = self.capture_t1_decoration_run_flow(tier1::DecorationPosition::Inner) {
3760                self.path.pop();
3761                return Err(e);
3762            }
3763            let parsed = self.parse_inline_value_or_heredoc_in_flow();
3764            if let Err(e) = parsed {
3765                self.path.pop();
3766                return Err(e);
3767            }
3768            let v = parsed.unwrap();
3769            // Tier-1: capture any trailing decoration run after the value.
3770            if let Err(e) = self.capture_t1_decoration_run_flow(tier1::DecorationPosition::Trailing) {
3771                self.path.pop();
3772                return Err(e);
3773            }
3774            self.path.pop();
3775            if t.contains_key(&key) {
3776                return Err(self.err(format!("duplicate key: {key}")));
3777            }
3778            t.insert(key, v);
3779            self.skip_flow_ws()?;
3780            match self.peek() {
3781                Some(',') => { self.bump(); }
3782                Some('}') => { self.bump(); return Ok(t); }
3783                Some(c) => return Err(self.err(format!("unexpected '{c}' in flow table; expected ',' or '}}'"))),
3784                None => return Err(self.err("unterminated flow table")),
3785            }
3786        }
3787    }
3788
3789    /// Whitespace inside flow forms: spaces, tabs, newlines (LF/CRLF). NO comments.
3790    fn skip_flow_ws(&mut self) -> Result<(), DecodeError> {
3791        loop {
3792            match self.peek() {
3793                Some(' ') | Some('\t') => { self.bump(); }
3794                Some('\n') => { self.bump(); self.advance_line(); }
3795                Some('\r') if self.rest().starts_with("\r\n") => {
3796                    self.pos += 2; self.advance_line();
3797                }
3798                Some('#') => return Err(self.err("comments not allowed inside flow forms")),
3799                Some('/') if self.rest().starts_with("//") => return Err(self.err("comments not allowed inside flow forms")),
3800                Some('/') if self.rest().starts_with("/*") => return Err(self.err("comments not allowed inside flow forms")),
3801                _ => return Ok(()),
3802            }
3803        }
3804    }
3805
3806    /// Inside a flow form, heredocs are not permitted (would need newline-significant content).
3807    fn parse_inline_value_or_heredoc_in_flow(&mut self) -> Result<Value, DecodeError> {
3808        match self.peek() {
3809            Some('"') if self.rest().starts_with("\"\"\"") => {
3810                Err(self.err("heredocs are not allowed inside flow forms"))
3811            }
3812            Some('\'') if self.rest().starts_with("'''") => {
3813                Err(self.err("heredocs are not allowed inside flow forms"))
3814            }
3815            _ => self.parse_inline_value_or_heredoc(),
3816        }
3817    }
3818
3819    // ----- post-value -----
3820
3821    fn consume_after_value(&mut self, allow_eof: bool) -> Result<(), DecodeError> {
3822        // Same-line comment(s) after a value attach as `Trailing` on the
3823        // node currently identified by `self.path`. Callers (kvpair / list
3824        // item entry points) push the breadcrumb segment for that node
3825        // before parsing the value, so `self.path` is correct here.
3826        //
3827        // Multiple trailing comments may appear on one line, separated
3828        // by whitespace, e.g. `port: 3 /* a */ /* b */ # last`. Block
3829        // comments don't terminate the line; a `#` or `//` line comment
3830        // (if present) consumes to EOL and must therefore come last.
3831        //
3832        // Tier-1: consume decorator calls + interleaved block comments
3833        // before handing off to the existing trailing-comment logic.
3834        self.capture_t1_decoration_run(tier1::DecorationPosition::Trailing)?;
3835        let mut first = true;
3836        loop {
3837            let ws_start = self.pos;
3838            self.skip_inline_ws();
3839            let had_ws = self.pos > ws_start;
3840            let captured = match self.peek() {
3841                Some('#') if !self.rest().starts_with("###") => {
3842                    if !had_ws {
3843                        return Err(self.err("expected whitespace before '#' comment"));
3844                    }
3845                    let raw = self.read_line_comment_to_eol();
3846                    if !self.lite {
3847                        self.comments.push(AttachedComment {
3848                            comment: Comment { content: raw, kind: CommentKind::Line },
3849                            position: CommentPosition::Trailing,
3850                            path: self.path.clone(),
3851                        });
3852                    }
3853                    // Line comment consumes to EOL → no more trailing.
3854                    break;
3855                }
3856                Some('/') if self.rest().starts_with("//") => {
3857                    if !had_ws {
3858                        return Err(self.err("expected whitespace before '//' comment"));
3859                    }
3860                    let raw = self.read_line_comment_to_eol();
3861                    if !self.lite {
3862                        self.comments.push(AttachedComment {
3863                            comment: Comment { content: raw, kind: CommentKind::Line },
3864                            position: CommentPosition::Trailing,
3865                            path: self.path.clone(),
3866                        });
3867                    }
3868                    break;
3869                }
3870                Some('/') if self.rest().starts_with("/*") => {
3871                    let raw = self.read_c_block_comment()?;
3872                    if !self.lite {
3873                        self.comments.push(AttachedComment {
3874                            comment: Comment { content: raw, kind: CommentKind::Block },
3875                            position: CommentPosition::Trailing,
3876                            path: self.path.clone(),
3877                        });
3878                    }
3879                    true
3880                }
3881                _ => false,
3882            };
3883            if !captured {
3884                // No more comments — restore position past whitespace,
3885                // then require EOL/EOF below. (Whitespace before EOL is
3886                // benign; we leave it consumed.)
3887                let _ = first;
3888                break;
3889            }
3890            first = false;
3891        }
3892        // Now expect EOL, EOF, or — for nested contexts — nothing.
3893        match self.peek() {
3894            None => {
3895                if allow_eof { Ok(()) } else { Ok(()) }
3896            }
3897            Some('\n') => { self.bump(); self.advance_line(); Ok(()) }
3898            Some('\r') if self.rest().starts_with("\r\n") => {
3899                self.pos += 2; self.advance_line(); Ok(())
3900            }
3901            Some(c) => Err(self.err(format!("unexpected character '{c}' after value"))),
3902        }
3903    }
3904}
3905
3906// ---------- helper structs ----------
3907
/// Summary of a scanned numeric token.
///
/// NOTE(review): no use of this struct is visible in this part of the
/// file; the field descriptions below are inferred from the names and
/// should be confirmed against the number lexer.
struct NumTok {
    /// Presumably the length of the token in the input (bytes) — confirm.
    len: usize,
    /// Presumably `true` when the token must be parsed as a float rather
    /// than an integer — confirm.
    is_float: bool,
}
3912
/// One heredoc modifier call: a modifier name plus its argument values.
///
/// `apply_modifiers` dispatches on `name` (it recognises
/// `"_fold_paragraphs"` and `"_trim"`) and validates `args` per modifier.
#[derive(Debug, Clone)]
struct HMod {
    /// Modifier identifier as matched by `apply_modifiers`.
    name: String,
    /// Positional arguments; the string-typed ones are read by `_trim`.
    args: Vec<Value>,
}
3918
/// Raw heredoc body as collected before indentation stripping.
struct HBody {
    /// Body lines in source order, still carrying their original indent.
    lines: Vec<HLine>,
    /// Number of leading bytes removed from every non-blank line by
    /// `strip_indent_and_continuations`; a non-blank line with fewer
    /// leading spaces than this is rejected there.
    strip_depth: usize,
}
3923
/// A single raw line of a heredoc body.
struct HLine {
    /// Line content, including its leading indentation.
    text: String,
    /// Line number reported in `DecodeError::line` for problems found on
    /// this line.
    line: usize,
    /// NOTE(review): presumably the offset of the line start in the
    /// source; it is carried along but not otherwise interpreted in the
    /// visible code — confirm against the heredoc reader.
    line_start: usize,
}
3929
3930// ---------- number helpers ----------
3931
/// Returns `true` when `b` is an ASCII hexadecimal digit.
///
/// The test is base-agnostic on purpose: any hex digit qualifies while a
/// number is being scanned, and the final base-correctness check happens
/// in `parse_integer` / `parse_nondec_float`. Note that `p` is NOT a hex
/// digit (it is the binary exponent marker of non-decimal floats). The
/// second parameter is unused.
fn is_base_digit(b: u8, _prev_for_disambig: u8) -> bool {
    matches!(b, b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F')
}
3938
/// Parse an integer literal into an `i64`.
///
/// Accepted shape: optional `+` / `-` sign, optional lowercase radix
/// prefix (`0x`, `0o`, `0b`), then digits of that radix. Single
/// underscores may separate digits but may not lead, trail, or repeat.
/// Decimal literals may not have leading zeros.
///
/// # Errors
///
/// Returns a human-readable message when the prefix is an uppercase `0X`,
/// the digit body is empty, an underscore is misplaced, a digit is not
/// valid for the radix, or the value does not fit in `i64`.
fn parse_integer(s: &str) -> Result<i64, String> {
    let (sign, rest) = if let Some(r) = s.strip_prefix('-') { (-1i128, r) }
        else if let Some(r) = s.strip_prefix('+') { (1i128, r) }
        else { (1i128, s) };
    // The uppercase-prefix diagnostic has to be checked before the prefix
    // dispatch: once `strip_prefix("0x")` has matched, the text can no
    // longer start with `0X`, so a check inside that branch never fires.
    if rest.starts_with("0X") {
        return Err("hex prefix must be lowercase '0x'".into());
    }
    let (radix, body) = if let Some(r) = rest.strip_prefix("0x") {
        (16u32, r)
    } else if let Some(r) = rest.strip_prefix("0o") {
        (8u32, r)
    } else if let Some(r) = rest.strip_prefix("0b") {
        (2u32, r)
    } else {
        (10u32, rest)
    };
    if body.is_empty() {
        return Err("empty number".into());
    }
    if body.starts_with('_') || body.ends_with('_') {
        return Err("underscore must be between digits".into());
    }
    if radix == 10 && rest.len() > 1 && rest.starts_with('0') {
        return Err("leading zeros are not allowed on decimal integers".into());
    }
    // Strip underscores, ensuring each one sits between two digits.
    let mut clean = String::with_capacity(body.len());
    let mut prev_is_digit = false;
    for c in body.chars() {
        if c == '_' {
            if !prev_is_digit {
                return Err("underscore must be between digits".into());
            }
            prev_is_digit = false;
        } else {
            if !c.is_digit(radix) {
                return Err(format!("invalid digit '{c}' for base {radix}"));
            }
            clean.push(c);
            prev_is_digit = true;
        }
    }
    if !prev_is_digit {
        return Err("underscore must be between digits".into());
    }
    // Parse into i128 first so that `i64::MIN` (whose magnitude exceeds
    // `i64::MAX`) can be represented before the sign is applied.
    let n = i128::from_str_radix(&clean, radix)
        .map_err(|_| "integer out of range".to_string())?;
    let signed = sign * n;
    if signed < i64::MIN as i128 || signed > i64::MAX as i128 {
        return Err("integer out of i64 range".into());
    }
    Ok(signed as i64)
}
3992
3993fn parse_float(s: &str) -> Option<f64> {
3994    let (sign, rest) = if let Some(r) = s.strip_prefix('-') { (-1.0f64, r) }
3995        else if let Some(r) = s.strip_prefix('+') { (1.0f64, r) }
3996        else { (1.0f64, s) };
3997    if rest.starts_with("0x") || rest.starts_with("0o") || rest.starts_with("0b") {
3998        return parse_nondec_float(rest).map(|v| sign * v);
3999    }
4000    parse_dec_float(rest).map(|v| sign * v)
4001}
4002
4003fn parse_dec_float(s: &str) -> Option<f64> {
4004    // require digit on both sides of '.'
4005    let (m, e) = if let Some(idx) = s.find(['e', 'E']) {
4006        (&s[..idx], Some(&s[idx + 1..]))
4007    } else {
4008        (s, None)
4009    };
4010    if !m.contains('.') {
4011        return None; // decimal float requires a dot in our spec slice
4012    }
4013    let parts: Vec<&str> = m.splitn(2, '.').collect();
4014    if parts.len() != 2 || parts[0].is_empty() || parts[1].is_empty() {
4015        return None;
4016    }
4017    if !parts[0].chars().all(|c| c.is_ascii_digit() || c == '_')
4018        || !parts[1].chars().all(|c| c.is_ascii_digit() || c == '_')
4019    {
4020        return None;
4021    }
4022    if !valid_underscores(parts[0]) || !valid_underscores(parts[1]) {
4023        return None;
4024    }
4025    let int_part: String = parts[0].chars().filter(|&c| c != '_').collect();
4026    let frac_part: String = parts[1].chars().filter(|&c| c != '_').collect();
4027    let mut full = format!("{int_part}.{frac_part}");
4028    if let Some(es) = e {
4029        let es_clean = es.trim_start_matches(['+', '-']);
4030        if es_clean.contains('_') {
4031            return None;
4032        }
4033        if !es.chars().all(|c| c.is_ascii_digit() || c == '+' || c == '-') {
4034            return None;
4035        }
4036        if es_clean.is_empty() {
4037            return None;
4038        }
4039        full.push('e');
4040        full.push_str(es);
4041    }
4042    full.parse::<f64>().ok()
4043}
4044
4045fn parse_nondec_float(s: &str) -> Option<f64> {
4046    // `0xMANT[.FRAC]pEXP`
4047    let (radix, rest) = if let Some(r) = s.strip_prefix("0x") { (16u32, r) }
4048        else if let Some(r) = s.strip_prefix("0o") { (8u32, r) }
4049        else if let Some(r) = s.strip_prefix("0b") { (2u32, r) }
4050        else { return None; };
4051    let p_idx = rest.find('p')?;
4052    let mant = &rest[..p_idx];
4053    let exp_str = &rest[p_idx + 1..];
4054    if exp_str.is_empty() {
4055        return None;
4056    }
4057    if exp_str.contains('_') {
4058        return None;
4059    }
4060    let exp_clean = exp_str.trim_start_matches(['+', '-']);
4061    if !exp_clean.chars().all(|c| c.is_ascii_digit()) {
4062        return None;
4063    }
4064    let exp: i32 = exp_str.parse().ok()?;
4065    // mantissa: digit on both sides of dot if dot present
4066    let (int_part, frac_part) = if let Some(idx) = mant.find('.') {
4067        (&mant[..idx], &mant[idx + 1..])
4068    } else {
4069        (mant, "")
4070    };
4071    if mant.contains('.') {
4072        if int_part.is_empty() || frac_part.is_empty() {
4073            return None;
4074        }
4075    }
4076    if !valid_underscores(int_part) || !valid_underscores(frac_part) {
4077        return None;
4078    }
4079    let int_clean: String = int_part.chars().filter(|&c| c != '_').collect();
4080    let frac_clean: String = frac_part.chars().filter(|&c| c != '_').collect();
4081    if int_clean.is_empty() && frac_clean.is_empty() {
4082        return None;
4083    }
4084    if !int_clean.chars().all(|c| c.is_digit(radix)) {
4085        return None;
4086    }
4087    if !frac_clean.chars().all(|c| c.is_digit(radix)) {
4088        return None;
4089    }
4090    let int_val = if int_clean.is_empty() {
4091        0u128
4092    } else {
4093        u128::from_str_radix(&int_clean, radix).ok()?
4094    };
4095    let mut frac_val = 0f64;
4096    let mut div = radix as f64;
4097    for c in frac_clean.chars() {
4098        let d = c.to_digit(radix)? as f64;
4099        frac_val += d / div;
4100        div *= radix as f64;
4101    }
4102    let mantissa = int_val as f64 + frac_val;
4103    Some(mantissa * (2f64).powi(exp))
4104}
4105
/// Checks underscore placement inside a digit run.
///
/// Returns `true` for the empty string and for any text in which no
/// underscore leads, trails, or directly follows another underscore.
/// Characters other than `_` are not inspected.
fn valid_underscores(s: &str) -> bool {
    !(s.starts_with('_') || s.ends_with('_') || s.contains("__"))
}
4126
4127// ---------- date/time validation ----------
4128
4129fn validate_date(s: &str) -> Result<(), String> {
4130    if s.len() != 10 {
4131        return Err("invalid date length".into());
4132    }
4133    let b = s.as_bytes();
4134    if b[4] != b'-' || b[7] != b'-' {
4135        return Err("invalid date format".into());
4136    }
4137    for &i in &[0, 1, 2, 3, 5, 6, 8, 9] {
4138        if !b[i].is_ascii_digit() {
4139            return Err("date must be all digits".into());
4140        }
4141    }
4142    let year: u16 = (b[0] - b'0') as u16 * 1000
4143        + (b[1] - b'0') as u16 * 100
4144        + (b[2] - b'0') as u16 * 10
4145        + (b[3] - b'0') as u16;
4146    let month: u8 = (b[5] - b'0') * 10 + (b[6] - b'0');
4147    let day: u8 = (b[8] - b'0') * 10 + (b[9] - b'0');
4148    if month < 1 || month > 12 {
4149        return Err("month out of range".into());
4150    }
4151    if day < 1 || day > days_in_month(year, month) {
4152        return Err("day out of range".into());
4153    }
4154    Ok(())
4155}
4156
/// Validate a wall-clock time written as `HH:MM:SS`.
///
/// Checks, in this order: total length (8 bytes), the two `:`
/// separators, that every other position is an ASCII digit, then the
/// ranges hour <= 23, minute <= 59, second <= 59 (leap seconds are not
/// supported).
///
/// # Errors
///
/// Returns the message of the first check that fails.
fn validate_time(s: &str) -> Result<(), String> {
    let bytes = s.as_bytes();
    if bytes.len() != 8 {
        return Err("invalid time length".into());
    }
    if bytes[2] != b':' || bytes[5] != b':' {
        return Err("invalid time format".into());
    }
    if [0usize, 1, 3, 4, 6, 7].iter().any(|&i| !bytes[i].is_ascii_digit()) {
        return Err("time must be all digits".into());
    }
    // Safe after the digit check above: the value is at most 99.
    let two_digits = |at: usize| (bytes[at] - b'0') * 10 + (bytes[at + 1] - b'0');
    let limits = [
        (two_digits(0), 23u8, "hour out of range"),
        (two_digits(3), 59u8, "minute out of range"),
        (two_digits(6), 59u8, "second out of range (leap seconds not supported)"),
    ];
    for &(value, max, message) in limits.iter() {
        if value > max {
            return Err(message.into());
        }
    }
    Ok(())
}
4184
4185fn days_in_month(year: u16, month: u8) -> u8 {
4186    match month {
4187        1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
4188        4 | 6 | 9 | 11 => 30,
4189        2 => if is_leap(year) { 29 } else { 28 },
4190        _ => 0,
4191    }
4192}
4193
/// Gregorian leap-year rule: century years are leap only when divisible
/// by 400; all other years are leap when divisible by 4.
fn is_leap(y: u16) -> bool {
    if y % 100 == 0 {
        y % 400 == 0
    } else {
        y % 4 == 0
    }
}
4197
4198// ---------- heredoc body processing ----------
4199
4200/// SPEC §basic-string escapes: a `\uXXXX` / `\UXXXXXXXX` escape whose
4201/// decoded value falls in the surrogate range U+D800..U+DFFF is not a
4202/// Unicode scalar and is a parse error. The basic-string lexer
4203/// (`read_hex_codepoint`) already enforces this via `char::from_u32`,
4204/// which returns `None` for surrogates; basic-heredoc body lines are
4205/// collected raw here, so we validate the same rule by scanning the
4206/// body for surrogate escape sequences.
4207fn validate_heredoc_basic_surrogates(body: &HBody) -> Result<(), DecodeError> {
4208    for line in &body.lines {
4209        let bytes = line.text.as_bytes();
4210        let mut i = 0;
4211        while i < bytes.len() {
4212            // Only consider unescaped `\` — a `\\` is a literal
4213            // backslash and the following `u`/`U` is not an escape
4214            // introducer. Count preceding contiguous backslashes; an
4215            // odd count means this `\` is unescaped.
4216            if bytes[i] == b'\\' {
4217                // find the run of consecutive backslashes starting at i
4218                let mut j = i;
4219                while j < bytes.len() && bytes[j] == b'\\' {
4220                    j += 1;
4221                }
4222                let run = j - i;
4223                // pairs of `\\` consume themselves; only the last `\` of
4224                // an odd-length run is an escape introducer.
4225                if run % 2 == 1 && j < bytes.len() {
4226                    let intro = bytes[j];
4227                    let n = match intro {
4228                        b'u' => 4,
4229                        b'U' => 8,
4230                        _ => 0,
4231                    };
4232                    if n > 0 && j + 1 + n <= bytes.len() {
4233                        let hex = &line.text[j + 1..j + 1 + n];
4234                        if hex.bytes().all(|b| b.is_ascii_hexdigit()) {
4235                            if let Ok(cp) = u32::from_str_radix(hex, 16) {
4236                                if (0xD800..=0xDFFF).contains(&cp) {
4237                                    // column points at the leading `\`
4238                                    // of the escape (1-based, byte col).
4239                                    let esc_off = j - 1;
4240                                    return Err(DecodeError {
4241                                        line: line.line,
4242                                        column: esc_off + 1,
4243                                        message: format!(
4244                                            "surrogate codepoint U+{cp:04X} in escape"
4245                                        ),
4246                                    });
4247                                }
4248                            }
4249                        }
4250                    }
4251                }
4252                i = j;
4253            } else {
4254                i += 1;
4255            }
4256        }
4257    }
4258    Ok(())
4259}
4260
4261fn strip_indent_and_continuations(body: &HBody, allow_continuation: bool) -> Result<String, DecodeError> {
4262    let mut out = String::new();
4263    let mut first = true;
4264    let mut pending_continuation = false;
4265    let mut last_line_pos = (1usize, 0usize); // for trailing-continuation error
4266    for line in &body.lines {
4267        let raw = &line.text;
4268        last_line_pos = (line.line, line.line_start);
4269        // blank lines (only whitespace) are exempt from strip-depth check
4270        let is_blank = raw.bytes().all(|b| b == b' ' || b == b'\t');
4271        let stripped: &str = if is_blank {
4272            ""
4273        } else {
4274            // count leading spaces; must be >= strip_depth
4275            let leading_spaces = raw.bytes().take_while(|&b| b == b' ').count();
4276            if leading_spaces < body.strip_depth {
4277                return Err(DecodeError {
4278                    line: line.line,
4279                    column: leading_spaces + 1,
4280                    message: format!(
4281                        "heredoc body line indented {} spaces, less than strip depth {}",
4282                        leading_spaces, body.strip_depth
4283                    ),
4284                });
4285            }
4286            &raw[body.strip_depth..]
4287        };
4288        let mut piece = stripped.to_string();
4289        // line continuation: a `\` that is the last non-whitespace char
4290        let mut splice = false;
4291        if allow_continuation {
4292            // scan for trailing \ that is not preceded by an unescaped escape
4293            let trimmed_end = piece.trim_end_matches(|c: char| c == ' ' || c == '\t').to_string();
4294            if let Some(last_idx) = trimmed_end.rfind('\\') {
4295                if last_idx == trimmed_end.len() - 1 {
4296                    // count preceding backslashes
4297                    let preceding = trimmed_end[..last_idx]
4298                        .bytes()
4299                        .rev()
4300                        .take_while(|&b| b == b'\\')
4301                        .count();
4302                    if preceding % 2 == 0 {
4303                        // unescaped trailing backslash -> continuation
4304                        // remove the trailing \ and any following ws
4305                        piece = trimmed_end[..last_idx].to_string();
4306                        splice = true;
4307                    }
4308                }
4309            }
4310        }
4311        if first {
4312            out.push_str(&piece);
4313            first = false;
4314        } else if pending_continuation {
4315            // splice into previous: skip leading ws of this line
4316            let trimmed_start = piece.trim_start_matches(|c: char| c == ' ' || c == '\t');
4317            // also: a blank line during continuation is consumed entirely
4318            if !is_blank {
4319                out.push_str(trimmed_start);
4320            }
4321        } else {
4322            out.push('\n');
4323            out.push_str(&piece);
4324        }
4325        pending_continuation = splice;
4326    }
4327    if pending_continuation {
4328        return Err(DecodeError {
4329            line: last_line_pos.0,
4330            column: 1,
4331            message: "trailing line continuation has nothing to splice to".into(),
4332        });
4333    }
4334    Ok(out)
4335}
4336
4337fn apply_modifiers(s: String, mods: &[HMod]) -> Result<String, String> {
4338    let mut cur = s;
4339    for m in mods {
4340        match m.name.as_str() {
4341            "_fold_paragraphs" => {
4342                if !m.args.is_empty() {
4343                    return Err("fold_paragraphs() takes no arguments".into());
4344                }
4345                cur = fold_paragraphs(&cur);
4346            }
4347            "_trim" => {
4348                if m.args.len() < 2 || m.args.len() > 3 {
4349                    return Err("trim(chars, where, replacement = \"\") expects 2 or 3 arguments".into());
4350                }
4351                let chars = match &m.args[0] {
4352                    Value::String(s) => s.clone(),
4353                    _ => return Err("trim: first argument (chars) must be a string".into()),
4354                };
4355                let where_s = match &m.args[1] {
4356                    Value::String(s) => s.clone(),
4357                    _ => return Err("trim: second argument (where) must be a string".into()),
4358                };
4359                let replacement = if m.args.len() == 3 {
4360                    match &m.args[2] {
4361                        Value::String(s) => s.clone(),
4362                        _ => return Err("trim: third argument (replacement) must be a string".into()),
4363                    }
4364                } else {
4365                    String::new()
4366                };
4367                cur = apply_trim(&cur, &chars, &where_s, &replacement);
4368            }
4369            other => return Err(format!("unknown modifier: {other}")),
4370        }
4371    }
4372    Ok(cur)
4373}
4374
/// Fold each paragraph of `s` onto a single line.
///
/// Paragraphs are the pieces between double newlines (`"\n\n"`). Within
/// a paragraph the non-empty lines are joined with one space; the folded
/// paragraphs are then joined with a single `'\n'`.
fn fold_paragraphs(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for (index, paragraph) in s.split("\n\n").enumerate() {
        if index > 0 {
            out.push('\n');
        }
        let mut needs_space = false;
        for line in paragraph.split('\n').filter(|l| !l.is_empty()) {
            if needs_space {
                out.push(' ');
            }
            out.push_str(line);
            needs_space = true;
        }
    }
    out
}
4393
4394/// Swiss-army trim/replace.
4395///
4396/// `chars` = bag of characters to match (any single char in this string).
4397/// `where_s` = DSL flags. Recognized: `<` leading (whole string), `>`
4398/// trailing (whole string), `|` per-line edges, `*` every occurrence.
4399/// Unknown flags are silently ignored for forward compatibility.
4400/// `replacement` = what each run of matching chars becomes.
4401///
4402/// **Run collapse**: consecutive matching chars become one replacement,
4403/// not one per char.
4404///
4405/// If `*` is set, the whole-string and per-line flags become redundant
4406/// (every run anywhere is replaced). Otherwise, flags are applied in a
4407/// defined order: per-line (`|`) first, then leading (`<`), then
4408/// trailing (`>`).
4409fn apply_trim(s: &str, chars: &str, where_s: &str, replacement: &str) -> String {
4410    if chars.is_empty() {
4411        return s.to_string();
4412    }
4413    let char_set: std::collections::HashSet<char> = chars.chars().collect();
4414    let has_star = where_s.contains('*');
4415    let has_pipe = where_s.contains('|');
4416    let has_lt = where_s.contains('<');
4417    let has_gt = where_s.contains('>');
4418    if !(has_star || has_pipe || has_lt || has_gt) {
4419        return s.to_string();
4420    }
4421    if has_star {
4422        return replace_all_runs(s, &char_set, replacement);
4423    }
4424    let mut cur = s.to_string();
4425    if has_pipe {
4426        cur = per_line_edges(&cur, &char_set, replacement);
4427    }
4428    if has_lt {
4429        cur = replace_leading_run(&cur, &char_set, replacement);
4430    }
4431    if has_gt {
4432        cur = replace_trailing_run(&cur, &char_set, replacement);
4433    }
4434    cur
4435}
4436
/// Replace every maximal run of characters from `char_set` in `s` with a
/// single copy of `replacement`; all other characters are copied through.
fn replace_all_runs(s: &str, char_set: &std::collections::HashSet<char>, replacement: &str) -> String {
    let mut out = String::with_capacity(s.len());
    // True while we are inside a run that has already been replaced.
    let mut in_run = false;
    for c in s.chars() {
        if char_set.contains(&c) {
            if !in_run {
                out.push_str(replacement);
                in_run = true;
            }
        } else {
            out.push(c);
            in_run = false;
        }
    }
    out
}
4456
/// Replace the run of `char_set` characters at the very start of `s` with
/// one copy of `replacement`. When `s` does not start with such a
/// character it is returned unchanged (no replacement is inserted).
fn replace_leading_run(s: &str, char_set: &std::collections::HashSet<char>, replacement: &str) -> String {
    let rest = s.trim_start_matches(|c: char| char_set.contains(&c));
    if rest.len() == s.len() {
        // Nothing was trimmed, so there is no leading run.
        return s.to_string();
    }
    [replacement, rest].concat()
}
4474
/// Replace the run of `char_set` characters at the very end of `s` with
/// one copy of `replacement`. When `s` does not end with such a character
/// it is returned unchanged (no replacement is appended).
fn replace_trailing_run(s: &str, char_set: &std::collections::HashSet<char>, replacement: &str) -> String {
    let kept = s.trim_end_matches(|c: char| char_set.contains(&c));
    if kept.len() == s.len() {
        // Nothing was trimmed, so there is no trailing run.
        return s.to_string();
    }
    [kept, replacement].concat()
}
4492
4493fn per_line_edges(s: &str, char_set: &std::collections::HashSet<char>, replacement: &str) -> String {
4494    let mut out = String::with_capacity(s.len());
4495    let mut lines = s.split('\n').peekable();
4496    let mut first = true;
4497    while let Some(line) = lines.next() {
4498        if !first {
4499            out.push('\n');
4500        }
4501        first = false;
4502        let trimmed_start = replace_leading_run(line, char_set, replacement);
4503        let trimmed = replace_trailing_run(&trimmed_start, char_set, replacement);
4504        out.push_str(&trimmed);
4505    }
4506    out
4507}
4508
4509// ---------- encode emitter ----------
4510
4511/// Re-emit a decoded `Document` as DMS source. See SPEC §encode.
4512///
4513/// Contract: `decode(encode(decode(source)))` is data-equivalent to
4514/// `decode(source)`, has the same comments at the same attached paths,
4515/// and uses the same literal forms for values where preserved
4516/// (integer base, string form). Float formatting is `ryu`-shortest;
4517/// indentation is 2 spaces; lists/tables default to block form.
4518///
4519/// Round-trip stability: `encode(decode(encode(decode(source))))` is
4520/// byte-equal to `encode(decode(source))`.
4521///
4522/// Returns `Err(EncodeError::UnorderedInFullMode)` if the `Document`'s
4523/// body contains any `Value::UnorderedTable` (built only by the
4524/// `*_unordered` decoder entry points). Per SPEC §"Unordered tables",
4525/// a full-mode round-trip requires a stable iteration order; an
4526/// unordered Document cannot satisfy that contract. Use `encode_lite`
4527/// for canonical emit on unordered Documents.
4528pub fn encode(doc: &Document) -> Result<String, EncodeError> {
4529    if contains_unordered_table(&doc.body) {
4530        return Err(EncodeError::UnorderedInFullMode);
4531    }
4532    let mut emitter = DmsEmitter::new(doc);
4533    emitter.emit_document();
4534    Ok(emitter.out)
4535}
4536
4537/// Recursively checks whether a `Value` tree contains any
4538/// `Value::UnorderedTable`. Used by `encode` to enforce the SPEC
4539/// contract that full-mode emit refuses unordered documents.
4540fn contains_unordered_table(v: &Value) -> bool {
4541    match v {
4542        Value::UnorderedTable(_) => true,
4543        Value::Table(t) => t.values().any(contains_unordered_table),
4544        Value::List(items) => items.iter().any(contains_unordered_table),
4545        _ => false,
4546    }
4547}
4548
4549/// Lite-mode `encode` — emits the same data tree in **canonical
4550/// form**: comments are dropped, integers are emitted in decimal
4551/// regardless of source base, strings are emitted in basic-quoted
4552/// form regardless of source flavour. Accepts both full-mode and
4553/// lite-mode decoded `Document`s — the `comments` and `original_forms`
4554/// fields are simply ignored. See SPEC §encode.
4555///
4556/// Round-trip stability for lite-mode emit is **data-only**:
4557/// `decode(encode_lite(doc))` must be data-equivalent to `doc`. It is
4558/// **lossy by design** for comments and source forms.
4559pub fn encode_lite(doc: &Document) -> String {
4560    let mut emitter = DmsEmitter::new_lite(doc);
4561    emitter.emit_document();
4562    emitter.out
4563}
4564
/// Mirror of `ParseMode` for the emitter side; selects which encoder
/// `encode_with_mode` dispatches to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EmitMode {
    /// Default. Re-emits comments + `original_forms` for full
    /// round-trip preservation. Requires a full-mode-decoded
    /// `Document` (or one constructed in code with those fields
    /// populated). Handled by `encode`.
    Full,
    /// Canonical-form emit. Drops comments + `original_forms` even
    /// when present. Accepts any `Document`. Handled by `encode_lite`.
    Lite,
}
4577
4578/// Mode-parameterised `encode`. `EmitMode::Full` matches `encode()`
4579/// (and can fail with [`EncodeError::UnorderedInFullMode`]);
4580/// `EmitMode::Lite` matches `encode_lite()` (always `Ok`). See SPEC
4581/// §encode.
4582pub fn encode_with_mode(doc: &Document, mode: EmitMode) -> Result<String, EncodeError> {
4583    match mode {
4584        EmitMode::Full => encode(doc),
4585        EmitMode::Lite => Ok(encode_lite(doc)),
4586    }
4587}
4588
4589/// Tier-1 encode entry point — like `encode` but also emits tier-1
4590/// decorator calls at their recorded positions. Used by `tier1::encode_t1`.
4591#[allow(dead_code)]
4592pub(crate) fn encode_with_decorators(
4593    doc: &Document,
4594    decorators: &[tier1::DecoratorEntry],
4595) -> Result<String, EncodeError> {
4596    encode_with_decorators_and_suppressed(doc, decorators, std::collections::HashSet::new())
4597}
4598
4599/// Like [`encode_with_decorators`] but also accepts a set of paths whose
4600/// body value emission is suppressed. At suppressed paths the encoder emits
4601/// the decorator call(s) only — no `key: value` / `+ value` token — so that
4602/// Inner decoration-only nodes round-trip as `key: |dec` (no redundant `{}` or
4603/// other `empty_default` body). Used by `tier1::encode_t1_with_registry`.
4604pub(crate) fn encode_with_decorators_and_suppressed(
4605    doc: &Document,
4606    decorators: &[tier1::DecoratorEntry],
4607    suppressed_paths: std::collections::HashSet<Vec<BreadcrumbSegment>>,
4608) -> Result<String, EncodeError> {
4609    if contains_unordered_table(&doc.body) {
4610        return Err(EncodeError::UnorderedInFullMode);
4611    }
4612    let comments_by_path = SortedIndex::from_comments(&doc.comments);
4613    let forms_by_path = SortedIndex::from_forms(&doc.original_forms);
4614    let decorators_by_path = SortedIndex::from_decorators(decorators);
4615    let mut emitter = DmsEmitter {
4616        out: String::new(),
4617        comments_by_path,
4618        forms_by_path,
4619        decorators_by_path,
4620        suppressed_paths,
4621        lite: false,
4622        doc,
4623    };
4624    emitter.emit_document();
4625    Ok(emitter.out)
4626}
4627
4628// ---------- deprecated `to_dms*` aliases (SPEC v0.14 rename) ----------
4629//
4630// SPEC v0.14 renamed `to_dms` → `encode`. The old names live on for
4631// one release as deprecated thin wrappers — slated for removal in
4632// v0.15. See PORTING.md §"Migration from the `parse`/`to_dms` era".
4633
/// Deprecated alias for [`encode`]; forwards `doc` unchanged and returns
/// the same `Result`. Slated for removal in v0.4.
#[deprecated(since = "0.3.0", note = "use `encode` instead")]
pub fn to_dms(doc: &Document) -> Result<String, EncodeError> {
    encode(doc)
}
4639
/// Deprecated alias for [`encode_lite`]; forwards `doc` unchanged and
/// returns the same `String`. Slated for removal in v0.4.
#[deprecated(since = "0.3.0", note = "use `encode_lite` instead")]
pub fn to_dms_lite(doc: &Document) -> String {
    encode_lite(doc)
}
4645
/// Deprecated alias for [`encode_with_mode`]; forwards `doc` and `mode`
/// unchanged and returns the same `Result`. Slated for removal in v0.4.
#[deprecated(since = "0.3.0", note = "use `encode_with_mode` instead")]
pub fn to_dms_with_mode(doc: &Document, mode: EmitMode) -> Result<String, EncodeError> {
    encode_with_mode(doc, mode)
}
4651
/// Sorted index of `(path, value)` pairs with `O(log n)` lookup.
///
/// Replaces the previous `HashMap<Vec<BreadcrumbSegment>, V>` design,
/// which paid a `Vec<BreadcrumbSegment>` clone (and a sip-hash) per
/// entry on insert. On a comment-heavy config (30k comments) those
/// clones + hashes dominated `encode` wall time. The sorted-Vec
/// design borrows path slices directly from `doc.comments` /
/// `doc.original_forms` — zero allocations during build, lookup is
/// `binary_search_by`.
///
/// Same shape `dms-c` uses (see `sort_indices` in `dms.c`).
struct SortedIndex<'a, V> {
    /// Sorted-ascending by path. Each entry pairs a borrowed path slice
    /// with its value; `get` depends on this ordering for its binary
    /// search, so builders must keep the entries sorted.
    entries: Vec<(&'a [BreadcrumbSegment], V)>,
}
4667
4668impl<'a, V> SortedIndex<'a, V> {
4669    fn empty() -> Self {
4670        Self { entries: Vec::new() }
4671    }
4672
4673    /// Look up by exact-path match. Equivalent to `HashMap::get`.
4674    fn get(&self, query: &[BreadcrumbSegment]) -> Option<&V> {
4675        match self.entries.binary_search_by(|(p, _)| (*p).cmp(query)) {
4676            Ok(i) => Some(&self.entries[i].1),
4677            Err(_) => None,
4678        }
4679    }
4680}
4681
4682impl<'a> SortedIndex<'a, NodeComments<'a>> {
4683    /// Build from `doc.comments`. Groups by path; within a group,
4684    /// comments retain their original `doc.comments` order (the sort
4685    /// is stable on path, so equal paths preserve source order).
4686    fn from_comments(comments: &'a [AttachedComment]) -> Self {
4687        if comments.is_empty() {
4688            return Self::empty();
4689        }
4690        let mut idx: Vec<usize> = (0..comments.len()).collect();
4691        idx.sort_by(|&a, &b| comments[a].path.cmp(&comments[b].path));
4692        let mut entries: Vec<(&'a [BreadcrumbSegment], NodeComments<'a>)> =
4693            Vec::with_capacity(comments.len());
4694        for i in idx {
4695            let ac = &comments[i];
4696            let path: &'a [BreadcrumbSegment] = ac.path.as_slice();
4697            let same_as_prev = entries.last().is_some_and(|(p, _)| *p == path);
4698            if !same_as_prev {
4699                entries.push((path, NodeComments::default()));
4700            }
4701            let nc = &mut entries.last_mut().unwrap().1;
4702            match ac.position {
4703                CommentPosition::Leading => nc.leading.push(&ac.comment),
4704                CommentPosition::Inner => nc.inner.push(&ac.comment),
4705                CommentPosition::Trailing => nc.trailing.push(&ac.comment),
4706                CommentPosition::Floating => nc.floating.push(&ac.comment),
4707            }
4708        }
4709        // Capacity was over-allocated (one entry per comment, then
4710        // merged). Trim — these arrays live the whole emit.
4711        entries.shrink_to_fit();
4712        Self { entries }
4713    }
4714}
4715
4716impl<'a> SortedIndex<'a, &'a OriginalLiteral> {
4717    /// Build from `doc.original_forms`. Sorts by path; first entry
4718    /// wins on collision (matches the HashMap impl's `or_insert`).
4719    fn from_forms(forms: &'a [(Vec<BreadcrumbSegment>, OriginalLiteral)]) -> Self {
4720        if forms.is_empty() {
4721            return Self::empty();
4722        }
4723        let mut idx: Vec<usize> = (0..forms.len()).collect();
4724        idx.sort_by(|&a, &b| forms[a].0.cmp(&forms[b].0));
4725        let mut entries: Vec<(&'a [BreadcrumbSegment], &'a OriginalLiteral)> =
4726            Vec::with_capacity(forms.len());
4727        for i in idx {
4728            let (p, lit) = &forms[i];
4729            let path: &'a [BreadcrumbSegment] = p.as_slice();
4730            // First wins on path collision — skip duplicates.
4731            if entries.last().is_some_and(|(prev, _)| *prev == path) {
4732                continue;
4733            }
4734            entries.push((path, lit));
4735        }
4736        entries.shrink_to_fit();
4737        Self { entries }
4738    }
4739}
4740
4741impl<'a> SortedIndex<'a, NodeDecorations<'a>> {
4742    /// Build from a slice of `DecoratorEntry`. Groups by path;
4743    /// within a group, retains source order (sigil-map order, then
4744    /// within-sigil vec order).
4745    fn from_decorators(decorators: &'a [tier1::DecoratorEntry]) -> Self {
4746        if decorators.is_empty() {
4747            return Self::empty();
4748        }
4749        // Collect (path_slice, position, call_ref) triples.
4750        // We don't sort by position here — we bucket into per-position
4751        // vecs in a second pass, preserving the per-path source order
4752        // within each position bucket.
4753        let mut path_indices: Vec<usize> = (0..decorators.len()).collect();
4754        path_indices.sort_by(|&a, &b| decorators[a].path.cmp(&decorators[b].path));
4755
4756        let mut entries: Vec<(&'a [BreadcrumbSegment], NodeDecorations<'a>)> =
4757            Vec::with_capacity(decorators.len());
4758
4759        for i in path_indices {
4760            let entry = &decorators[i];
4761            let path: &'a [BreadcrumbSegment] = entry.path.as_slice();
4762            let same_as_prev = entries.last().is_some_and(|(p, _)| *p == path);
4763            if !same_as_prev {
4764                entries.push((path, NodeDecorations::default()));
4765            }
4766            let nd = &mut entries.last_mut().unwrap().1;
4767            // Iterate sigils in IndexMap order, then within-sigil vec order.
4768            for (_sigil, calls) in &entry.decorators {
4769                for call in calls {
4770                    match call.position {
4771                        tier1::DecorationPosition::Leading => nd.leading.push(call),
4772                        tier1::DecorationPosition::Inner => nd.inner.push(call),
4773                        tier1::DecorationPosition::Trailing => nd.trailing.push(call),
4774                        tier1::DecorationPosition::Floating => nd.floating.push(call),
4775                    }
4776                }
4777            }
4778        }
4779        entries.shrink_to_fit();
4780        Self { entries }
4781    }
4782}
4783
/// Encoder state for one `Document`: the `emit_*` methods walk
/// `doc` and append DMS text to `out`, consulting the path-keyed
/// indexes below for comments, original literal forms and decorators.
struct DmsEmitter<'a> {
    /// Output buffer; all emitted text is appended here.
    out: String,
    /// Path → `NodeComments` (per-position comment slices into
    /// `doc.comments`). Empty in lite mode. See `SortedIndex` doc for
    /// why this isn't a HashMap.
    comments_by_path: SortedIndex<'a, NodeComments<'a>>,
    /// Path → `&OriginalLiteral`. Empty in lite mode.
    forms_by_path: SortedIndex<'a, &'a OriginalLiteral>,
    /// Path → `NodeDecorations` (per-position decorator-call slices).
    /// Empty in tier-0 emit.
    decorators_by_path: SortedIndex<'a, NodeDecorations<'a>>,
    /// Paths whose body emission is suppressed. For an Inner-position
    /// decoration-only node where the body value equals the family's
    /// `empty_default`, we emit `key: |dec` (or `+ |dec`) and skip
    /// the value token entirely. Set by `encode_t1_with_registry` when
    /// it detects such paths; empty for all tier-0 and unregistered-
    /// tier-1 emit paths.
    suppressed_paths: std::collections::HashSet<Vec<BreadcrumbSegment>>,
    /// Lite mode (canonical-form emit) — drops comments and
    /// `original_forms` even when present in `doc`. See SPEC §encode.
    lite: bool,
    /// The document being encoded (front matter, body and comments
    /// are read from it during emit).
    doc: &'a Document,
}
4807
/// Comments attached to a single path, split by `CommentPosition`.
/// Each vector holds borrowed comments in the order
/// `SortedIndex::from_comments` pushed them (source order per path).
#[derive(Default)]
struct NodeComments<'a> {
    /// `CommentPosition::Leading` comments.
    leading: Vec<&'a Comment>,
    /// `CommentPosition::Inner` comments.
    inner: Vec<&'a Comment>,
    /// `CommentPosition::Trailing` comments.
    trailing: Vec<&'a Comment>,
    /// `CommentPosition::Floating` comments.
    floating: Vec<&'a Comment>,
}
4815
/// Decorator calls attached to a single path, split by
/// `tier1::DecorationPosition`. Each vector holds borrowed calls in
/// the order `SortedIndex::from_decorators` pushed them.
#[derive(Default)]
struct NodeDecorations<'a> {
    /// `DecorationPosition::Leading` calls.
    leading: Vec<&'a tier1::DecoratorCall>,
    /// `DecorationPosition::Inner` calls.
    inner: Vec<&'a tier1::DecoratorCall>,
    /// `DecorationPosition::Trailing` calls.
    trailing: Vec<&'a tier1::DecoratorCall>,
    /// `DecorationPosition::Floating` calls.
    floating: Vec<&'a tier1::DecoratorCall>,
}
4823
/// Two spaces. `emit_heredoc` adds this string's length to the
/// key line's indentation to place heredoc body and terminator lines.
/// NOTE(review): presumably also the per-level unit written by
/// `push_indent` — confirm against its definition.
const INDENT_STR: &str = "  ";
4825
4826impl<'a> DmsEmitter<'a> {
4827    fn new(doc: &'a Document) -> Self {
4828        Self {
4829            out: String::new(),
4830            comments_by_path: SortedIndex::from_comments(&doc.comments),
4831            forms_by_path: SortedIndex::from_forms(&doc.original_forms),
4832            decorators_by_path: SortedIndex::empty(),
4833            suppressed_paths: std::collections::HashSet::new(),
4834            lite: false,
4835            doc,
4836        }
4837    }
4838
4839    /// Lite-mode constructor — empty comment + form lookups, so the
4840    /// walker emits canonical form (no comments, decimal integers,
4841    /// basic-quoted strings) even when `doc.comments` and
4842    /// `doc.original_forms` are populated. See SPEC §encode.
4843    fn new_lite(doc: &'a Document) -> Self {
4844        Self {
4845            out: String::new(),
4846            comments_by_path: SortedIndex::empty(),
4847            forms_by_path: SortedIndex::empty(),
4848            decorators_by_path: SortedIndex::empty(),
4849            suppressed_paths: std::collections::HashSet::new(),
4850            lite: true,
4851            doc,
4852        }
4853    }
4854
4855    fn emit_document(&mut self) {
4856        // Front matter: emit the `+++` block whenever the parsed doc
4857        // carried one (meta=Some(...)), even if it's empty. The SPEC
4858        // §encode allows omitting an empty-meta block with no comments,
4859        // but round-trip data equivalence is cleaner when we preserve
4860        // the `Some({})` ↔ `None` distinction, since the canonical JSON
4861        // encoder keys off `meta.is_some()`. Emitting `+++\n+++\n` for
4862        // an empty-meta doc with no FM comments gives byte-stable round
4863        // trips through parse/emit.
4864        // Lite mode emits no comments, so FM comments don't force
4865        // a `+++` block — only an explicit `meta = Some(...)` does.
4866        let has_fm_comments = !self.lite && self.doc.comments.iter().any(|ac| {
4867            matches!(ac.path.first(), Some(BreadcrumbSegment::Key(k)) if k == "__fm__")
4868        });
4869        let fm_present = self.doc.meta.is_some();
4870        if fm_present || has_fm_comments {
4871            self.out.push_str("+++\n");
4872            // Emit FM keys at indent 0, with paths prefixed by __fm__.
4873            let fm_path: Vec<BreadcrumbSegment> = vec![BreadcrumbSegment::Key("__fm__".to_string())];
4874            if let Some(meta) = &self.doc.meta {
4875                self.emit_table_block(meta, &fm_path, 0);
4876            } else {
4877                // No meta keys, only floating FM comments — emit them at
4878                // the FM root.
4879                self.emit_floating(&fm_path, 0);
4880            }
4881            self.out.push_str("+++\n\n");
4882        }
4883        // Body
4884        let body_path: Vec<BreadcrumbSegment> = Vec::new();
4885        match &self.doc.body {
4886            Value::Table(t) => self.emit_table_block(t, &body_path, 0),
4887            Value::UnorderedTable(t) => {
4888                // Lite-mode emit on an unordered Document: iterate the
4889                // HashMap directly. Full-mode emit is gated upstream by
4890                // `encode()`, which returns
4891                // `Err(EncodeError::UnorderedInFullMode)` before we get
4892                // here.
4893                self.emit_unordered_table_block(t, &body_path, 0);
4894            }
4895            Value::List(items) => self.emit_list_block(items, &body_path, 0),
4896            other => {
4897                // Scalar root: emit any leading comments, then the value
4898                // on its own line, then trailing.
4899                let leading: Vec<&Comment> = self
4900                    .comments_by_path
4901                    .get(&body_path)
4902                    .map(|nc| nc.leading.iter().copied().collect())
4903                    .unwrap_or_default();
4904                for c in leading {
4905                    self.emit_comment_line(c, 0);
4906                }
4907                self.emit_value_inline(other, &body_path);
4908                self.emit_trailing_for(&body_path);
4909                self.out.push('\n');
4910                let floating: Vec<&Comment> = self
4911                    .comments_by_path
4912                    .get(&body_path)
4913                    .map(|nc| nc.floating.iter().copied().collect())
4914                    .unwrap_or_default();
4915                for c in floating {
4916                    self.emit_comment_line(c, 0);
4917                }
4918            }
4919        }
4920    }
4921
4922    fn emit_table_block(
4923        &mut self,
4924        t: &DmsMap<Value>,
4925        path: &[BreadcrumbSegment],
4926        indent: usize,
4927    ) {
4928        for (k, v) in t {
4929            let mut child_path: Vec<BreadcrumbSegment> = path.to_vec();
4930            child_path.push(BreadcrumbSegment::Key(k.clone()));
4931            // Leading comments for this child node
4932            if let Some(nc) = self.comments_by_path.get(&child_path) {
4933                let leading: Vec<&Comment> = nc.leading.iter().copied().collect();
4934                for c in leading {
4935                    self.emit_comment_line(c, indent);
4936                }
4937            }
4938            // Leading decorators for this child node (after comments)
4939            self.emit_leading_decorators_for(&child_path, indent);
4940            // The "key:" line. For block-form children (non-empty
4941            // table/list), we use `key:\n` then a deeper-indented block;
4942            // for inline values, `key: value`.
4943            //
4944            // Exception: if the container has a trailing comment AND its
4945            // contents are all flow-safe (scalars + nested flow-safe
4946            // containers, no heredocs, no comments inside), we emit it
4947            // as flow form so the trailing comment can sit on the same
4948            // line. Block-form would force us to emit `key:\n  # ...`,
4949            // which the parser rejects (`:<space>#` is not valid
4950            // inline-value syntax).
4951            let has_trailing = self
4952                .comments_by_path
4953                .get(&child_path)
4954                .map(|nc| !nc.trailing.is_empty())
4955                .unwrap_or(false)
4956                || self
4957                    .decorators_by_path
4958                    .get(&child_path)
4959                    .map(|nd| !nd.trailing.is_empty())
4960                    .unwrap_or(false);
4961            let has_inner = self.has_inner(&child_path)
4962                || self
4963                    .decorators_by_path
4964                    .get(&child_path)
4965                    .map(|nd| !nd.inner.is_empty())
4966                    .unwrap_or(false);
4967            let can_block = matches!(v,
4968                Value::Table(tt) if !tt.is_empty()
4969            ) || matches!(v,
4970                Value::UnorderedTable(tt) if !tt.is_empty()
4971            ) || matches!(v,
4972                Value::List(items) if !items.is_empty()
4973            );
4974            let needs_block = can_block
4975                && !(has_trailing && self.is_flow_safe(v, &child_path));
4976            self.push_indent(indent);
4977            self.out.push_str(&format_key(k));
4978            self.out.push(':');
4979            if self.suppressed_paths.contains(&child_path) {
4980                // Suppression takes priority over block-form: body emission is
4981                // omitted entirely. Used for inner decoration-only form (body
4982                // equals empty_default) AND for flow re-fold (body folded into
4983                // decorator params). Emit `key: |dec` with no value token.
4984                self.out.push(' ');
4985                self.emit_inner_decorators_for(&child_path);
4986                // trim trailing space left by emit_inner_decorators_for
4987                if self.out.ends_with(' ') {
4988                    self.out.pop();
4989                }
4990                self.out.push('\n');
4991            } else if needs_block {
4992                // `key: /* inner */ <newline>` — inner sits between
4993                // the colon and the EOL that opens the child block.
4994                if has_inner {
4995                    self.out.push(' ');
4996                    self.emit_inner_for(&child_path);
4997                    self.emit_inner_decorators_for(&child_path);
4998                    // trim trailing space
4999                    if self.out.ends_with(' ') {
5000                        self.out.pop();
5001                    }
5002                }
5003                self.out.push('\n');
5004                match v {
5005                    Value::Table(tt) => self.emit_table_block(tt, &child_path, indent + 1),
5006                    Value::UnorderedTable(tt) => {
5007                        self.emit_unordered_table_block(tt, &child_path, indent + 1)
5008                    }
5009                    Value::List(items) => self.emit_list_block(items, &child_path, indent + 1),
5010                    _ => unreachable!(),
5011                }
5012            } else {
5013                self.out.push(' ');
5014                self.emit_inner_for(&child_path);
5015                self.emit_inner_decorators_for(&child_path);
5016                self.emit_value_inline(v, &child_path);
5017                self.emit_trailing_for(&child_path);
5018                self.emit_trailing_decorators_for(&child_path);
5019                self.out.push('\n');
5020            }
5021        }
5022        // Floating comments + decorators attached to this container
5023        self.emit_floating(path, indent);
5024    }
5025
5026    /// Lite-mode-only counterpart of `emit_table_block` for unordered
5027    /// tables (HashMap-backed). Iteration order is arbitrary. Full-mode
5028    /// emit is gated upstream by `encode()`, which returns an error
5029    /// before this is reached. SPEC §"Unordered tables".
5030    fn emit_unordered_table_block(
5031        &mut self,
5032        t: &DmsHashMap<Value>,
5033        path: &[BreadcrumbSegment],
5034        indent: usize,
5035    ) {
5036        for (k, v) in t {
5037            let mut child_path: Vec<BreadcrumbSegment> = path.to_vec();
5038            child_path.push(BreadcrumbSegment::Key(k.clone()));
5039            // (Lite mode has no comment_by_path entries; the lookup
5040            // calls below are no-ops, but kept for symmetry with the
5041            // ordered variant in case a caller constructs an
5042            // UnorderedTable Document with comments by hand.)
5043            if let Some(nc) = self.comments_by_path.get(&child_path) {
5044                let leading: Vec<&Comment> = nc.leading.iter().copied().collect();
5045                for c in leading {
5046                    self.emit_comment_line(c, indent);
5047                }
5048            }
5049            let has_trailing = self
5050                .comments_by_path
5051                .get(&child_path)
5052                .map(|nc| !nc.trailing.is_empty())
5053                .unwrap_or(false);
5054            let has_inner = self.has_inner(&child_path);
5055            let can_block = matches!(v,
5056                Value::Table(tt) if !tt.is_empty()
5057            ) || matches!(v,
5058                Value::UnorderedTable(tt) if !tt.is_empty()
5059            ) || matches!(v,
5060                Value::List(items) if !items.is_empty()
5061            );
5062            let needs_block = can_block
5063                && !(has_trailing && self.is_flow_safe(v, &child_path));
5064            self.push_indent(indent);
5065            self.out.push_str(&format_key(k));
5066            self.out.push(':');
5067            if needs_block {
5068                if has_inner {
5069                    self.out.push(' ');
5070                    self.emit_inner_for(&child_path);
5071                    if self.out.ends_with(' ') {
5072                        self.out.pop();
5073                    }
5074                }
5075                self.out.push('\n');
5076                match v {
5077                    Value::Table(tt) => self.emit_table_block(tt, &child_path, indent + 1),
5078                    Value::UnorderedTable(tt) => {
5079                        self.emit_unordered_table_block(tt, &child_path, indent + 1)
5080                    }
5081                    Value::List(items) => self.emit_list_block(items, &child_path, indent + 1),
5082                    _ => unreachable!(),
5083                }
5084            } else {
5085                self.out.push(' ');
5086                self.emit_inner_for(&child_path);
5087                self.emit_value_inline(v, &child_path);
5088                self.emit_trailing_for(&child_path);
5089                self.out.push('\n');
5090            }
5091        }
5092        self.emit_floating(path, indent);
5093    }
5094
5095    fn emit_list_block(
5096        &mut self,
5097        items: &[Value],
5098        path: &[BreadcrumbSegment],
5099        indent: usize,
5100    ) {
5101        for (i, v) in items.iter().enumerate() {
5102            let mut child_path: Vec<BreadcrumbSegment> = path.to_vec();
5103            child_path.push(BreadcrumbSegment::Index(i));
5104            if let Some(nc) = self.comments_by_path.get(&child_path) {
5105                let leading: Vec<&Comment> = nc.leading.iter().copied().collect();
5106                for c in leading {
5107                    self.emit_comment_line(c, indent);
5108                }
5109            }
5110            // Leading decorators for this list item
5111            self.emit_leading_decorators_for(&child_path, indent);
5112            self.push_indent(indent);
5113            self.out.push('+');
5114            let has_inner = self.has_inner(&child_path)
5115                || self
5116                    .decorators_by_path
5117                    .get(&child_path)
5118                    .map(|nd| !nd.inner.is_empty())
5119                    .unwrap_or(false);
5120            if self.suppressed_paths.contains(&child_path) {
5121                // Suppression takes priority: body emission is omitted entirely.
5122                // Used for inner decoration-only form AND flow re-fold. Emit
5123                // `+ |dec` with no value token.
5124                self.out.push(' ');
5125                self.emit_inner_decorators_for(&child_path);
5126                if self.out.ends_with(' ') {
5127                    self.out.pop();
5128                }
5129                self.out.push('\n');
5130            } else {
5131            match v {
5132                Value::Table(tt) if !tt.is_empty() => {
5133                    // `+ /* inner */[EOL]<indent><first key>:`
5134                    if has_inner {
5135                        self.out.push(' ');
5136                        self.emit_inner_for(&child_path);
5137                        self.emit_inner_decorators_for(&child_path);
5138                        if self.out.ends_with(' ') {
5139                            self.out.pop();
5140                        }
5141                    }
5142                    self.emit_trailing_for(&child_path);
5143                    self.emit_trailing_decorators_for(&child_path);
5144                    self.out.push('\n');
5145                    self.emit_table_block(tt, &child_path, indent + 1);
5146                }
5147                Value::UnorderedTable(tt) if !tt.is_empty() => {
5148                    if has_inner {
5149                        self.out.push(' ');
5150                        self.emit_inner_for(&child_path);
5151                        self.emit_inner_decorators_for(&child_path);
5152                        if self.out.ends_with(' ') {
5153                            self.out.pop();
5154                        }
5155                    }
5156                    self.emit_trailing_for(&child_path);
5157                    self.emit_trailing_decorators_for(&child_path);
5158                    self.out.push('\n');
5159                    self.emit_unordered_table_block(tt, &child_path, indent + 1);
5160                }
5161                Value::List(inner) if !inner.is_empty() => {
5162                    if has_inner {
5163                        self.out.push(' ');
5164                        self.emit_inner_for(&child_path);
5165                        self.emit_inner_decorators_for(&child_path);
5166                        if self.out.ends_with(' ') {
5167                            self.out.pop();
5168                        }
5169                    }
5170                    self.emit_trailing_for(&child_path);
5171                    self.emit_trailing_decorators_for(&child_path);
5172                    self.out.push('\n');
5173                    self.emit_list_block(inner, &child_path, indent + 1);
5174                }
5175                _ => {
5176                    self.out.push(' ');
5177                    self.emit_inner_for(&child_path);
5178                    self.emit_inner_decorators_for(&child_path);
5179                    self.emit_value_inline(v, &child_path);
5180                    self.emit_trailing_for(&child_path);
5181                    self.emit_trailing_decorators_for(&child_path);
5182                    self.out.push('\n');
5183                }
5184            }
5185            } // end !suppressed
5186        }
5187        self.emit_floating(path, indent);
5188    }
5189
5190    fn emit_value_inline(&mut self, v: &Value, path: &[BreadcrumbSegment]) {
5191        match v {
5192            Value::Bool(b) => self.out.push_str(if *b { "true" } else { "false" }),
5193            Value::Integer(n) => self.emit_integer(*n, path),
5194            Value::Float(f) => self.emit_float(*f),
5195            Value::String(s) => self.emit_string(s, path),
5196            Value::OffsetDateTime(s)
5197            | Value::LocalDateTime(s)
5198            | Value::LocalDate(s)
5199            | Value::LocalTime(s) => self.out.push_str(s),
5200            Value::List(items) => {
5201                if items.is_empty() {
5202                    self.out.push_str("[]");
5203                } else {
5204                    // Non-empty list emitted as flow form would clash with
5205                    // our block-form rule, but inline contexts only arrive
5206                    // here for empty lists (block-form path takes
5207                    // non-empty). Defensive: emit flow-form.
5208                    self.out.push('[');
5209                    for (i, item) in items.iter().enumerate() {
5210                        if i > 0 {
5211                            self.out.push_str(", ");
5212                        }
5213                        let mut sub_path: Vec<BreadcrumbSegment> = path.to_vec();
5214                        sub_path.push(BreadcrumbSegment::Index(i));
5215                        self.emit_value_inline(item, &sub_path);
5216                    }
5217                    self.out.push(']');
5218                }
5219            }
5220            Value::Table(t) => {
5221                if t.is_empty() {
5222                    self.out.push_str("{}");
5223                } else {
5224                    self.out.push('{');
5225                    let mut first = true;
5226                    for (k, vv) in t {
5227                        if !first {
5228                            self.out.push_str(", ");
5229                        }
5230                        first = false;
5231                        self.out.push_str(&format_key(k));
5232                        self.out.push_str(": ");
5233                        let mut sub_path: Vec<BreadcrumbSegment> = path.to_vec();
5234                        sub_path.push(BreadcrumbSegment::Key(k.clone()));
5235                        self.emit_value_inline(vv, &sub_path);
5236                    }
5237                    self.out.push('}');
5238                }
5239            }
5240            Value::UnorderedTable(t) => {
5241                if t.is_empty() {
5242                    self.out.push_str("{}");
5243                } else {
5244                    self.out.push('{');
5245                    let mut first = true;
5246                    for (k, vv) in t {
5247                        if !first {
5248                            self.out.push_str(", ");
5249                        }
5250                        first = false;
5251                        self.out.push_str(&format_key(k));
5252                        self.out.push_str(": ");
5253                        let mut sub_path: Vec<BreadcrumbSegment> = path.to_vec();
5254                        sub_path.push(BreadcrumbSegment::Key(k.clone()));
5255                        self.emit_value_inline(vv, &sub_path);
5256                    }
5257                    self.out.push('}');
5258                }
5259            }
5260        }
5261    }
5262
5263    fn emit_integer(&mut self, n: i64, path: &[BreadcrumbSegment]) {
5264        if let Some(lit_ref) = self.forms_by_path.get(path).copied() {
5265            if let OriginalLiteral::Integer { lit } = lit_ref {
5266                self.out.push_str(lit.as_str());
5267                return;
5268            }
5269        }
5270        self.out.push_str(&n.to_string());
5271    }
5272
5273    fn emit_float(&mut self, f: f64) {
5274        if f.is_nan() {
5275            self.out.push_str("nan");
5276        } else if f.is_infinite() {
5277            self.out.push_str(if f > 0.0 { "inf" } else { "-inf" });
5278        } else {
5279            let mut buf = ryu::Buffer::new();
5280            self.out.push_str(buf.format(f));
5281        }
5282    }
5283
5284    fn emit_string(&mut self, s: &str, path: &[BreadcrumbSegment]) {
5285        let form_opt = self
5286            .forms_by_path
5287            .get(path)
5288            .and_then(|lit| match lit {
5289                OriginalLiteral::String { form } => Some(form.clone()),
5290                _ => None,
5291            });
5292        let form = form_opt.unwrap_or(StringForm::Basic);
5293        match form {
5294            StringForm::Basic => {
5295                self.out.push('"');
5296                self.out.push_str(&escape_basic(s));
5297                self.out.push('"');
5298            }
5299            StringForm::Literal => {
5300                self.out.push('\'');
5301                self.out.push_str(s);
5302                self.out.push('\'');
5303            }
5304            StringForm::Heredoc { flavor, label, modifiers } => {
5305                // The stored body is post-modifier. For byte-stable
5306                // round-trips, the emitted source must re-produce the
5307                // same post-modifier value under re-application. For
5308                // idempotent modifiers this is mostly automatic; for
5309                // `_fold_paragraphs()` (which joins lines within a
5310                // paragraph with spaces), we pre-expand each `\n` in
5311                // the stored value into a `\n\n` paragraph break so
5312                // that the re-applied modifier preserves (not merges)
5313                // line boundaries.
5314                let body_to_emit = if modifiers.iter().any(|m| m.name == "_fold_paragraphs") {
5315                    s.replace('\n', "\n\n")
5316                } else {
5317                    s.to_string()
5318                };
5319                self.emit_heredoc(&body_to_emit, flavor, label.as_deref(), &modifiers);
5320            }
5321        }
5322    }
5323
5324    fn emit_heredoc(
5325        &mut self,
5326        body: &str,
5327        flavor: HeredocFlavor,
5328        label: Option<&str>,
5329        modifiers: &[HeredocModifierCall],
5330    ) {
5331        // The "current line" the kvpair was just written on already has
5332        // text up to and including ": " (or "+ "). The caller's
5333        // `emit_value_inline` placed us right after the space. We compute
5334        // the kvpair's indent from the most recent newline in `out`.
5335        let kv_indent_spaces = {
5336            let bytes = self.out.as_bytes();
5337            let mut last_nl = None;
5338            for i in (0..bytes.len()).rev() {
5339                if bytes[i] == b'\n' {
5340                    last_nl = Some(i);
5341                    break;
5342                }
5343            }
5344            let line_start = last_nl.map(|i| i + 1).unwrap_or(0);
5345            // count leading spaces on this line
5346            let mut k = line_start;
5347            let mut n = 0usize;
5348            while k < bytes.len() && bytes[k] == b' ' {
5349                n += 1;
5350                k += 1;
5351            }
5352            n
5353        };
5354        let body_indent_str = " ".repeat(kv_indent_spaces + INDENT_STR.len());
5355        let term_indent_str = " ".repeat(kv_indent_spaces + INDENT_STR.len());
5356        let opener = match flavor {
5357            HeredocFlavor::BasicTriple => "\"\"\"",
5358            HeredocFlavor::LiteralTriple => "'''",
5359        };
5360        self.out.push_str(opener);
5361        if let Some(lbl) = label {
5362            self.out.push_str(lbl);
5363        }
5364        for m in modifiers {
5365            self.out.push(' ');
5366            self.out.push_str(&m.name);
5367            self.out.push('(');
5368            for (i, a) in m.args.iter().enumerate() {
5369                if i > 0 {
5370                    self.out.push_str(", ");
5371                }
5372                self.emit_modifier_arg(a);
5373            }
5374            self.out.push(')');
5375        }
5376        self.out.push('\n');
5377        // Body lines, indented to body_indent_str. The body is the
5378        // post-strip content as stored in Value::String — to round-trip
5379        // through the parser, we re-indent each line to body_indent and
5380        // emit a final body line that is empty (so the terminator
5381        // appears alone on its line). The parser's strip-indent logic
5382        // then yields the same content.
5383        if body.is_empty() {
5384            // Emit just the terminator on its own line.
5385        } else {
5386            for line in body.split('\n') {
5387                if line.is_empty() {
5388                    // Blank source line — output just newline (no
5389                    // indentation prefix, matches strip-indent rule).
5390                    self.out.push('\n');
5391                } else {
5392                    self.out.push_str(&body_indent_str);
5393                    self.out.push_str(line);
5394                    self.out.push('\n');
5395                }
5396            }
5397        }
5398        // Terminator
5399        self.out.push_str(&term_indent_str);
5400        let terminator = match (flavor, label) {
5401            (HeredocFlavor::BasicTriple, None) => "\"\"\"".to_string(),
5402            (HeredocFlavor::LiteralTriple, None) => "'''".to_string(),
5403            (_, Some(lbl)) => lbl.to_string(),
5404        };
5405        self.out.push_str(&terminator);
5406    }
5407
5408    fn emit_modifier_arg(&mut self, v: &Value) {
5409        match v {
5410            Value::Bool(b) => self.out.push_str(if *b { "true" } else { "false" }),
5411            Value::Integer(n) => self.out.push_str(&n.to_string()),
5412            Value::Float(f) => {
5413                if f.is_nan() {
5414                    self.out.push_str("nan");
5415                } else if f.is_infinite() {
5416                    self.out.push_str(if *f > 0.0 { "inf" } else { "-inf" });
5417                } else {
5418                    let mut buf = ryu::Buffer::new();
5419                    self.out.push_str(buf.format(*f));
5420                }
5421            }
5422            Value::String(s) => {
5423                self.out.push('"');
5424                self.out.push_str(&escape_basic(s));
5425                self.out.push('"');
5426            }
5427            Value::OffsetDateTime(s)
5428            | Value::LocalDateTime(s)
5429            | Value::LocalDate(s)
5430            | Value::LocalTime(s) => self.out.push_str(s),
5431            Value::List(_) | Value::Table(_) | Value::UnorderedTable(_) => {
5432                // Modifier args are scalars in practice; defensive
5433                // fallback emits empty container literal.
5434                if matches!(v, Value::List(_)) {
5435                    self.out.push_str("[]");
5436                } else {
5437                    self.out.push_str("{}");
5438                }
5439            }
5440        }
5441    }
5442
5443    fn emit_comment_line(&mut self, c: &Comment, indent: usize) {
5444        // The comment's `content` field preserves its original internal
5445        // whitespace verbatim (block-comment body lines retain their
5446        // own indentation, line comments are single-line text). Only
5447        // the FIRST line gets re-indented to the current emit context;
5448        // subsequent body lines keep their stored indentation, so a
5449        // round-trip is byte-stable. The terminator on a `###LABEL`
5450        // block sits on its own line, also stored verbatim.
5451        let text = &c.content;
5452        let prefix = INDENT_STR.repeat(indent);
5453        if !text.contains('\n') {
5454            self.out.push_str(&prefix);
5455            self.out.push_str(text);
5456            self.out.push('\n');
5457            return;
5458        }
5459        let mut first = true;
5460        for line in text.split('\n') {
5461            if !first {
5462                self.out.push('\n');
5463            }
5464            if first {
5465                self.out.push_str(&prefix);
5466                first = false;
5467            }
5468            self.out.push_str(line);
5469        }
5470        self.out.push('\n');
5471    }
5472
5473    fn emit_trailing_for(&mut self, path: &[BreadcrumbSegment]) {
5474        if let Some(nc) = self.comments_by_path.get(path) {
5475            let mut first = true;
5476            for t in &nc.trailing {
5477                if first {
5478                    self.out.push_str("  ");
5479                    first = false;
5480                } else {
5481                    self.out.push(' ');
5482                }
5483                self.out.push_str(&c_oneliner(t));
5484            }
5485        }
5486    }
5487
5488    fn emit_inner_for(&mut self, path: &[BreadcrumbSegment]) {
5489        if let Some(nc) = self.comments_by_path.get(path) {
5490            for c in &nc.inner {
5491                self.out.push_str(&c_oneliner(c));
5492                self.out.push(' ');
5493            }
5494        }
5495    }
5496
5497    fn has_inner(&self, path: &[BreadcrumbSegment]) -> bool {
5498        self.comments_by_path
5499            .get(path)
5500            .map(|nc| !nc.inner.is_empty())
5501            .unwrap_or(false)
5502    }
5503
5504    fn emit_floating(&mut self, path: &[BreadcrumbSegment], indent: usize) {
5505        if let Some(nc) = self.comments_by_path.get(path) {
5506            let floating: Vec<&Comment> = nc.floating.iter().copied().collect();
5507            for c in floating {
5508                self.emit_comment_line(c, indent);
5509            }
5510        }
5511        // Floating decorators at same path, after comments.
5512        if let Some(nd) = self.decorators_by_path.get(path) {
5513            let floating: Vec<&tier1::DecoratorCall> = nd.floating.iter().copied().collect();
5514            for call in floating {
5515                self.push_indent(indent);
5516                self.emit_decorator_call(call);
5517                self.out.push('\n');
5518            }
5519        }
5520    }
5521
5522    /// Emit leading decorators for a path (own lines, before the value line).
5523    /// Call this after comment leading emission at the same path.
5524    fn emit_leading_decorators_for(&mut self, path: &[BreadcrumbSegment], indent: usize) {
5525        if let Some(nd) = self.decorators_by_path.get(path) {
5526            let leading: Vec<&tier1::DecoratorCall> = nd.leading.iter().copied().collect();
5527            for call in leading {
5528                self.push_indent(indent);
5529                self.emit_decorator_call(call);
5530                self.out.push('\n');
5531            }
5532        }
5533    }
5534
5535    /// Emit inner decorators for a path (between key: and value, same line).
5536    /// Call this after comment inner emission at the same path.
5537    fn emit_inner_decorators_for(&mut self, path: &[BreadcrumbSegment]) {
5538        if let Some(nd) = self.decorators_by_path.get(path) {
5539            let inner: Vec<&tier1::DecoratorCall> = nd.inner.iter().copied().collect();
5540            for call in inner {
5541                self.emit_decorator_call(call);
5542                self.out.push(' ');
5543            }
5544        }
5545    }
5546
5547    /// Emit trailing decorators for a path (after value, same line, before EOL).
5548    /// Call this after comment trailing emission at the same path.
5549    fn emit_trailing_decorators_for(&mut self, path: &[BreadcrumbSegment]) {
5550        if let Some(nd) = self.decorators_by_path.get(path) {
5551            let trailing: Vec<&tier1::DecoratorCall> = nd.trailing.iter().copied().collect();
5552            for call in trailing {
5553                self.out.push(' ');
5554                self.emit_decorator_call(call);
5555            }
5556        }
5557    }
5558
5559    /// Emit one decorator call: `<sigil><name>[(group)...]`.
5560    /// Bare form when params is a single empty Named group.
5561    /// Respects `call_style`: Nameless calls emit `<sigil>` with no name.
5562    fn emit_decorator_call(&mut self, call: &tier1::DecoratorCall) {
5563        self.out.push_str(&call.sigil);
5564        if call.call_style == tier1::CallStyle::Named {
5565            // Build name: ns.fn or just fn
5566            let name = match &call.ns {
5567                Some(ns) => format!("{}.{}", ns, call.fn_name),
5568                None => call.fn_name.clone(),
5569            };
5570            self.out.push_str(&name);
5571        }
5572        // Nameless form: sigil only — no name emitted.
5573
5574        // Bare form: single Named(empty) → no parens at all.
5575        if call.params.len() == 1 {
5576            if let tier1::ParamGroup::Named(m) = &call.params[0] {
5577                if m.is_empty() {
5578                    // bare form — emit nothing
5579                    return;
5580                }
5581            }
5582        }
5583
5584        // Otherwise emit each group as `(...)`.
5585        for group in &call.params {
5586            self.out.push('(');
5587            match group {
5588                tier1::ParamGroup::Named(m) => {
5589                    let mut first = true;
5590                    for (k, v) in m {
5591                        if !first {
5592                            self.out.push_str(", ");
5593                        }
5594                        first = false;
5595                        self.out.push_str(&format_key(k));
5596                        self.out.push_str(": ");
5597                        let s = fmt_value_inline(v);
5598                        self.out.push_str(&s);
5599                    }
5600                }
5601                tier1::ParamGroup::Positional(items) => {
5602                    let mut first = true;
5603                    for v in items {
5604                        if !first {
5605                            self.out.push_str(", ");
5606                        }
5607                        first = false;
5608                        let s = fmt_value_inline(v);
5609                        self.out.push_str(&s);
5610                    }
5611                }
5612            }
5613            self.out.push(')');
5614        }
5615    }
5616
5617    fn push_indent(&mut self, indent: usize) {
5618        for _ in 0..indent {
5619            self.out.push_str(INDENT_STR);
5620        }
5621    }
5622
5623    /// True if `v` (rooted at `path`) can be safely emitted as a flow
5624    /// form: no heredoc strings (heredocs require own line), no
5625    /// comments attached to any descendant. Used to decide flow-vs-block
5626    /// when a trailing comment forces flow form.
5627    fn is_flow_safe(&self, v: &Value, path: &[BreadcrumbSegment]) -> bool {
5628        // Any descendant comment ⇒ unsafe, since flow form has no
5629        // place to put it. Skipped in lite mode (no comments emitted
5630        // anyway). We check by scanning all comment paths.
5631        if !self.lite {
5632            for ac in &self.doc.comments {
5633                if ac.path.len() > path.len() && ac.path.starts_with(path) {
5634                    // descendant has a comment
5635                    return false;
5636                }
5637            }
5638        }
5639        match v {
5640            Value::String(_) => {
5641                // If the string's original form is a heredoc, it's not
5642                // flow-safe (heredocs require their own line).
5643                if let Some(OriginalLiteral::String { form: StringForm::Heredoc { .. } }) =
5644                    self.forms_by_path.get(path).copied()
5645                {
5646                    return false;
5647                }
5648                true
5649            }
5650            Value::List(items) => {
5651                for (i, item) in items.iter().enumerate() {
5652                    let mut sub: Vec<BreadcrumbSegment> = path.to_vec();
5653                    sub.push(BreadcrumbSegment::Index(i));
5654                    if !self.is_flow_safe(item, &sub) {
5655                        return false;
5656                    }
5657                }
5658                true
5659            }
5660            Value::Table(t) => {
5661                for (k, vv) in t {
5662                    let mut sub: Vec<BreadcrumbSegment> = path.to_vec();
5663                    sub.push(BreadcrumbSegment::Key(k.clone()));
5664                    if !self.is_flow_safe(vv, &sub) {
5665                        return false;
5666                    }
5667                }
5668                true
5669            }
5670            Value::UnorderedTable(t) => {
5671                for (k, vv) in t {
5672                    let mut sub: Vec<BreadcrumbSegment> = path.to_vec();
5673                    sub.push(BreadcrumbSegment::Key(k.clone()));
5674                    if !self.is_flow_safe(vv, &sub) {
5675                        return false;
5676                    }
5677                }
5678                true
5679            }
5680            _ => true,
5681        }
5682    }
5683}
5684
5685/// Emit a comment whose content fits on one line, returning the text to
5686/// place after a value (no leading whitespace included). For block
5687/// comments that span multiple lines, the result still contains
5688/// newlines — only used in the `emit_trailing_for` path where the
5689/// parser invariant guarantees a single-line trailing comment.
5690fn c_oneliner(c: &Comment) -> String {
5691    c.content.clone()
5692}
5693
5694/// Serialize a `Value` to inline string form for use in decorator param groups.
5695/// Handles scalar values and flow containers. Heredoc strings are emitted as
5696/// basic-quoted strings (lossy but valid for round-trip).
5697fn fmt_value_inline(v: &Value) -> String {
5698    match v {
5699        Value::Bool(b) => (if *b { "true" } else { "false" }).to_string(),
5700        Value::Integer(n) => n.to_string(),
5701        Value::Float(f) => {
5702            if f.is_nan() {
5703                "nan".to_string()
5704            } else if f.is_infinite() {
5705                if *f > 0.0 { "inf".to_string() } else { "-inf".to_string() }
5706            } else {
5707                let mut buf = ryu::Buffer::new();
5708                buf.format(*f).to_string()
5709            }
5710        }
5711        Value::String(s) => {
5712            format!("\"{}\"", escape_basic(s))
5713        }
5714        Value::OffsetDateTime(s)
5715        | Value::LocalDateTime(s)
5716        | Value::LocalDate(s)
5717        | Value::LocalTime(s) => s.clone(),
5718        Value::List(items) => {
5719            let mut out = String::from("[");
5720            for (i, item) in items.iter().enumerate() {
5721                if i > 0 { out.push_str(", "); }
5722                out.push_str(&fmt_value_inline(item));
5723            }
5724            out.push(']');
5725            out
5726        }
5727        Value::Table(t) => {
5728            let mut out = String::from("{");
5729            let mut first = true;
5730            for (k, vv) in t {
5731                if !first { out.push_str(", "); }
5732                first = false;
5733                out.push_str(&format_key(k));
5734                out.push_str(": ");
5735                out.push_str(&fmt_value_inline(vv));
5736            }
5737            out.push('}');
5738            out
5739        }
5740        Value::UnorderedTable(t) => {
5741            let mut out = String::from("{");
5742            let mut first = true;
5743            for (k, vv) in t {
5744                if !first { out.push_str(", "); }
5745                first = false;
5746                out.push_str(&format_key(k));
5747                out.push_str(": ");
5748                out.push_str(&fmt_value_inline(vv));
5749            }
5750            out.push('}');
5751            out
5752        }
5753    }
5754}
5755
/// Escape `s` for use inside a basic (double-quoted) string.
///
/// Backslash, double quote, newline, carriage return, tab, backspace and
/// form feed get their short escapes; any other character below U+0020
/// is written as `\uXXXX` (four uppercase hex digits). Everything else
/// is copied through unchanged.
fn escape_basic(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        // Characters with a dedicated two-character escape.
        let short = match ch {
            '\\' => Some("\\\\"),
            '"' => Some("\\\""),
            '\n' => Some("\\n"),
            '\r' => Some("\\r"),
            '\t' => Some("\\t"),
            '\u{08}' => Some("\\b"),
            '\u{0c}' => Some("\\f"),
            _ => None,
        };
        match short {
            Some(seq) => escaped.push_str(seq),
            // Remaining control characters use the numeric escape.
            None if u32::from(ch) < 0x20 => {
                escaped.push_str(&format!("\\u{:04X}", u32::from(ch)));
            }
            None => escaped.push(ch),
        }
    }
    escaped
}
5775
5776fn format_key(k: &str) -> String {
5777    // If the key is a valid bare key (and non-empty), emit bare;
5778    // otherwise quote it. Bare key: `[A-Za-z0-9_-]+` plus unicode
5779    // letters/digits per `is_bare_key_char`.
5780    if !k.is_empty() && k.chars().all(is_bare_key_char) {
5781        k.to_string()
5782    } else {
5783        // Use literal quoting if the key contains no single quotes,
5784        // basic otherwise.
5785        if !k.contains('\'') && !k.contains('\n') && !k.contains('\r') {
5786            format!("'{}'", k)
5787        } else {
5788            format!("\"{}\"", escape_basic(k))
5789        }
5790    }
5791}
5792
5793// ---------- tests ----------
5794
5795#[cfg(test)]
5796mod tests {
5797    use super::*;
5798
5799    fn parse_table(src: &str) -> DmsMap<Value> {
5800        match decode(src).expect("decode failed") {
5801            Value::Table(t) => t,
5802            other => panic!("expected table, got {other:?}"),
5803        }
5804    }
5805
#[test]
fn empty_doc() {
    // An empty source decodes to an empty root table.
    let root = parse_table("");
    assert!(root.is_empty());
}
5810
#[test]
fn single_int() {
    // A plain decimal integer.
    let root = parse_table("n: 42\n");
    assert_eq!(root.get("n"), Some(&Value::Integer(42)));
}
5816
#[test]
fn hex_int() {
    // A `0x`-prefixed literal decodes to its numeric value.
    let root = parse_table("n: 0xFF\n");
    assert_eq!(root.get("n"), Some(&Value::Integer(255)));
}
5822
#[test]
fn float_dec() {
    // A decimal float; compared with a tolerance rather than exactly.
    let root = parse_table("n: 3.14\n");
    match &root["n"] {
        Value::Float(f) => assert!((*f - 3.14).abs() < 1e-12),
        _ => panic!(),
    }
}
5832
#[test]
fn float_inf() {
    // The bare word `inf` decodes to positive infinity.
    let root = parse_table("n: inf\n");
    assert_eq!(root.get("n"), Some(&Value::Float(f64::INFINITY)));
}
5838
#[test]
fn flow_array() {
    // A bracketed flow list with three elements.
    let root = parse_table("xs: [1, 2, 3]\n");
    match &root["xs"] {
        Value::List(items) => assert_eq!(items.len(), 3),
        _ => panic!(),
    }
}
5848
#[test]
fn flow_table() {
    // A braced flow table with two entries.
    let root = parse_table("p: {x: 1, y: 2}\n");
    match &root["p"] {
        Value::Table(entries) => assert_eq!(entries.len(), 2),
        _ => panic!(),
    }
}
5858
#[test]
fn nested_table() {
    // An indented block table under `a`.
    let root = parse_table("a:\n  b: 1\n  c: 2\n");
    match &root["a"] {
        Value::Table(entries) => assert_eq!(entries.len(), 2),
        _ => panic!(),
    }
}
5868
#[test]
fn nested_list() {
    // An indented block list using `+` item markers.
    let root = parse_table("xs:\n  + 1\n  + 2\n  + 3\n");
    match &root["xs"] {
        Value::List(items) => assert_eq!(items.len(), 3),
        _ => panic!(),
    }
}
5878
#[test]
fn list_of_tables() {
    // Each `+` item opens a table whose further keys continue on the
    // following, deeper-indented lines.
    let root = parse_table(
        "servers:\n  + name: \"web1\"\n    port: 80\n  + name: \"web2\"\n    port: 81\n",
    );
    match &root["servers"] {
        Value::List(items) => assert_eq!(items.len(), 2),
        _ => panic!(),
    }
}
5889
#[test]
fn heredoc_basic() {
    // A labelled basic heredoc: the body indentation is stripped and the
    // two lines are joined by a single newline.
    let root = parse_table("doc: \"\"\"EOF\n    hello\n    world\n    EOF\n");
    let expected = Value::String(String::from("hello\nworld"));
    assert_eq!(root.get("doc"), Some(&expected));
}
5896
#[test]
fn date_only() {
    // A date without a time component decodes to `LocalDate`.
    let root = parse_table("d: 1979-05-27\n");
    let expected = Value::LocalDate(String::from("1979-05-27"));
    assert_eq!(root.get("d"), Some(&expected));
}
5902
#[test]
fn offset_dt() {
    // A date-time with a `Z` suffix decodes to `OffsetDateTime`.
    let root = parse_table("d: 1979-05-27T07:32:00Z\n");
    let expected = Value::OffsetDateTime(String::from("1979-05-27T07:32:00Z"));
    assert_eq!(root.get("d"), Some(&expected));
}
5908
#[test]
fn rejects_tier1_in_front_matter() {
    // TIER1.md §"Behavior at the boundary" requires an actionable
    // forward-compat message pointing the caller at `decode_t1`.
    let failure = decode("+++\n_dms_tier: 1\n+++\nx: 1\n").unwrap_err();
    let mentions_decode_t1 = failure.message.contains("decode_t1");
    assert!(
        mentions_decode_t1,
        "expected tier-1 rejection to mention decode_t1, got: {}",
        failure.message
    );
}
5921
#[test]
fn accepts_explicit_tier_zero() {
    // An explicit `_dms_tier: 0` in front matter is accepted and the body
    // decodes as usual.
    let document = decode_document("+++\n_dms_tier: 0\n+++\nx: 1\n").unwrap();
    let root = match document.body {
        Value::Table(entries) => entries,
        other => panic!("expected table, got {other:?}"),
    };
    assert_eq!(root.get("x"), Some(&Value::Integer(1)));
}
5930
5931    // ----- comment-attachment tests (tier-0 feature) -----
5932
5933    fn key(s: &str) -> BreadcrumbSegment {
5934        BreadcrumbSegment::Key(s.to_string())
5935    }
5936
#[test]
fn comment_leading_attaches_to_next_kvpair() {
    // A line comment directly above `port: 8080` is Leading on `port`.
    let parsed = decode_document("# leading\nport: 8080\n").unwrap();
    assert_eq!(parsed.comments.len(), 1);
    let attached = &parsed.comments[0];
    assert_eq!(attached.position, CommentPosition::Leading);
    assert_eq!(attached.path, vec![key("port")]);
    assert_eq!(attached.comment.kind, CommentKind::Line);
    assert_eq!(attached.comment.content, "# leading");
}
5948
#[test]
fn comment_blank_line_gap_is_floating_on_container() {
    // A blank line between the comment and `port` detaches it: the
    // comment becomes Floating on the root (empty path).
    let parsed = decode_document("# floating\n\nport: 8080\n").unwrap();
    assert_eq!(parsed.comments.len(), 1);
    let attached = &parsed.comments[0];
    assert_eq!(attached.position, CommentPosition::Floating);
    assert!(
        attached.path.is_empty(),
        "expected empty path, got {:?}",
        attached.path
    );
    assert_eq!(attached.comment.content, "# floating");
}
5960
#[test]
fn comment_trailing_on_same_line() {
    // A comment after the value on the same line is Trailing on `port`.
    let parsed = decode_document("port: 8080   # default\n").unwrap();
    assert_eq!(parsed.comments.len(), 1);
    let attached = &parsed.comments[0];
    assert_eq!(attached.position, CommentPosition::Trailing);
    assert_eq!(attached.path, vec![key("port")]);
    assert_eq!(attached.comment.content, "# default");
}
5971
#[test]
fn comment_floating_at_end_of_block() {
    // The comment is the last line inside `a`, with no sibling after it,
    // so it is recorded as Floating on `a`.
    let parsed = decode_document("a:\n  x: 1\n  # leftover\n").unwrap();
    assert_eq!(parsed.comments.len(), 1);
    let attached = &parsed.comments[0];
    assert_eq!(attached.position, CommentPosition::Floating);
    assert_eq!(attached.path, vec![key("a")]);
    assert_eq!(attached.comment.content, "# leftover");
}
5983
#[test]
fn block_comment_attaches_as_leading() {
    // A `###` block comment directly above `port` is Leading on `port`,
    // and its stored content keeps both delimiters.
    let parsed = decode_document("###\nNOTE\n###\nport: 8080\n").unwrap();
    assert_eq!(parsed.comments.len(), 1);
    let attached = &parsed.comments[0];
    assert_eq!(attached.position, CommentPosition::Leading);
    assert_eq!(attached.path, vec![key("port")]);
    assert_eq!(attached.comment.kind, CommentKind::Block);
    let text = &attached.comment.content;
    assert!(
        text.starts_with("###") && text.ends_with("###"),
        "expected delimiters preserved, got: {:?}",
        attached.comment.content
    );
}
5999
#[test]
fn front_matter_comment_recorded_with_fm_prefix() {
    // A comment inside the `+++` front matter is recorded under a path
    // that starts with the `__fm__` key.
    let parsed =
        decode_document("+++\n# meta-leading\nauthor: \"x\"\n+++\nbody: 1\n").unwrap();
    assert_eq!(parsed.comments.len(), 1);
    let attached = &parsed.comments[0];
    assert_eq!(attached.position, CommentPosition::Leading);
    assert_eq!(attached.path, vec![key("__fm__"), key("author")]);
    assert_eq!(attached.comment.content, "# meta-leading");
}
6011
#[test]
fn document_with_no_comments_has_empty_comments_vec() {
    // A comment-free source yields no attached comments at all.
    let parsed = decode_document("a: 1\nb: 2\n").unwrap();
    assert!(
        parsed.comments.is_empty(),
        "expected no comments, got {:?}",
        parsed.comments
    );
}
6017
6018    // ----- encode emitter tests (round-trip contract) -----
6019
6020    fn roundtrip(src: &str) -> String {
6021        let doc = decode_document(src).expect("parse failed");
6022        encode(&doc).expect("encode failed")
6023    }
6024
#[test]
fn encode_int_base_preserved() {
    // Hex, octal, binary, underscore-grouped and explicitly signed
    // integers must be re-emitted in their original spelling.
    let src = "a: 0x1F40\nb: 0o755\nc: 0b1010_0110\nd: 1_000_000\ne: +42\nf: -7\n";
    let doc = decode_document(src).unwrap();
    let out = encode(&doc).unwrap();
    let expected_fragments = [
        "a: 0x1F40",
        "b: 0o755",
        "c: 0b1010_0110",
        "d: 1_000_000",
        "e: +42",
        "f: -7",
    ];
    for fragment in expected_fragments {
        assert!(out.contains(fragment), "out:\n{out}");
    }
    // Re-decoding the output gives back the same integer values.
    let reparsed = decode_document(&out).unwrap();
    assert_eq!(doc.body, reparsed.body);
}
6040
#[test]
fn encode_string_forms_preserved() {
    // One string per form: basic, literal, and the four heredoc variants
    // (basic/literal crossed with labelled/unlabelled).
    let src = concat!(
        "basic: \"hello\"\n",
        "lit: 'C:\\path'\n",
        "hd_b_lab: \"\"\"END\n",
        "  hello\n",
        "  END\n",
        "hd_b_unl: \"\"\"\n",
        "  one\n",
        "  \"\"\"\n",
        "hd_l_lab: '''END\n",
        "  raw\n",
        "  END\n",
        "hd_l_unl: '''\n",
        "  raw2\n",
        "  '''\n",
    );
    let doc = decode_document(src).unwrap();
    let out = encode(&doc).unwrap();
    let expected_fragments = [
        "basic: \"hello\"",
        "lit: 'C:\\path'",
        "\"\"\"END",
        "'''END",
    ];
    for fragment in expected_fragments {
        assert!(out.contains(fragment), "out:\n{out}");
    }
    // Re-decoding the output gives back the same values.
    let reparsed = decode_document(&out).unwrap();
    assert_eq!(doc.body, reparsed.body);
}
6069
#[test]
fn encode_heredoc_modifiers_preserved() {
    // A labelled heredoc carrying a `_trim(...)` modifier call.
    let src = concat!(
        "msg: \"\"\"END _trim(\"\\n\", \">\")\n",
        "  >  hi\n",
        "  END\n",
    );
    let doc = decode_document(src).unwrap();
    let out = encode(&doc).unwrap();
    let keeps_modifier = out.contains("_trim(");
    assert!(keeps_modifier, "expected _trim() in output:\n{out}");
    // Re-decoding the output gives back the same value.
    let reparsed = decode_document(&out).unwrap();
    assert_eq!(doc.body, reparsed.body);
}
6084
#[test]
fn encode_comments_at_attached_paths() {
    // Fixture: a leading and a trailing comment on `a`, plus a floating
    // comment at the end of block `b`. No inner comments are produced.
    let src = concat!(
        "# leading on a\n",
        "a: 1   # trailing on a\n",
        "b:\n",
        "  x: 2\n",
        "  # floating in b\n",
    );
    let doc = decode_document(src).unwrap();
    let out = encode(&doc).unwrap();
    // Re-parse the emitted text and check each comment kept its path.
    let doc2 = decode_document(&out).unwrap();
    assert_eq!(doc2.comments.len(), 3);
    let attached = |position: CommentPosition, name: &str| {
        doc2.comments
            .iter()
            .any(|ac| ac.position == position && ac.path == vec![key(name)])
    };
    let have_leading = attached(CommentPosition::Leading, "a");
    let have_trailing = attached(CommentPosition::Trailing, "a");
    let have_floating = attached(CommentPosition::Floating, "b");
    assert!(
        have_leading && have_trailing && have_floating,
        "out:\n{out}\ncomments: {:?}",
        doc2.comments
    );
}
6124
#[test]
fn encode_front_matter_omitted_when_empty() {
    // A document decoded without front matter must not gain a `+++`
    // fence when encoded.
    let out = roundtrip("x: 1\n");
    let has_fence = out.contains("+++");
    assert!(!has_fence, "expected no front matter in output:\n{out}");
}
6131
#[test]
fn encode_second_round_byte_stable() {
    // For each fixture, encoding the decoded output a second time must
    // reproduce the first encoding byte for byte.
    let fixtures = [
        "a: 1\nb: 2\n",
        "# leading\nport: 0x1F40   # trailing\n",
        "+++\nauthor: \"x\"\n+++\nbody: 1\n",
        concat!(
            "msg: \"\"\"END\n",
            "  hello world\n",
            "  END\n",
            "ints:\n",
            "  + 0xFF\n",
            "  + 1_000\n",
        ),
        "items: [1, 2, 3]\np: {x: 1, y: 2}\n",
    ];
    for src in fixtures {
        let first_pass = decode_document(src).unwrap();
        let out1 = encode(&first_pass).unwrap();
        let second_pass = decode_document(&out1).unwrap();
        let out2 = encode(&second_pass).unwrap();
        assert_eq!(
            out1, out2,
            "round-trip not stable for src:\n{src}\nout1:\n{out1}\nout2:\n{out2}"
        );
    }
}
6154
6155    // -- Modification scenarios (decode → edit → re-encode) --------------
6156    //
6157    // The SPEC §Comments §Round-trip semantics promises:
6158    //   "comments on still-present nodes travel with them; newly inserted
6159    //    nodes carry no comments; deleted nodes drop theirs."
6160    // These tests exercise that promise with explicit add / update / delete
6161    // mutations between parse and emit.
6162
6163    fn count_comments_at_path(doc: &Document, path: &[BreadcrumbSegment]) -> usize {
6164        doc.comments.iter().filter(|c| c.path == path).count()
6165    }
6166
6167    #[test]
6168    fn encode_value_update_preserves_attached_comments() {
6169        // Update a leaf value; leading + trailing comments on its kvpair
6170        // must travel with it.
6171        let src = "# the listening port\nport: 8080   # default for staging\nhost: \"localhost\"\n";
6172        let mut doc = decode_document(src).unwrap();
6173        if let Value::Table(ref mut t) = doc.body {
6174            // Replace 8080 with 5432.
6175            *t.get_mut("port").unwrap() = Value::Integer(5432);
6176        } else {
6177            panic!("expected table");
6178        }
6179        let emitted = encode(&doc).unwrap();
6180        let doc2 = decode_document(&emitted).unwrap();
6181        // Value updated.
6182        if let Value::Table(t) = &doc2.body {
6183            assert_eq!(t["port"], Value::Integer(5432));
6184        } else {
6185            panic!("expected table after re-parse");
6186        }
6187        // Comments still attached to `port`.
6188        let port = vec![BreadcrumbSegment::Key("port".to_string())];
6189        let leading = doc2.comments.iter().filter(|c| c.path == port && c.position == CommentPosition::Leading).count();
6190        let trailing = doc2.comments.iter().filter(|c| c.path == port && c.position == CommentPosition::Trailing).count();
6191        assert_eq!(leading, 1, "leading comment on `port` should survive update; got doc2.comments = {:?}", doc2.comments);
6192        assert_eq!(trailing, 1, "trailing comment on `port` should survive update; got doc2.comments = {:?}", doc2.comments);
6193    }
6194
6195    #[test]
6196    fn encode_deleted_key_drops_attached_comments() {
6197        // Delete a kvpair; its leading + trailing comments must NOT
6198        // survive (the node they attached to is gone).
6199        let src = "# keep this\nkeep: 1   # me too\n# drop this\ndrop: 2   # bye\n";
6200        let mut doc = decode_document(src).unwrap();
6201        if let Value::Table(ref mut t) = doc.body {
6202            t.shift_remove("drop");
6203        } else {
6204            panic!("expected table");
6205        }
6206        let emitted = encode(&doc).unwrap();
6207        let doc2 = decode_document(&emitted).unwrap();
6208        // `drop` is gone.
6209        if let Value::Table(t) = &doc2.body {
6210            assert!(!t.contains_key("drop"), "deleted key should not reappear");
6211            assert!(t.contains_key("keep"), "non-deleted key should remain");
6212        } else {
6213            panic!("expected table after re-parse");
6214        }
6215        // No comments at the deleted path.
6216        let drop_path = vec![BreadcrumbSegment::Key("drop".to_string())];
6217        assert_eq!(
6218            count_comments_at_path(&doc2, &drop_path),
6219            0,
6220            "deleted key's comments must not survive; got: {:?}",
6221            doc2.comments
6222        );
6223        // `keep`'s comments still there.
6224        let keep_path = vec![BreadcrumbSegment::Key("keep".to_string())];
6225        let keep_leading = doc2.comments.iter().filter(|c| c.path == keep_path && c.position == CommentPosition::Leading).count();
6226        let keep_trailing = doc2.comments.iter().filter(|c| c.path == keep_path && c.position == CommentPosition::Trailing).count();
6227        assert_eq!(keep_leading, 1, "leading comment on `keep` must survive sibling deletion");
6228        assert_eq!(keep_trailing, 1, "trailing comment on `keep` must survive sibling deletion");
6229    }
6230
6231    #[test]
6232    fn encode_inserted_key_carries_no_comments() {
6233        // Insert a new kvpair; it must come back with zero attached
6234        // comments.
6235        let src = "# leading on existing\nexisting: 1   # trailing on existing\n";
6236        let mut doc = decode_document(src).unwrap();
6237        if let Value::Table(ref mut t) = doc.body {
6238            t.insert("inserted".to_string(), Value::String("new".to_string()));
6239        } else {
6240            panic!("expected table");
6241        }
6242        let emitted = encode(&doc).unwrap();
6243        let doc2 = decode_document(&emitted).unwrap();
6244        // Both keys present.
6245        if let Value::Table(t) = &doc2.body {
6246            assert_eq!(t["existing"], Value::Integer(1));
6247            assert_eq!(t["inserted"], Value::String("new".to_string()));
6248        } else {
6249            panic!("expected table after re-parse");
6250        }
6251        // Inserted key has no comments.
6252        let inserted_path = vec![BreadcrumbSegment::Key("inserted".to_string())];
6253        assert_eq!(
6254            count_comments_at_path(&doc2, &inserted_path),
6255            0,
6256            "newly inserted key must not pick up comments; got: {:?}",
6257            doc2.comments
6258        );
6259        // Existing key's comments preserved.
6260        let existing_path = vec![BreadcrumbSegment::Key("existing".to_string())];
6261        assert_eq!(
6262            count_comments_at_path(&doc2, &existing_path),
6263            2,
6264            "existing key's leading + trailing comments must survive insertion of a sibling"
6265        );
6266    }
6267
6268    #[test]
6269    fn encode_combined_mutations() {
6270        // Stress: do all three at once — update one value, delete one
6271        // key, insert a new one. Each should behave per the SPEC
6272        // independently.
6273        let src = "# A\na: 1   # a-trail\n# B\nb: 2   # b-trail\n# C\nc: 3   # c-trail\n";
6274        let mut doc = decode_document(src).unwrap();
6275        if let Value::Table(ref mut t) = doc.body {
6276            *t.get_mut("a").unwrap() = Value::Integer(100);  // update
6277            t.shift_remove("b");                              // delete
6278            t.insert("d".to_string(), Value::Integer(4));     // insert
6279        } else {
6280            panic!("expected table");
6281        }
6282        let emitted = encode(&doc).unwrap();
6283        let doc2 = decode_document(&emitted).unwrap();
6284        if let Value::Table(t) = &doc2.body {
6285            assert_eq!(t["a"], Value::Integer(100));
6286            assert!(!t.contains_key("b"));
6287            assert_eq!(t["c"], Value::Integer(3));
6288            assert_eq!(t["d"], Value::Integer(4));
6289        } else {
6290            panic!("expected table after re-parse");
6291        }
6292        let p = |k: &str| vec![BreadcrumbSegment::Key(k.to_string())];
6293        // a: leading + trailing survived update
6294        assert_eq!(count_comments_at_path(&doc2, &p("a")), 2, "a should keep both comments");
6295        // b: deleted, no comments
6296        assert_eq!(count_comments_at_path(&doc2, &p("b")), 0, "b should be gone with its comments");
6297        // c: untouched, both comments present
6298        assert_eq!(count_comments_at_path(&doc2, &p("c")), 2, "c should keep both comments");
6299        // d: inserted, no comments
6300        assert_eq!(count_comments_at_path(&doc2, &p("d")), 0, "d should have no comments");
6301    }
6302
6303    // ----- encode_lite (canonical-form emit) tests -----
6304
6305    #[test]
6306    fn encode_lite_drops_comments() {
6307        // Full-mode parse captures comments; lite-mode emit drops them.
6308        let src = "# leading\na: 1   # trailing\n# floating\n";
6309        let doc = decode_document(src).unwrap();
6310        assert!(!doc.comments.is_empty(), "fixture should capture comments");
6311        let lite = encode_lite(&doc);
6312        assert!(!lite.contains('#'), "lite emit should have no comments, got:\n{lite}");
6313        // Data still round-trips.
6314        let doc2 = decode_document(&lite).unwrap();
6315        assert_eq!(doc.body, doc2.body);
6316        assert!(doc2.comments.is_empty(), "re-parsed lite emit should have no comments");
6317    }
6318
6319    #[test]
6320    fn encode_lite_canonicalises_integer_base() {
6321        // Full mode preserves source base; lite drops original_forms
6322        // and emits decimal.
6323        let src = "a: 0x1F40\nb: 0o755\nc: 0b1010_0110\nd: 1_000_000\n";
6324        let doc = decode_document(src).unwrap();
6325        let lite = encode_lite(&doc);
6326        // Decimal-only output: no 0x / 0o / 0b prefixes, no underscores.
6327        assert!(!lite.contains("0x"), "lite emit should not preserve hex base, got:\n{lite}");
6328        assert!(!lite.contains("0o"), "lite emit should not preserve oct base, got:\n{lite}");
6329        assert!(!lite.contains("0b"), "lite emit should not preserve bin base, got:\n{lite}");
6330        assert!(lite.contains("a: 8000"),       "lite emit should canonicalise 0x1F40 to 8000, got:\n{lite}");
6331        assert!(lite.contains("b: 493"),        "lite emit should canonicalise 0o755 to 493, got:\n{lite}");
6332        assert!(lite.contains("c: 166"),        "lite emit should canonicalise 0b10100110 to 166, got:\n{lite}");
6333        assert!(lite.contains("d: 1000000"),    "lite emit should canonicalise underscored 1_000_000 to 1000000, got:\n{lite}");
6334        // Data round-trips via lite emit.
6335        let doc2 = decode_document(&lite).unwrap();
6336        assert_eq!(doc.body, doc2.body);
6337    }
6338
6339    #[test]
6340    fn encode_lite_canonicalises_string_form() {
6341        // Full mode preserves '...'; lite emits "...".
6342        let src = "a: 'literal'\nb: \"basic\"\n";
6343        let doc = decode_document(src).unwrap();
6344        let lite = encode_lite(&doc);
6345        // No literal-string single quotes.
6346        assert!(!lite.contains("'literal'"),
6347                "lite emit should canonicalise '...' to \"...\", got:\n{lite}");
6348        assert!(lite.contains("a: \"literal\""), "got:\n{lite}");
6349        assert!(lite.contains("b: \"basic\""),   "got:\n{lite}");
6350    }
6351
6352    #[test]
6353    fn encode_lite_works_on_lite_parse() {
6354        // A Document parsed in lite mode has empty comments +
6355        // original_forms — encode_lite must still produce valid DMS.
6356        let src = "# leading\na: 0x1F40\nb: 'hello'\n";
6357        let doc = decode_lite_document(src).unwrap();
6358        assert!(doc.comments.is_empty());
6359        assert!(doc.original_forms.is_empty());
6360        let lite = encode_lite(&doc);
6361        let doc2 = decode_document(&lite).unwrap();
6362        assert_eq!(doc.body, doc2.body);
6363    }
6364
6365    #[test]
6366    fn encode_with_mode_dispatches_correctly() {
6367        let src = "# c\na: 1\n";
6368        let doc = decode_document(src).unwrap();
6369        assert_eq!(
6370            encode_with_mode(&doc, EmitMode::Full).unwrap(),
6371            encode(&doc).unwrap()
6372        );
6373        assert_eq!(
6374            encode_with_mode(&doc, EmitMode::Lite).unwrap(),
6375            encode_lite(&doc)
6376        );
6377    }
6378
6379    // ---------- unordered-table tests (SPEC §"Unordered tables") ----------
6380
6381    #[test]
6382    fn decode_document_unordered_produces_unordered_table() {
6383        // Body of a kvpair-only source must come back as
6384        // Value::UnorderedTable when parsed via the *_unordered entry
6385        // point.
6386        let doc = decode_document_unordered("a: 1\nb: 2\n").unwrap();
6387        match &doc.body {
6388            Value::UnorderedTable(t) => {
6389                assert_eq!(t.len(), 2);
6390                assert_eq!(t["a"], Value::Integer(1));
6391                assert_eq!(t["b"], Value::Integer(2));
6392            }
6393            other => panic!("expected Value::UnorderedTable, got {other:?}"),
6394        }
6395    }
6396
6397    #[test]
6398    fn decode_lite_document_unordered_produces_unordered_table() {
6399        // (unordered, lite) — the read-only fast path. No comments,
6400        // no original_forms, hash-only backing.
6401        let src = "# leading\na: 0xFF\nb: 'lit'\n";
6402        let doc = decode_lite_document_unordered(src).unwrap();
6403        assert!(doc.comments.is_empty(), "lite mode must drop comments");
6404        assert!(doc.original_forms.is_empty(), "lite mode must drop original_forms");
6405        match &doc.body {
6406            Value::UnorderedTable(t) => {
6407                assert_eq!(t.len(), 2);
6408                assert_eq!(t["a"], Value::Integer(255));
6409                assert_eq!(t["b"], Value::String("lit".to_string()));
6410            }
6411            other => panic!("expected Value::UnorderedTable, got {other:?}"),
6412        }
6413    }
6414
6415    #[test]
6416    fn unordered_data_round_trip() {
6417        // Parse unordered → emit lite → re-parse (ordered) → must be
6418        // data-equivalent (key set + values match; iteration order is
6419        // not promised).
6420        let src = "a: 1\nb: 2\nc: 3\nd: 'x'\n";
6421        let unordered = decode_document_unordered(src).unwrap();
6422        let emitted = encode_lite(&unordered);
6423        let reparsed = decode_document(&emitted).unwrap();
6424        // Compare structurally: collect both into ordered maps and
6425        // compare key/value sets.
6426        let unordered_keys: std::collections::HashSet<&String> = match &unordered.body {
6427            Value::UnorderedTable(t) => t.keys().collect(),
6428            other => panic!("expected UnorderedTable, got {other:?}"),
6429        };
6430        let reparsed_map: &DmsMap<Value> = match &reparsed.body {
6431            Value::Table(t) => t,
6432            other => panic!("expected Table after re-parse, got {other:?}"),
6433        };
6434        let reparsed_keys: std::collections::HashSet<&String> =
6435            reparsed_map.keys().collect();
6436        assert_eq!(unordered_keys, reparsed_keys, "key sets must match");
6437        if let Value::UnorderedTable(t) = &unordered.body {
6438            for (k, v) in t {
6439                assert_eq!(reparsed_map.get(k), Some(v), "value for {k} must match");
6440            }
6441        }
6442    }
6443
6444    #[test]
6445    fn encode_full_errors_on_unordered() {
6446        // Per SPEC §"Unordered tables": full-mode `encode` MUST refuse
6447        // an unordered Document (round-trip needs a stable order). As
6448        // of v0.3 this is a clean `Err(EncodeError::UnorderedInFullMode)`
6449        // rather than a panic.
6450        let doc = decode_document_unordered("a: 1\nb: 2\n").unwrap();
6451        assert_eq!(encode(&doc), Err(EncodeError::UnorderedInFullMode));
6452    }
6453
6454    #[test]
6455    fn encode_full_errors_on_unordered_nested() {
6456        // Same error must fire when the unordered table is nested
6457        // inside a List or another Table — the contains check is
6458        // recursive.
6459        let mut nested: DmsHashMap<Value> = DmsHashMap::default();
6460        nested.insert("x".to_string(), Value::Integer(1));
6461        let doc = Document {
6462            meta: None,
6463            body: Value::List(vec![Value::UnorderedTable(nested)]),
6464            comments: Vec::new(),
6465            original_forms: Vec::new(),
6466        };
6467        assert_eq!(encode(&doc), Err(EncodeError::UnorderedInFullMode));
6468    }
6469
6470    #[test]
6471    fn encode_lite_accepts_unordered() {
6472        // Lite emit accepts unordered Document and yields valid DMS.
6473        // No round-trip stability promised, but data equivalence is.
6474        let src = "a: 1\nb: 2\n";
6475        let doc = decode_document_unordered(src).unwrap();
6476        let lite = encode_lite(&doc);
6477        // Should not contain any internal markers; should re-parse.
6478        let doc2 = decode_document(&lite).unwrap();
6479        match &doc2.body {
6480            Value::Table(t) => {
6481                assert_eq!(t.len(), 2);
6482                assert_eq!(t["a"], Value::Integer(1));
6483                assert_eq!(t["b"], Value::Integer(2));
6484            }
6485            other => panic!("expected Table after re-parse, got {other:?}"),
6486        }
6487    }
6488
6489    #[test]
6490    fn decode_document_ordered_unaffected() {
6491        // Sanity: the default parse path still produces Value::Table,
6492        // and is otherwise unchanged by the addition of UnorderedTable.
6493        let doc = decode_document("a: 1\nb: 2\n").unwrap();
6494        match &doc.body {
6495            Value::Table(t) => {
6496                assert_eq!(t.len(), 2);
6497                let keys: Vec<&String> = t.keys().collect();
6498                // IndexMap preserves insertion order — that's the
6499                // whole point of the ordered path.
6500                assert_eq!(keys, vec![&"a".to_string(), &"b".to_string()]);
6501            }
6502            other => panic!("expected Value::Table, got {other:?}"),
6503        }
6504        // Round-trip via full encode still works.
6505        let emitted = encode(&doc).expect("encode failed");
6506        let doc2 = decode_document(&emitted).unwrap();
6507        assert_eq!(doc.body, doc2.body);
6508    }
6509
6510    #[test]
6511    fn unordered_nested_table_via_block() {
6512        // Nested block tables under unordered mode also become
6513        // UnorderedTable (the rule is: every body table is unordered).
6514        let src = "outer:\n  inner: 1\n  other: 2\n";
6515        let doc = decode_document_unordered(src).unwrap();
6516        match &doc.body {
6517            Value::UnorderedTable(t) => {
6518                match t.get("outer").unwrap() {
6519                    Value::UnorderedTable(inner) => {
6520                        assert_eq!(inner.len(), 2);
6521                        assert_eq!(inner["inner"], Value::Integer(1));
6522                        assert_eq!(inner["other"], Value::Integer(2));
6523                    }
6524                    other => panic!("expected UnorderedTable inner, got {other:?}"),
6525                }
6526            }
6527            other => panic!("expected UnorderedTable outer, got {other:?}"),
6528        }
6529    }
6530
6531    #[test]
6532    fn unordered_flow_table() {
6533        // Inline flow tables {a: 1, b: 2} also become UnorderedTable
6534        // under --ignore-order.
6535        let src = "x: { a: 1, b: 2 }\n";
6536        let doc = decode_document_unordered(src).unwrap();
6537        match &doc.body {
6538            Value::UnorderedTable(outer) => match outer.get("x").unwrap() {
6539                Value::UnorderedTable(t) => {
6540                    assert_eq!(t.len(), 2);
6541                    assert_eq!(t["a"], Value::Integer(1));
6542                    assert_eq!(t["b"], Value::Integer(2));
6543                }
6544                other => panic!("expected UnorderedTable for flow, got {other:?}"),
6545            },
6546            other => panic!("expected UnorderedTable, got {other:?}"),
6547        }
6548    }
6549}