rustledger_parser/cst/
format.rs

1//! Opinionated CST-backed formatter (phase 4.1 of #1262).
2//!
3//! [`format_source`] is a pure function `&str → String`: it
4//! reparses the input into a CST and emits text in one canonical
5//! form per AST shape. Two semantically-equivalent inputs produce
6//! byte-identical output; idempotence (`f(f(x)) == f(x)`) follows
7//! trivially.
8//!
9//! Replaces the pre-#1262 source-level formatter that took
10//! `(source, ParseResult, FormatConfig)` and re-emitted via the
11//! AST-driven `rustledger_core::format` path. Typed-directive
12//! synthesis (`rustledger_core::format::format_directives`) still
13//! lives in `rustledger-core` for callers that build a directive
14//! from scratch (e.g., `rledger add`, importer extract, FFI
15//! `format.entry`) — that's a different shape of input and is
16//! out of scope here.
17//!
18//! # Typed-directive emit: known coupling
19//!
20//! The typed-directive path is a two-pass shim: callers run
21//! `core::format::format_directives` to get bean-format-style text,
22//! then run that text back through [`format_source`] for the
23//! canonical pass. This keeps the FINAL byte sequence single-
24//! sourced (always emitted by this module), but it means
25//! `core::format` is permanently load-bearing as a parser-clean
26//! intermediate and every canonical-form rule needs the legacy
27//! emitter to produce SOMETHING the new parser accepts.
28//!
29//! Call sites (`rustledger-ffi-wasi::router::canonical_format_directives`,
30//! `rustledger::cmd::add_cmd::canonical_format_directive`,
31//! `rustledger::cmd::extract_cmd`) all guard the round-trip with
32//! an explicit `parse(&raw)` step that bails on parse errors, so a
33//! divergence between the two emitters surfaces as a hard error
34//! instead of silently dropping content.
35//!
36//! The eventual fix is a typed-directive emit path on this module
37//! (`format_directive(&Directive) -> String`) that bypasses the
38//! source-string round-trip. Tracked in a follow-up issue.
39//!
40//! # Canonical form (locked in the PR-decision comment on #1262)
41//!
42//! - Indent inside a directive body: 2 spaces. Tabs converted.
43//! - Blank lines between directives: preserved from the source
44//!   (#1325). Grouped directives (consecutive `open`s, a `price`
45//!   feed) stay grouped; the formatter does not insert or collapse
46//!   blank lines, matching Python `bean-format`.
47//! - Blank lines inside a directive: 0.
48//! - Number lexical form: thousands separators dropped; user
49//!   decimal-place count preserved.
50//! - Comment content: verbatim.
51//! - Comment positions: normalized to the attachment slot
52//!   (header-trailing / inter-directive / body-internal /
53//!   posting-trailing).
54//! - Cost spec spacing: `{cost CCY}` (no inner padding).
55//! - Tag/link order on a transaction header: source order, after
56//!   the strings.
57//! - Trailing newline at EOF: always exactly one.
58//! - Line endings: LF; CRLF inputs normalized.
59//! - Leading BOM: dropped.
60//!
61//! No `FormatConfig` parameter. One canonical form, no knobs.
62
63use crate::cst::ast::{self, AstNode, AstToken, MetaEntry, SourceFile};
64
/// File-wide column targets used to line up posting amounts.
///
/// Alignment is bean-format-style and works on two axes. Every
/// posting's number (or arithmetic expression) is right-justified
/// inside a fixed-width **number field**: the field begins at column
/// `number_col` and spans `number_width` characters. Numbers narrower
/// than the field are padded on the left with spaces, which keeps the
/// currency column — immediately after the field — at the same
/// position throughout the file regardless of each number's width or
/// sign.
///
/// - `number_col`   = INDENT + max(account width with optional `flag `) + 2
/// - `number_width` = widest rendered posting number / arithmetic
///   expression in the file (sign included)
///
/// The type is `Copy` and `Default`. The default (all-zero) value is
/// the alignment for a file without postings — no transactions, or
/// only transactions with no AMOUNT.
///
/// `#[non_exhaustive]` leaves room for a future column-derivation
/// rule to add fields without breaking downstream consumers.
///
/// **Why this name.** The public path is
/// `rustledger_parser::format::PostingAlignment`; qualifying the name
/// by its purpose (posting layout column widths) keeps it from
/// colliding with any later, more generic "alignment" type (text
/// justification, memory layout, etc.).
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
#[non_exhaustive]
pub struct PostingAlignment {
    /// 0-indexed column where the right-justified number field
    /// begins.
    pub number_col: usize,
    /// Number-field width in characters. Narrower numbers are
    /// space-padded on the left so the currency column is uniform.
    pub number_width: usize,
}
100
/// Indentation prefix for lines inside a directive body (postings,
/// metadata): exactly two ASCII spaces, matching the "2 spaces"
/// indent rule of the canonical form described in the module docs.
const INDENT: &str = "  ";
103
104/// Format a Beancount source file in opinionated canonical form.
105///
106/// Reparses internally — callers that already have a CST in hand
107/// and want to avoid the double-parse can use [`format_node`].
108///
109/// Returns canonical text; output always ends with exactly one
110/// trailing newline (even for an empty file, where the output is
111/// just `"\n"`).
112///
113/// **Line-ending normalization runs BEFORE parsing.** The lexer
114/// does not treat bare `\r` as a line terminator, so a classic-
115/// Mac-authored `directive\r…\rdirective\r` would otherwise parse
116/// as a single broken directive and the rest of the user's ledger
117/// would be silently dropped. We normalize `\r\n` and bare `\r`
118/// to `\n` first, then parse — matching the canonical-form
119/// promise that line endings are LF-only on output.
120#[must_use]
121pub fn format_source(source: &str) -> String {
122    let (stripped, _had_bom) = crate::bom::strip_leading(source);
123    let normalized = crlf_to_lf_outside_strings(stripped);
124    let parsed = SourceFile::parse(&normalized);
125    format_node(parsed.syntax())
126}
127
128/// Like [`format_source`] but reuses the caller's
129/// [`crate::ParseResult`] instead of re-parsing `source`.
130///
131/// Skips both expensive pre-passes the bare `format_source` runs
132/// every call: the lex+parse from `SourceFile::parse(&normalized)`,
133/// and the `O(N_postings)` `compute_alignment` walk. Both pieces
134/// are already on `parse_result` (in `syntax_root` and
135/// `alignment` respectively, populated by `parse_via_cst`). For
136/// any consumer that already holds a `ParseResult` — the LSP
137/// `format_document` handler, the FFI `format.source` endpoint,
138/// the WASM `ParsedLedger::format` bridge — this entry skips two
139/// redundant traversals of the file.
140///
141/// **Output equivalence with `format_source`.** Pinned by
142/// `parse_result_alignment_cache::format_source_with_parsed_matches_format_source_under_fallback`
143/// (the fallback exercises broken sources) and
144/// `cst::format::tests::format_source_with_parsed_matches_format_source`
145/// (the cache path exercises clean sources) across LF / CRLF /
146/// BOM / parse-error / mixed-line-ending fixtures. The cache-
147/// path equivalence holds because the formatter rebuilds output
148/// from each directive's typed values rather than echoing
149/// trivia, so the CRLF-vs-LF difference in the underlying CST
150/// trivia never reaches the output. The fallback path is
151/// byte-trivially equivalent (it IS `format_source`).
152///
153/// **CRLF re-injection is still the caller's responsibility.**
154/// Same as `format_source`: this function always returns LF;
155/// LSP consumers that need to preserve CRLF for Windows-
156/// authored files call [`lf_to_crlf_outside_strings`] on the
157/// returned text.
158///
159/// **Parse-error fallback.** When `parse_result.errors` is
160/// non-empty, this function delegates to `format_source(source)`
161/// — losing the cache benefit but preserving byte-identity for
162/// inputs whose CST diverges from what `format_source`'s
163/// pre-parse normalization would produce. Concretely: bare-`\r`
164/// (classic Mac) line terminators are normalized to LF by
165/// `format_source` before parsing, but `parse_via_cst` does NOT
166/// normalize them — so the cached CST treats them as broken
167/// content and `parse_result.errors` is non-empty. The fallback
168/// path keeps the byte-identity claim total instead of
169/// "holds-only-when-clean".
170///
171/// **Stale `parse_result` is the caller's responsibility.** The
172/// producer-side cache invariant (see
173/// [`crate::ParseResult::alignment`] rustdoc) says
174/// `parse_result` must come from a fresh `parse(source)` with
175/// the same `source`. A `debug_assert_eq!` compares the CST's
176/// text length against `source.len() - bom_offset` to catch the
177/// most common mismatched-pair class (different documents have
178/// different lengths) in debug builds; release builds skip the
179/// check. Identical-length mismatches still pass silently —
180/// the rustdoc-level contract remains the source of truth.
181///
182/// # Panics
183///
184/// Panics if `parse_result.syntax_root` is not a `SOURCE_FILE`
185/// (always true for results produced by [`crate::parse`]).
186///
187/// In debug builds, panics on a `(parse_result, source)`
188/// length-mismatch via `debug_assert_eq!`. Release builds
189/// silently emit possibly-wrong output (the producer-only
190/// invariant is the caller's responsibility).
191#[must_use]
192pub fn format_source_with_parsed(parse_result: &crate::ParseResult, source: &str) -> String {
193    // Parse-error fallback. See the function rustdoc for the
194    // rationale: `parse_via_cst` does not run the same input
195    // normalization `format_source` does (no CRLF/bare-CR
196    // normalize), so for sources containing bare-`\r` line
197    // terminators the cached CST is wrong-shaped and the cache
198    // path would diverge from `format_source`. Delegating
199    // preserves byte-identity unconditionally.
200    if !parse_result.errors.is_empty() {
201        return format_source(source);
202    }
203    let node = parse_result.syntax_node();
204    // Defensive length check (debug-only). Catches the most
205    // common form of `(parse_result, source)` mismatched pair —
206    // different documents with different lengths. The CST's
207    // text range is BOM-stripped, so we add back the BOM bytes
208    // if the parser saw one.
209    //
210    // Computed outside the `debug_assert_eq!` to avoid clippy's
211    // `debug_assert_with_mut_call` (`syntax_node()` does an Arc
212    // bump, which clippy treats as state mutation in a debug
213    // context).
214    let cst_len =
215        usize::from(node.text_range().len()) + if parse_result.has_leading_bom { 3 } else { 0 };
216    debug_assert_eq!(
217        cst_len,
218        source.len(),
219        "format_source_with_parsed called with a `source` whose length doesn't \
220         match the CST stored in `parse_result`. The two arguments came from \
221         different documents — the cache path will emit text for the wrong \
222         buffer. See `ParseResult::alignment` rustdoc for the producer-only \
223         invariant.",
224    );
225    format_node_with_alignment(&node, parse_result.alignment)
226}
227
228/// Like [`format_source`], but returns the parse errors instead
229/// of silently formatting around them.
230///
231/// `format_source` is intentionally infallible — the canonical
232/// formatter must still emit *something* for a file the parser
233/// could only recover from. Tooling that wants to refuse to
234/// rewrite a file with parse errors (the `rledger format` CLI,
235/// the LSP `format` handler) previously had to call `parse`
236/// out-of-band, inspect `errors`, then call `format_source` on
237/// the SAME input — a contract two functions cooperated on
238/// implicitly, and the kind of pairing a future caller could
239/// easily forget. This helper makes the contract explicit.
240///
241/// Returns `Ok(formatted)` if and only if `parse(source).errors`
242/// would be empty. Otherwise returns the parse errors verbatim,
243/// in the same order the parser emitted them.
244///
245/// # Errors
246///
247/// Returns `Err(Vec<ParseError>)` containing every parse error
248/// the underlying [`parse`](crate::parse) call would surface for
249/// `source`. The caller decides whether to abort, render the
250/// errors, or fall back to a non-canonical pass.
251pub fn try_format_source(source: &str) -> Result<String, Vec<crate::ParseError>> {
252    let result = crate::parse(source);
253    if !result.errors.is_empty() {
254        return Err(result.errors);
255    }
256    // Reuse the parse + alignment we already produced for the
257    // error gate instead of letting `format_source` re-parse +
258    // re-walk every posting. Byte-identical output pinned by
259    // `format_source_with_parsed_matches_format_source`.
260    Ok(format_source_with_parsed(&result, source))
261}
262
263/// Convert every `\n` line terminator OUTSIDE string literals back
264/// to `\r\n`, leaving `\n` characters inside strings (and inside
265/// comments… see below) untouched.
266///
267/// The canonical form emitted by [`format_source`] is LF-only.
268/// Editors that round-trip Windows-authored files want to see CRLF
269/// echoed back on every line. This helper bridges the two by
270/// walking the canonical output with the shared `SourceState`
271/// state machine. The walker respects:
272///
273/// - String literals: bytes pass through verbatim. The user's
274///   original line endings inside a multi-line narration / note /
275///   document string are preserved.
276/// - Line comments (`;`, `%`, `#!`, `#+`): the comment's
277///   terminating newline IS a real structural line terminator, so
278///   it gets converted to CRLF; bytes inside the comment region
279///   (which can include arbitrary characters, notably stray `"`)
280///   pass through without flipping the in-string state. `#!` and
281///   `#+` open a comment at any column — the lexer's
282///   `SHEBANG` / `EMACS_DIRECTIVE` regexes carry no line-start
283///   anchor, and the state machine matches that classification.
284///
285/// The helper lives in this module rather than the LSP crate
286/// because its correctness depends on the lexer's `STRING` and
287/// comment rules. Keep it co-located with the formatter so a
288/// lexer change forces a co-evaluation here.
289#[must_use]
290pub fn lf_to_crlf_outside_strings(s: &str) -> String {
291    let mut out = String::with_capacity(s.len() + s.matches('\n').count());
292    // BOM is data, not classification input. We re-prepend it
293    // verbatim and let the body start fresh in Code state. The
294    // sibling crlf_to_lf_outside_strings does the same so the two
295    // walkers handle a leading-BOM file identically.
296    let (body, bom) = match s.strip_prefix('\u{FEFF}') {
297        Some(rest) => (rest, "\u{FEFF}"),
298        None => (s, ""),
299    };
300    out.push_str(bom);
301    let mut chars = body.chars().peekable();
302    let mut state = SourceState::Code;
303    let mut prev_was_backslash = false;
304    while let Some(ch) = chars.next() {
305        let peek = chars.peek().copied();
306        match state {
307            SourceState::InString => out.push(ch),
308            SourceState::InComment | SourceState::Code => {
309                if ch == '\n' {
310                    out.push_str("\r\n");
311                } else {
312                    out.push(ch);
313                }
314            }
315        }
316        state = advance_source_state(ch, peek, state, &mut prev_was_backslash);
317    }
318    out
319}
320
321/// Render typed Beancount `Directive`s in the canonical form
322/// emitted by [`format_source`].
323///
324/// Two-pass pipeline:
325///
326/// 1. Synthesize a source string via the typed-directive emitter
327///    in `rustledger_core::format::format_directives`. That
328///    emitter is `Directive → text`; its output is bean-format-
329///    style, parser-clean, and used here purely as an
330///    intermediate.
331/// 2. Re-parse the synthesized text. If the legacy emitter
332///    produced something the new parser cannot fully accept,
333///    return [`CanonicalizeError::ReparseFailed`] rather than
334///    silently emitting the recoverable subset — that silent-loss
335///    failure mode is what the older `crates/rustledger/tests/
336///    format_compat.rs` (deleted in phase 4.1, distinct from the
337///    phase 4.2 file-pair suite at `crates/rustledger-parser/
338///    tests/format_compat/`) used to guard against. The new file-
339///    pair suite exercises `format_source`, not this two-pass
340///    shim; a future change to `canonicalize_directives`'s error
341///    semantics needs its own dedicated regression test.
342/// 3. Run the re-parsed text through [`format_source`] for the
343///    canonical pass.
344///
345/// Single source of truth for the synthesize → canonicalize
346/// shim. Every consumer that builds a typed `Directive` in memory
347/// and wants canonical text — `rledger add`, `rledger extract`,
348/// the FFI `format.entry` / `format.entries` endpoints — should
349/// call this function instead of reinventing the pipeline.
350pub fn canonicalize_directives<'a, I>(
351    directives: I,
352    config: &rustledger_core::format::FormatConfig,
353) -> Result<String, CanonicalizeError>
354where
355    I: IntoIterator<Item = &'a rustledger_core::Directive>,
356    I::IntoIter: ExactSizeIterator,
357{
358    // Take the count off the ExactSizeIterator without
359    // collecting — the legacy emitter only walks the iterator
360    // once, so we don't need to materialize a Vec just to know
361    // how many directives the caller passed.
362    let iter = directives.into_iter();
363    let input_count = iter.len();
364    let raw = rustledger_core::format::format_directives(iter, config);
365    let parse_result = crate::parse(&raw);
366    if !parse_result.errors.is_empty() {
367        return Err(CanonicalizeError::ReparseFailed {
368            errors: parse_result
369                .errors
370                .iter()
371                .map(ToString::to_string)
372                .collect(),
373        });
374    }
375    // Count check covers the only Directive variants we have
376    // today (12, all of which surface on parse_result.directives).
377    // If a future `rustledger_core::Directive` variant is added
378    // that the parser routes to a different `ParseResult`
379    // collection (e.g., a typed Pushtag whose legacy text the
380    // parser puts on a `pragmas` field), this check needs to
381    // include that field too — otherwise a perfectly healthy
382    // round-trip would always report DirectiveCountMismatch. The
383    // compile-time `_directive_variant_fixture_coverage` match
384    // pins the variant set we're committed to here; any new
385    // variant breaks that match and surfaces this same
386    // maintenance need.
387    let reparsed_count = parse_result.directives.len();
388    if reparsed_count != input_count {
389        return Err(CanonicalizeError::DirectiveCountMismatch {
390            input: input_count,
391            reparsed: reparsed_count,
392        });
393    }
394    Ok(format_source(&raw))
395}
396
/// Failure modes of [`canonicalize_directives`].
///
/// The enum is `#[non_exhaustive]`: a variant added later (e.g. a
/// `CanonicalizationTimeout` for an async path, or a guard for a
/// new canonical-form rule) is then not a SemVer-breaking change.
/// Consumers must include a `_ => …` arm when matching.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub enum CanonicalizeError {
    /// The synthesized intermediate text did not re-parse cleanly.
    /// Only the rendered error messages are carried, so callers can
    /// surface a diagnostic; the intermediate text itself is not
    /// kept, since it is an internal artifact the caller has no
    /// control over.
    ReparseFailed {
        /// One rendered message per parse error reported for the
        /// intermediate text.
        errors: Vec<String>,
    },
    /// The synthesized intermediate parsed without errors, yet the
    /// number of directives recovered differs from the number
    /// passed in. The legacy emitter and the new parser therefore
    /// disagree about what counts as a directive — typically a
    /// future `rustledger_core::Directive` variant whose legacy text
    /// the CST parser silently swallows as comments /
    /// error-recovery trivia. Without this guard the call would
    /// round-trip to truncated text and report no error.
    DirectiveCountMismatch {
        /// How many directives the caller supplied.
        input: usize,
        /// How many directives the parser recovered from the
        /// synthesized text.
        reparsed: usize,
    },
}

/// Human-readable rendering of each failure. `ReparseFailed` shows
/// the total error count followed by at most the first three
/// messages, joined with `"; "`.
impl std::fmt::Display for CanonicalizeError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::ReparseFailed { errors } => {
                write!(
                    f,
                    "canonical formatter failed to re-parse the synthesized \
                     directive text ({} error(s)): ",
                    errors.len(),
                )?;
                // Preview only the first three messages, `"; "`-separated,
                // written straight into the formatter.
                for (idx, message) in errors.iter().take(3).enumerate() {
                    if idx > 0 {
                        f.write_str("; ")?;
                    }
                    f.write_str(message)?;
                }
                Ok(())
            }
            Self::DirectiveCountMismatch { input, reparsed } => write!(
                f,
                "the canonical formatter could not emit {input} directive(s) \
                 without loss ({reparsed} survived the round-trip). This is \
                 an rledger bug; please report it with the input directives.",
            ),
        }
    }
}

// No underlying `source()` error is exposed: parse errors are carried
// as already-rendered strings.
impl std::error::Error for CanonicalizeError {}
458
459/// Replace CRLF and bare-CR line terminators with LF, but ONLY
460/// outside string literals.
461///
462/// String literals (`"…"`) can contain raw `\r` and `\n` per the
463/// lexer's `STRING` rule; folding CR inside a string would mutate
464/// the user's data. Uses the shared `SourceState` state machine
465/// to track string / comment boundaries.
466///
467/// Cheap fast path: if the input contains no `\r`, returns the
468/// source slice borrowed (no allocation). Used by
469/// [`format_source`] before parsing so the lexer never has to see
470/// legacy line endings. Exposed publicly under [`crlf_to_lf_outside_strings`]
471/// for tooling (CLI `--diff`, format-equivalence checks) that
472/// needs the same string-aware normalization.
473pub fn crlf_to_lf_outside_strings(src: &str) -> std::borrow::Cow<'_, str> {
474    if !src.contains('\r') {
475        return std::borrow::Cow::Borrowed(src);
476    }
477    // Re-prepend the BOM verbatim and let the body start fresh in
478    // Code state. The state machine no longer needs line-start
479    // tracking — the lexer's `SHEBANG` / `EMACS_DIRECTIVE` regexes
480    // have no line-start anchor, so `#!`/`#+` open a comment at
481    // any column, and the state machine mirrors that.
482    let (body, bom) = match src.strip_prefix('\u{FEFF}') {
483        Some(rest) => (rest, "\u{FEFF}"),
484        None => (src, ""),
485    };
486    let mut out = String::with_capacity(src.len());
487    out.push_str(bom);
488    let mut chars = body.chars().peekable();
489    let mut state = SourceState::Code;
490    let mut prev_was_backslash = false;
491    while let Some(ch) = chars.next() {
492        let peek = chars.peek().copied();
493        match state {
494            SourceState::InString => out.push(ch),
495            _ => {
496                if ch == '\r' {
497                    out.push('\n');
498                    if peek == Some('\n') {
499                        chars.next();
500                    }
501                } else {
502                    out.push(ch);
503                }
504            }
505        }
506        state = advance_source_state(ch, peek, state, &mut prev_was_backslash);
507    }
508    std::borrow::Cow::Owned(out)
509}
510
511/// `true` iff `src` contains at least one `\r` byte OUTSIDE a
512/// string literal — i.e. the byte sequence the canonical
513/// formatter would fold to `\n` via
514/// [`crlf_to_lf_outside_strings`].
515///
516/// This is the explicit predicate companion to the Cow return of
517/// [`crlf_to_lf_outside_strings`]. Tooling that only needs to
518/// know whether the fold would change bytes (the CLI `--diff`
519/// "CR-bearing line endings folded" cause line, the LSP
520/// did-the-formatter-touch-this guard) should call this instead
521/// of matching on `Cow::Owned`, which conflates allocation with
522/// semantic change. A future optimization that pre-allocated the
523/// Cow even on a no-op fold would silently invert that
524/// match-on-Cow guard; this predicate keeps the question
525/// answered by the bytes, not by allocation behavior.
526#[must_use]
527pub fn cr_outside_strings_present(src: &str) -> bool {
528    if !src.contains('\r') {
529        return false;
530    }
531    let body = src.strip_prefix('\u{FEFF}').unwrap_or(src);
532    let mut chars = body.chars().peekable();
533    let mut state = SourceState::Code;
534    let mut prev_was_backslash = false;
535    while let Some(ch) = chars.next() {
536        let peek = chars.peek().copied();
537        if matches!(state, SourceState::Code | SourceState::InComment) && ch == '\r' {
538            return true;
539        }
540        state = advance_source_state(ch, peek, state, &mut prev_was_backslash);
541    }
542    false
543}
544
545/// Per-character walker state for line-ending normalization passes
546/// that must respect string-literal and comment boundaries.
547///
548/// Used by both line-ending helpers: a flat `is_in_string` boolean
549/// is not enough because a quote character inside a `;`/`%` /
550/// `#!` / `#+` comment is data, not a string delimiter.
551#[derive(Debug, Clone, Copy, PartialEq, Eq)]
552enum SourceState {
553    /// In normal code. `"` opens a string; `;` / `%` / `#!` /
554    /// `#+` opens a comment; everything else is just bytes.
555    Code,
556    /// Inside `"…"`. Bytes pass through; an unescaped `"` exits.
557    InString,
558    /// Inside `;…\n`, `%…\n`, `#!…\n`, or `#+…\n`. Bytes pass
559    /// through until LF/CR.
560    InComment,
561}
562
563/// One-step state transition shared by both line-ending helpers.
564///
565/// Returns the state AFTER consuming `ch`. The string-escape
566/// bookkeeping (`prev_was_backslash`) updates in place. Comment
567/// opener detection covers all four line-comment lexemes: `;` and
568/// `%` open a comment unconditionally; `#!` and `#+` open one at
569/// any column — the lexer's `#![^\n\r]*` / `#\+[^\n\r]*` regexes
570/// have NO line-start anchor, so a mid-line `#!` or `#+` is still
571/// a `SHEBANG` / `EMACS_DIRECTIVE` token. A `#` followed by
572/// anything else is a `TAG` / `HASH` token, not a comment.
573const fn advance_source_state(
574    ch: char,
575    peek: Option<char>,
576    state: SourceState,
577    prev_was_backslash: &mut bool,
578) -> SourceState {
579    match state {
580        SourceState::InString => {
581            let is_close = ch == '"' && !*prev_was_backslash;
582            *prev_was_backslash = ch == '\\' && !*prev_was_backslash;
583            if is_close {
584                SourceState::Code
585            } else {
586                SourceState::InString
587            }
588        }
589        SourceState::InComment => {
590            if matches!(ch, '\n' | '\r') {
591                SourceState::Code
592            } else {
593                SourceState::InComment
594            }
595        }
596        SourceState::Code => {
597            let is_hash_line_comment = ch == '#' && matches!(peek, Some('!' | '+'));
598            if ch == '"' {
599                *prev_was_backslash = false;
600                SourceState::InString
601            } else if matches!(ch, ';' | '%') || is_hash_line_comment {
602                SourceState::InComment
603            } else {
604                SourceState::Code
605            }
606        }
607    }
608}
609
610/// Format a `SOURCE_FILE` syntax node in opinionated canonical form.
611///
612/// The bare-node entry for callers that already parsed the CST
613/// (typically LSP formatting providers). Output rules are the
614/// same as [`format_source`].
615///
616/// Internally runs [`compute_alignment`] on `node` to derive the
617/// file-wide column targets. Hot paths that hold a precomputed
618/// `PostingAlignment` (e.g., via [`crate::ParseResult::alignment`]) should
619/// call [`format_node_with_alignment`] instead to skip the
620/// per-call walk. Equivalence pinned by
621/// `format_node_equals_format_node_with_alignment` in this file's
622/// tests.
623#[must_use]
624pub fn format_node(node: &crate::SyntaxNode) -> String {
625    // Precondition: `node` is the SOURCE_FILE parse root (the only thing callers
626    // pass); a wrong node is a caller bug, not input-driven.
627    #[allow(clippy::expect_used)]
628    let source_file =
629        SourceFile::cast(node.clone()).expect("format_node called on non-SOURCE_FILE node");
630    let alignment = compute_alignment(&source_file);
631    format_node_with_alignment(node, alignment)
632}
633
634/// Like [`format_node`] but skips the per-call
635/// [`compute_alignment`] walk by accepting a precomputed
636/// `PostingAlignment`.
637///
638/// The cache pattern: parse → take `ParseResult::alignment` (the
639/// pre-computed file-wide alignment, populated by `parse_via_cst`)
640/// → call this function. Subsequent formatting calls on the same
641/// `ParseResult` pay only the per-call emit cost, not the
642/// `O(N_postings)` pre-pass.
643///
644/// `alignment` MUST match what `compute_alignment(&SourceFile::cast(node).unwrap())` would
645/// return for the given `node` — passing a mismatched alignment
646/// is allowed but produces output with non-canonical column
647/// widths. Use `PostingAlignment::default()` for files known to have no
648/// postings (no transactions, or transactions with no AMOUNT).
649///
650/// # Panics
651///
652/// Panics if `node`'s kind is not `SOURCE_FILE`.
653#[must_use]
654pub fn format_node_with_alignment(node: &crate::SyntaxNode, alignment: PostingAlignment) -> String {
655    // Precondition check (debug-only). The bare `format_node`
656    // delegate already validated the kind via the
657    // `SourceFile::cast` it performs for `compute_alignment`, so
658    // for the most common call path (bare → with_alignment) the
659    // debug_assert is a redundant no-op in release. External
660    // direct callers of this entry point (FFI, future LSP
661    // handlers calling `format_node_with_alignment` with a
662    // `parse_result.alignment` cache) get the panic in debug
663    // builds; in release, a wrong-kind `node` produces empty or
664    // malformed output rather than panicking — acceptable for
665    // a precondition that's guaranteed by the call's typed
666    // contract.
667    debug_assert_eq!(
668        node.kind(),
669        crate::SyntaxKind::SOURCE_FILE,
670        "format_node_with_alignment called on non-SOURCE_FILE node (got {:?})",
671        node.kind(),
672    );
673    let mut out = String::new();
674    // Walk every direct child in source order so file-level comments
675    // (file-leading per phase-2.0 trivia attachment, plus file-
676    // trailing) interleave correctly with directives. Inter-directive
677    // and same-line trailing comments live INSIDE the next/owning
678    // directive and surface from `emit_directive`'s leading-trivia
679    // pass.
680    //
681    // Blank-line policy at the top level: PRESERVE the author's blank
682    // lines between directives rather than normalizing to exactly one.
683    // Between two directives, emit as many blank lines as the source
684    // had — including zero, so deliberately grouped runs (consecutive
685    // `open`s, a dense `price` feed) stay grouped instead of being
686    // double-spaced (#1325). This matches Python `bean-format` and the
687    // rest of the beancount formatter lineage (fava,
688    // beancount-language-server, beancount-mode), all of which leave
689    // blank-line structure untouched and only realign amounts.
690    //
691    // Adjacent file-level comments still stay tight as a group (so a
692    // `; ====\n; HEADER\n; ====` section header keeps its visual
693    // grouping), and a comment group sitting against a directive on
694    // either side stays flush.
695    let mut prev_was_directive = false;
696    for el in node.children_with_tokens() {
697        match el {
698            rowan::NodeOrToken::Node(n) => {
699                if let Some(directive) = ast::Directive::cast(n.clone()) {
700                    if prev_was_directive {
701                        for _ in 0..leading_blank_lines(directive.syntax()) {
702                            out.push('\n');
703                        }
704                    }
705                    emit_directive(&directive, alignment, &mut out);
706                    prev_was_directive = true;
707                } else if n.kind() == crate::SyntaxKind::ERROR_NODE {
708                    // Preserve unparsable content verbatim (#1335): `format`
709                    // must never delete the author's text. Org-mode `*`
710                    // section headers (and any comments grouped with them)
711                    // parse into ERROR_NODEs; emit them as-is rather than
712                    // dropping them. Treated like a directive for spacing — an
713                    // ERROR_NODE is a top-level content block, so the author's
714                    // blank lines around it (before it, and before the next
715                    // directive) are preserved, not flushed.
716                    if prev_was_directive {
717                        for _ in 0..leading_blank_lines(&n) {
718                            out.push('\n');
719                        }
720                    }
721                    emit_error_node(&n, &mut out);
722                    prev_was_directive = true;
723                }
724                // Any other non-directive node: nothing to emit.
725            }
726            rowan::NodeOrToken::Token(t) => {
727                if matches!(
728                    t.kind(),
729                    crate::SyntaxKind::COMMENT
730                        | crate::SyntaxKind::PERCENT_COMMENT
731                        | crate::SyntaxKind::SHEBANG
732                        | crate::SyntaxKind::EMACS_DIRECTIVE
733                ) {
734                    out.push_str(t.text().trim_end_matches(['\n', '\r']));
735                    out.push('\n');
736                    prev_was_directive = false;
737                }
738            }
739        }
740    }
741    if !out.ends_with('\n') {
742        out.push('\n');
743    }
744    out
745}
746
747/// Format the subset of `node`'s top-level children that intersect
748/// `range`, returning the snapped byte range and the canonical-form
749/// replacement text.
750///
751/// This is the building block for the LSP `textDocument/rangeFormatting`
752/// provider: the client sends a `Range`, the server snaps it up to
753/// the smallest set of top-level structural nodes (directives or
754/// standalone comments) that intersect the selection, formats those
755/// nodes the same way [`format_node`] formats the whole file, and
756/// returns a single `TextEdit` replacing the snapped range. The
757/// alternative — formatting a substring of the source — would have
758/// to either invent a partial canonical form (creating a second
759/// truth alongside the whole-file canonical form, the failure mode
760/// that bit #1252) or refuse to format anything that crosses a
761/// structural boundary. Snapping up to top-level boundaries is the
762/// only choice that lets the same canonical-form rules apply.
763///
764/// **Frame.** `range` is in the *CST* byte frame — the same frame
765/// the syntax node's `TextRange`s use. The LSP handler is
766/// responsible for shifting `bom_offset` at the input/output
767/// boundary (mirrors the [`super::super::SyntaxNode`] /
768/// `selection_range` handler convention; see
769/// `ParseResult::syntax_root` rustdoc for the rationale).
770///
771/// **Behavior.**
772///
773/// - If `range` intersects no top-level Directive or standalone
774///   COMMENT/SHEBANG/EMACS token, returns `None`. The LSP handler
775///   surfaces `None` directly (serialized as `null` per LSP, not
776///   as `[]`); the client treats it as "nothing to format".
777/// - If the computed snap range would cover any top-level
778///   `ERROR_NODE` byte, returns `None`. **Range formatting refuses
779///   to delete user content the parser couldn't classify.** This
780///   diverges from [`format_node`], which silently drops
781///   `ERROR_NODE` children on the whole-file path; the rationale
782///   is the per-handler asymmetry the LSP exposes — the user
783///   pressing "Format Selection" expects either a clean
784///   reformat or a no-op, never a silent partial delete of an
785///   in-progress directive. Tooling that genuinely wants to drop
786///   broken regions can still call [`format_node`] on the same
787///   node.
788/// - Otherwise returns `Some((snap, text))` where `snap` is the
789///   union of the included children's text ranges (so it begins at
790///   the first included child's start and ends at the last
791///   included child's end, including each child's leading-trivia
792///   prefix per the phase-2.0 Directive-Terminator Rule) and
793///   `text` is the canonical-form replacement.
794/// - Cursor-only selection (`range.is_empty()`): the child at the
795///   cursor is included if the cursor is strictly inside it OR is
796///   exactly at the child's start. Boundary at the child's end
797///   belongs to the next child, not the previous one — matches
798///   the standard "end-of-line cursor is start-of-next-line"
799///   convention.
800///
801/// **Posting alignment.** The pre-pass uses the FULL `SourceFile`, not
802/// the selected subset. A selection that formats one transaction
803/// in a file with many other transactions inherits the file's
804/// alignment columns, so the formatted output stays visually
805/// aligned with un-formatted postings elsewhere. The opposite
806/// policy (per-selection alignment) would create a jarring
807/// visual jump every time the user re-formats a sub-range.
808///
809/// **Round-trip invariant.** For any `range` that contains every
810/// top-level child, the returned text equals the result of
811/// [`format_node`] on the same node. Pinned by
812/// `format_node_range_full_range_matches_format_node` in this
813/// file's test module.
814///
815/// Returns `None` if `node`'s kind is not `SOURCE_FILE` (the precondition is
816/// still that callers pass the parse root) or if `range` intersects no top-level
817/// child.
818#[must_use]
819pub fn format_node_range(
820    node: &crate::SyntaxNode,
821    range: rowan::TextRange,
822) -> Option<(rowan::TextRange, String)> {
823    // This returns `Option`, so a non-SOURCE_FILE node is a clean `None` rather
824    // than a panic (the precondition is still that callers pass the parse root).
825    let source_file = SourceFile::cast(node.clone())?;
826    // File-wide alignment pre-pass: see rustdoc above for the
827    // rationale. The selected subset always uses the full file's
828    // alignment columns. Hot paths with a precomputed `PostingAlignment`
829    // should call `format_node_range_with_alignment` instead.
830    let alignment = compute_alignment(&source_file);
831    format_node_range_with_alignment(node, range, alignment)
832}
833
834/// Like [`format_node_range`] but skips the per-call
835/// [`compute_alignment`] walk by accepting a precomputed
836/// `PostingAlignment`.
837///
838/// The cache pattern is identical to
839/// [`format_node_with_alignment`]: parse → take
840/// `ParseResult::alignment` → call this function. The hot path the
841/// cache addresses is the LSP `textDocument/rangeFormatting`
842/// fallback (CST-snap path that fires on parse-error files), which
843/// can be invoked per-keystroke through format-on-type clients.
844/// Without the cache the per-call cost is
845/// `O(N_postings_in_file)`; with the cache it's
846/// `O(N_cst_nodes covered by range)`.
847///
848/// `alignment` MUST match what `compute_alignment(&SourceFile::cast(node).unwrap())` would
849/// return for the given `node`; pinned by
850/// `format_node_range_matches_format_node_range_with_alignment`. Same
851/// `range` semantics, `ERROR_NODE` policy, snap rules, and
852/// `# Panics` precondition as [`format_node_range`].
853#[must_use]
854pub fn format_node_range_with_alignment(
855    node: &crate::SyntaxNode,
856    range: rowan::TextRange,
857    alignment: PostingAlignment,
858) -> Option<(rowan::TextRange, String)> {
859    // Precondition check (debug-only). Same rationale as
860    // `format_node_with_alignment`: the bare delegate already
861    // validated the kind, so the most common call path (bare →
862    // with_alignment) gets no release-build cost from this
863    // assert. External direct callers — the LSP range_formatting
864    // fallback, FFI, future format-on-type — get a debug-build
865    // panic; release-build wrong-kind input produces no output
866    // (rather than panicking).
867    debug_assert_eq!(
868        node.kind(),
869        crate::SyntaxKind::SOURCE_FILE,
870        "format_node_range_with_alignment called on non-SOURCE_FILE node (got {:?})",
871        node.kind(),
872    );
873
874    // First pass: identify the included children and the snap range.
875    // We pick:
876    //   - Directive nodes whose `text_range` intersects `range`
877    //   - top-level COMMENT/PERCENT_COMMENT/SHEBANG/EMACS_DIRECTIVE
878    //     tokens whose range intersects `range`
879    // ERROR_NODE and other non-Directive nodes are skipped (matches
880    // `format_node`); a selection that lands only on them returns
881    // None below.
882    let mut snap_start: Option<rowan::TextSize> = None;
883    let mut snap_end: Option<rowan::TextSize> = None;
884    let mut any_included = false;
885    for el in node.children_with_tokens() {
886        let (kind, child_range) = (el.kind(), el.text_range());
887        let is_formattable = match &el {
888            rowan::NodeOrToken::Node(n) => ast::Directive::cast(n.clone()).is_some(),
889            rowan::NodeOrToken::Token(_) => matches!(
890                kind,
891                crate::SyntaxKind::COMMENT
892                    | crate::SyntaxKind::PERCENT_COMMENT
893                    | crate::SyntaxKind::SHEBANG
894                    | crate::SyntaxKind::EMACS_DIRECTIVE
895            ),
896        };
897        if !is_formattable {
898            continue;
899        }
900        if !range_intersects(child_range, range) {
901            continue;
902        }
903        any_included = true;
904        snap_start = Some(snap_start.map_or(child_range.start(), |s| s.min(child_range.start())));
905        snap_end = Some(snap_end.map_or(child_range.end(), |e| e.max(child_range.end())));
906    }
907    if !any_included {
908        return None;
909    }
910    // `any_included` guarantees both bounds were set in the loop above; bail
911    // (return `None`) rather than `unwrap` if somehow not.
912    let (Some(snap_start), Some(snap_end)) = (snap_start, snap_end) else {
913        return None;
914    };
915    let snap = rowan::TextRange::new(snap_start, snap_end);
916
917    // ERROR_NODE intersection bail: if the snap range covers any
918    // top-level ERROR_NODE byte, refuse to format and return None.
919    // Range formatting must not silently delete content the parser
920    // could not classify — without this guard, a selection
921    // spanning two valid directives with an ERROR_NODE between
922    // them would emit a TextEdit that replaces all three with
923    // just the two formatted directives, deleting the user's
924    // in-progress source bytes.
925    //
926    // This is the deliberate divergence from `format_node`'s
927    // whole-file policy: the whole-file path runs on the
928    // assumption that the caller (CLI / FFI / `try_format_source`)
929    // has already decided to accept content loss; the per-handler
930    // LSP path has no such opt-in. The cost is occasional
931    // "format-selection did nothing" UX while a parse error sits
932    // inside the snap; the benefit is no data loss.
933    for el in node.children_with_tokens() {
934        if !matches!(el.kind(), crate::SyntaxKind::ERROR_NODE) {
935            continue;
936        }
937        let er = el.text_range();
938        // Strict-overlap check: an ERROR_NODE whose end touches
939        // snap.start (or start touches snap.end) is adjacent, not
940        // overlapping — those are safe to emit alongside.
941        if er.end() > snap.start() && er.start() < snap.end() {
942            return None;
943        }
944    }
945
946    // Second pass: emit only the children whose range falls
947    // inside `snap`. We re-walk rather than caching the first
948    // pass because the second pass needs to maintain the
949    // `prev_was_directive` blank-line state in source order, and
950    // the child set is small enough that the second walk is
951    // cheap. (Re-walking also keeps the data-flow obvious: snap
952    // computation and emission are two distinct concerns.)
953    let mut out = String::new();
954    let mut prev_was_directive = false;
955    for el in node.children_with_tokens() {
956        let child_range = el.text_range();
957        // Use the snap range (not the input `range`) so we emit
958        // every child WITHIN the snap, even those that the
959        // original selection didn't directly intersect but that
960        // sit between two intersecting children. Without this,
961        // ERROR_NODE-free trivia between two selected directives
962        // would be re-formatted into our output (the comment
963        // pass picks them up), which matches `format_node`.
964        if child_range.end() <= snap.start() || child_range.start() >= snap.end() {
965            continue;
966        }
967        match el {
968            rowan::NodeOrToken::Node(n) => {
969                // ERROR_NODEs never reach here: the range path bails out
970                // above (returns None) when the snap covers one, so it
971                // refuses to format rather than risk touching unparsable
972                // content. Only the whole-file path preserves them verbatim.
973                let Some(directive) = ast::Directive::cast(n) else {
974                    continue;
975                };
976                // Preserve the author's inter-directive blank lines
977                // (#1325), identically to `format_node_with_alignment`,
978                // so range formatting and whole-file formatting agree.
979                //
980                // The FIRST directive emitted from the snap needs care:
981                // its predecessor may sit OUTSIDE the selection, but the
982                // blank lines between them are this directive's leading
983                // trivia (the Directive-Terminator Rule), so they fall
984                // INSIDE the snapped range. Dropping them would delete
985                // the blank line above the selection. Emit them whenever
986                // a directive precedes this one in the file — the same
987                // condition the whole-file path expresses as
988                // `prev_was_directive`. For the file's first directive
989                // (no predecessor) there is nothing to preserve.
990                let preceded_by_directive = prev_was_directive
991                    || directive
992                        .syntax()
993                        .prev_sibling()
994                        .and_then(ast::Directive::cast)
995                        .is_some();
996                if preceded_by_directive {
997                    for _ in 0..leading_blank_lines(directive.syntax()) {
998                        out.push('\n');
999                    }
1000                }
1001                emit_directive(&directive, alignment, &mut out);
1002                prev_was_directive = true;
1003            }
1004            rowan::NodeOrToken::Token(t) => {
1005                if matches!(
1006                    t.kind(),
1007                    crate::SyntaxKind::COMMENT
1008                        | crate::SyntaxKind::PERCENT_COMMENT
1009                        | crate::SyntaxKind::SHEBANG
1010                        | crate::SyntaxKind::EMACS_DIRECTIVE
1011                ) {
1012                    out.push_str(t.text().trim_end_matches(['\n', '\r']));
1013                    out.push('\n');
1014                    prev_was_directive = false;
1015                }
1016            }
1017        }
1018    }
1019    if !out.ends_with('\n') {
1020        out.push('\n');
1021    }
1022    Some((snap, out))
1023}
1024
1025/// Whether `child` (a CST node's text range) intersects the
1026/// caller's selection. Zero-width selections (a cursor with no
1027/// extent) are handled specially: the cursor counts as "inside"
1028/// a child if the cursor is strictly inside the child's range or
1029/// is exactly at the child's start. Boundary at the child's end
1030/// is NOT a match — it belongs to the next child, matching
1031/// editors' "end-of-line cursor = start of next line" convention.
1032fn range_intersects(child: rowan::TextRange, sel: rowan::TextRange) -> bool {
1033    if sel.is_empty() {
1034        child.contains(sel.start()) || sel.start() == child.start()
1035    } else {
1036        child.start() < sel.end() && sel.start() < child.end()
1037    }
1038}
1039
1040/// Compute the file-wide alignment columns for a parsed `SourceFile`.
1041///
1042/// Walks every Transaction's postings once, takes the max LHS
1043/// width (account + optional `flag `) and max number-text width,
1044/// and derives the column targets from them.
1045///
1046/// **`O(N_postings)`.** Public so consumers can pre-compute the
1047/// alignment once (typically at parse time) and pass the cached
1048/// `PostingAlignment` into [`format_node_with_alignment`] or
1049/// [`format_node_range_with_alignment`] — eliminates the per-call
1050/// walk in hot formatting paths (LSP format-on-type through a
1051/// parse error, repeat-format scripts, etc.).
1052///
1053/// **Tree-shape precondition.** `sf` must be a `SourceFile` whose
1054/// CST was produced by `parse_structured` (directly or transitively
1055/// via `parse_via_cst` / `parse`). Hand-built partial trees (e.g.,
1056/// a `GreenNodeBuilder` invocation for snippet formatting) silently
1057/// return `PostingAlignment::default()` because their wrapping
1058/// nodes fail the `ast::Directive::Transaction::cast` check.
1059/// Likewise, transactions wrapped in `ERROR_NODE` by mid-edit
1060/// error recovery are excluded — see
1061/// `parse_result_alignment_cache::mid_transaction_error_node` for
1062/// the pinned behavior. The function never panics on a partial
1063/// tree; it just returns the all-zero alignment for the no-postings
1064/// case.
1065///
1066/// **Pinning the contract.** `ParseResult::alignment` is populated
1067/// by calling this function during `parse_via_cst`; the equivalence
1068/// between the cached value and a fresh call is guaranteed by the
1069/// `parse_result_alignment_cache::*` regression tests (7 fixtures) in
1070/// this module.
1071#[must_use]
1072pub fn compute_alignment(sf: &SourceFile) -> PostingAlignment {
1073    let mut max_lhs: usize = 0;
1074    let mut max_num: usize = 0;
1075    // Tracks postings that actually render a number — the only ones that
1076    // participate in alignment. A file whose postings render no numbers
1077    // gets `PostingAlignment::default()`, matching the type docs.
1078    let mut any_aligned_posting = false;
1079    for directive in sf.directives() {
1080        let ast::Directive::Transaction(t) = directive else {
1081            continue;
1082        };
1083        for child in t.syntax().children() {
1084            let Some(p) = ast::Posting::cast(child) else {
1085                continue;
1086            };
1087            let mut lhs = 0usize;
1088            if let Some(flag) = p.flag() {
1089                lhs += flag.text().chars().count() + 1; // `! ` etc.
1090            }
1091            if let Some(account) = p.account() {
1092                lhs += account.text().chars().count();
1093            }
1094
1095            // Only postings that render a number drive the alignment
1096            // column. `bean-format` computes the number column from the
1097            // prefixes of number-bearing lines only, so two kinds of
1098            // posting must NOT push the column right:
1099            //   - amount-less postings (the elided balancing leg, or a
1100            //     long account with no amount), and
1101            //   - currency-only amounts (`Assets:Cash USD`), which
1102            //     `emit_posting` prints with no number at all.
1103            // Counting either is why `rledger format` and `bean-format`
1104            // disagreed and round-tripping never converged (issue #1290).
1105            // `amount_number_text` is the shared predicate that keeps
1106            // this pre-pass in lockstep with `emit_posting`.
1107            if let Some(amt) = p.amount()
1108                && let Some(text) = amount_number_text(&amt)
1109            {
1110                any_aligned_posting = true;
1111                max_lhs = max_lhs.max(lhs);
1112                max_num = max_num.max(text.chars().count());
1113            }
1114        }
1115    }
1116    if !any_aligned_posting {
1117        return PostingAlignment::default();
1118    }
1119    // 2 spaces between the longest account end and the number field,
1120    // matching the conventional Beancount layout.
1121    PostingAlignment {
1122        number_col: INDENT.len() + max_lhs + 2,
1123        number_width: max_num,
1124    }
1125}
1126
1127/// The rendered number / arithmetic-expression text of an amount *if it
1128/// renders a number*, or `None` when it renders nothing (a currency-only
1129/// amount like `USD`, whose value text is empty). EXCLUDES the trailing
1130/// currency; sign (if any) is included.
1131///
1132/// This is the single source of truth for "does this posting line have a
1133/// number?". Both the file-wide alignment pre-pass ([`compute_alignment`])
1134/// and the emitter ([`emit_posting`]) consult it, so they can never
1135/// disagree about which postings participate in alignment — the bug
1136/// class behind #1290 (amount-less postings) and its currency-only
1137/// sibling.
1138fn amount_number_text(amt: &ast::Amount) -> Option<String> {
1139    let text = amount_value_text(amt);
1140    (!text.is_empty()).then_some(text)
1141}
1142
1143/// Render an amount's value portion (number or arithmetic
1144/// expression) as a string, EXCLUDING the trailing currency.
1145/// Mirrors the value half of [`format_amount`].
1146fn amount_value_text(amt: &ast::Amount) -> String {
1147    let mut buf = String::new();
1148    if amt.is_arithmetic() {
1149        emit_amount_subnode_expression(amt.syntax(), &mut buf);
1150        return buf;
1151    }
1152    if let Some(sign) = amt.sign()
1153        && sign.is_minus()
1154    {
1155        buf.push('-');
1156    }
1157    if let Some(n) = amt.number() {
1158        buf.push_str(&canonical_number(n.text()));
1159    }
1160    buf
1161}
1162
1163fn emit_directive(d: &ast::Directive, align: PostingAlignment, out: &mut String) {
1164    // Leading inter-directive trivia: COMMENT tokens that sit
1165    // BEFORE the directive's first content token. Per phase-2.0
1166    // trivia attachment, these live inside the directive's syntax
1167    // node — emit them as their own lines BEFORE the canonical
1168    // content.
1169    emit_leading_comments(d.syntax(), out);
1170
1171    // Capture an optional same-line trailing comment so we can
1172    // splice it back in immediately before the directive's
1173    // terminating NEWLINE — see the comment-aware emit loop at
1174    // the bottom of this function.
1175    let trailing = collect_trailing_comment(d.syntax());
1176
1177    let len_before = out.len();
1178    match d {
1179        ast::Directive::Open(d) => emit_open(d, out),
1180        ast::Directive::Close(d) => emit_close(d, out),
1181        ast::Directive::Commodity(d) => emit_commodity(d, out),
1182        ast::Directive::Note(d) => emit_note(d, out),
1183        ast::Directive::Event(d) => emit_event(d, out),
1184        ast::Directive::Query(d) => emit_query(d, out),
1185        ast::Directive::Pad(d) => emit_pad(d, out),
1186        ast::Directive::Document(d) => emit_document(d, out),
1187        ast::Directive::Price(d) => emit_price(d, out),
1188        ast::Directive::Balance(d) => emit_balance(d, out),
1189        ast::Directive::Custom(d) => emit_custom(d, out),
1190        ast::Directive::Option(d) => emit_option(d, out),
1191        ast::Directive::Include(d) => emit_include(d, out),
1192        ast::Directive::Plugin(d) => emit_plugin(d, out),
1193        ast::Directive::Pushtag(d) => emit_pushtag(d, out),
1194        ast::Directive::Poptag(d) => emit_poptag(d, out),
1195        ast::Directive::Pushmeta(d) => emit_pushmeta(d, out),
1196        ast::Directive::Popmeta(d) => emit_popmeta(d, out),
1197        ast::Directive::Transaction(d) => emit_transaction(d, align, out),
1198    }
1199    // Splice the same-line trailing comment in: find the FIRST '\n'
1200    // after `len_before` (= end of the directive's header line in
1201    // the emitted bytes) and insert `" ; comment"` before it. For
1202    // single-line directives the first '\n' is also the only one
1203    // and this lands the comment on the directive line. For multi-
1204    // line transactions it lands the comment on the header line
1205    // (where the source had it), not after the body.
1206    if let Some(c) = trailing
1207        && let Some(newline_rel) = out[len_before..].find('\n')
1208    {
1209        let insert_at = len_before + newline_rel;
1210        let mut splice = String::with_capacity(c.len() + 1);
1211        splice.push(' ');
1212        splice.push_str(&c);
1213        out.insert_str(insert_at, &splice);
1214    }
1215}
1216
1217/// Emit an `ERROR_NODE`'s text verbatim, so `format` never deletes content it
1218/// could not parse (#1335) — chiefly org-mode `*` section headers and the
1219/// comments grouped with them. Only trailing whitespace per line is stripped
1220/// (the formatter's no-trailing-space policy) and the node's trailing newlines
1221/// are collapsed to one; everything else — including blank lines, comments and
1222/// the unparsable lines themselves — is preserved exactly as written.
1223fn emit_error_node(node: &crate::SyntaxNode, out: &mut String) {
1224    let text = node.text().to_string();
1225    // Trim leading AND trailing blank lines: the caller emits the leading
1226    // blank lines (via `leading_blank_lines`) so emitting them here too would
1227    // double-count them and break idempotence. Internal blank lines and the
1228    // content (org headers, grouped comments) are preserved.
1229    for line in text.trim_matches(['\n', '\r']).split('\n') {
1230        out.push_str(line.trim_end());
1231        out.push('\n');
1232    }
1233}
1234
1235/// Number of blank lines the author left immediately before this
1236/// directive's first visible line (its leading comment, if any, else
1237/// its content). Each NEWLINE in the leading trivia that precedes the
1238/// first comment / content token is exactly one blank line: the
1239/// previous directive owns its own terminator NEWLINE (the Directive-
1240/// Terminator Rule), so this node's leading NEWLINEs are purely the
1241/// blank gap, with no off-by-one. WHITESPACE-only "blank" lines count
1242/// too (the NEWLINE that ends them is included). Scanning stops at the
1243/// first comment or content token, so a blank line sitting *between* a
1244/// leading comment and the directive's content is not counted here
1245/// (that gap is collapsed by `emit_leading_comments`, as before).
1246fn leading_blank_lines(node: &crate::SyntaxNode) -> usize {
1247    let mut blanks = 0;
1248    for el in node.children_with_tokens() {
1249        let rowan::NodeOrToken::Token(t) = el else {
1250            break;
1251        };
1252        match t.kind() {
1253            crate::SyntaxKind::NEWLINE => blanks += 1,
1254            crate::SyntaxKind::WHITESPACE => {}
1255            // First comment or content token — past the leading gap.
1256            _ => break,
1257        }
1258    }
1259    blanks
1260}
1261
1262/// Walk the directive's direct-child tokens until the first
1263/// non-trivia token, emitting each `COMMENT` (and `PERCENT_COMMENT`)
1264/// on its own line. Whitespace and newlines in the leading region
1265/// are ignored — the canonical form controls inter-directive
1266/// blank-line spacing separately.
1267fn emit_leading_comments(node: &crate::SyntaxNode, out: &mut String) {
1268    for el in node.children_with_tokens() {
1269        let rowan::NodeOrToken::Token(t) = el else {
1270            break;
1271        };
1272        match t.kind() {
1273            crate::SyntaxKind::COMMENT | crate::SyntaxKind::PERCENT_COMMENT => {
1274                out.push_str(t.text().trim_end_matches(['\n', '\r']));
1275                out.push('\n');
1276            }
1277            crate::SyntaxKind::WHITESPACE | crate::SyntaxKind::NEWLINE => {}
1278            _ => break,
1279        }
1280    }
1281}
1282
1283/// Return the directive's same-line trailing comment (if any) —
1284/// the COMMENT token that appears between the LAST non-trivia
1285/// content token and the directive-terminating NEWLINE on the
1286/// header line. Returns the verbatim comment text (no trailing
1287/// newline).
1288fn collect_trailing_comment(node: &crate::SyntaxNode) -> Option<String> {
1289    // Find the directive-header terminating NEWLINE: the FIRST
1290    // direct-child NEWLINE that follows at least one non-trivia
1291    // content token. (For single-line directives there's only one
1292    // NEWLINE; for transactions the header line is the first
1293    // NEWLINE, after which postings/metadata follow.)
1294    let mut header_nl_idx: Option<usize> = None;
1295    let mut saw_content = false;
1296    let tokens: Vec<crate::SyntaxToken> = node
1297        .children_with_tokens()
1298        .filter_map(rowan::NodeOrToken::into_token)
1299        .collect();
1300    for (i, t) in tokens.iter().enumerate() {
1301        let k = t.kind();
1302        if k == crate::SyntaxKind::NEWLINE && saw_content {
1303            header_nl_idx = Some(i);
1304            break;
1305        }
1306        if !matches!(
1307            k,
1308            crate::SyntaxKind::WHITESPACE
1309                | crate::SyntaxKind::NEWLINE
1310                | crate::SyntaxKind::COMMENT
1311                | crate::SyntaxKind::PERCENT_COMMENT
1312        ) {
1313            saw_content = true;
1314        }
1315    }
1316    // EOF-without-newline fallback: if there is no header-
1317    // terminating NEWLINE, the directive runs to the end of the
1318    // file. Scan from the LAST token instead. A `?` early-return
1319    // here previously dropped same-line trailing comments at the
1320    // final line of a file that lacked a trailing newline, e.g.
1321    // `2024-01-15 open Assets:A ; trailing` (no `\n`). The
1322    // canonical formatter restores the trailing newline, but the
1323    // comment was already gone.
1324    let nl_idx = header_nl_idx.unwrap_or(tokens.len());
1325    // Scan backwards from the header NEWLINE (or EOF): the
1326    // trailing comment is the last COMMENT before the NEWLINE
1327    // separated only by WHITESPACE.
1328    for i in (0..nl_idx).rev() {
1329        let k = tokens[i].kind();
1330        if matches!(
1331            k,
1332            crate::SyntaxKind::COMMENT | crate::SyntaxKind::PERCENT_COMMENT
1333        ) {
1334            return Some(tokens[i].text().to_string());
1335        }
1336        if k != crate::SyntaxKind::WHITESPACE {
1337            return None;
1338        }
1339    }
1340    None
1341}
1342
1343// ---- Single-line directives ------------------------------------
1344
1345fn emit_open(d: &ast::OpenDirective, out: &mut String) {
1346    let date = d.date().map(|t| t.text().to_string()).unwrap_or_default();
1347    let account = d
1348        .account()
1349        .map(|t| t.text().to_string())
1350        .unwrap_or_default();
1351    out.push_str(&date);
1352    out.push_str(" open ");
1353    out.push_str(&account);
1354    // The currency constraint list is comma-separated (`USD,EUR`), not
1355    // space-separated — emitting spaces produces invalid beancount (#1405).
1356    for (i, currency) in d.currencies().enumerate() {
1357        out.push_str(if i == 0 { " " } else { "," });
1358        out.push_str(currency.text());
1359    }
1360    if let Some(booking) = d.booking_method() {
1361        // `booking.text()` includes the surrounding quotes.
1362        out.push(' ');
1363        out.push_str(booking.text());
1364    }
1365    out.push('\n');
1366    emit_meta_entries_of(d.syntax(), out);
1367}
1368
1369fn emit_close(d: &ast::CloseDirective, out: &mut String) {
1370    let date = d.date().map(|t| t.text().to_string()).unwrap_or_default();
1371    let account = d
1372        .account()
1373        .map(|t| t.text().to_string())
1374        .unwrap_or_default();
1375    out.push_str(&date);
1376    out.push_str(" close ");
1377    out.push_str(&account);
1378    out.push('\n');
1379    emit_meta_entries_of(d.syntax(), out);
1380}
1381
1382fn emit_commodity(d: &ast::CommodityDirective, out: &mut String) {
1383    let date = d.date().map(|t| t.text().to_string()).unwrap_or_default();
1384    let currency = d
1385        .currency()
1386        .map(|t| t.text().to_string())
1387        .unwrap_or_default();
1388    out.push_str(&date);
1389    out.push_str(" commodity ");
1390    out.push_str(&currency);
1391    out.push('\n');
1392    emit_meta_entries_of(d.syntax(), out);
1393}
1394
1395fn emit_note(d: &ast::NoteDirective, out: &mut String) {
1396    let date = d.date().map(|t| t.text().to_string()).unwrap_or_default();
1397    let account = d
1398        .account()
1399        .map(|t| t.text().to_string())
1400        .unwrap_or_default();
1401    let text = d.text().map(|s| s.text().to_string()).unwrap_or_default();
1402    out.push_str(&date);
1403    out.push_str(" note ");
1404    out.push_str(&account);
1405    out.push(' ');
1406    out.push_str(&text);
1407    out.push('\n');
1408    emit_meta_entries_of(d.syntax(), out);
1409}
1410
1411fn emit_event(d: &ast::EventDirective, out: &mut String) {
1412    let date = d.date().map(|t| t.text().to_string()).unwrap_or_default();
1413    let event_type = d
1414        .event_type()
1415        .map(|s| s.text().to_string())
1416        .unwrap_or_default();
1417    let value = d.value().map(|s| s.text().to_string()).unwrap_or_default();
1418    out.push_str(&date);
1419    out.push_str(" event ");
1420    out.push_str(&event_type);
1421    out.push(' ');
1422    out.push_str(&value);
1423    out.push('\n');
1424    emit_meta_entries_of(d.syntax(), out);
1425}
1426
1427fn emit_query(d: &ast::QueryDirective, out: &mut String) {
1428    let date = d.date().map(|t| t.text().to_string()).unwrap_or_default();
1429    let name = d.name().map(|s| s.text().to_string()).unwrap_or_default();
1430    let query = d.query().map(|s| s.text().to_string()).unwrap_or_default();
1431    out.push_str(&date);
1432    out.push_str(" query ");
1433    out.push_str(&name);
1434    out.push(' ');
1435    out.push_str(&query);
1436    out.push('\n');
1437    emit_meta_entries_of(d.syntax(), out);
1438}
1439
1440fn emit_pad(d: &ast::PadDirective, out: &mut String) {
1441    let date = d.date().map(|t| t.text().to_string()).unwrap_or_default();
1442    let target = d
1443        .target_account()
1444        .map(|t| t.text().to_string())
1445        .unwrap_or_default();
1446    let source = d
1447        .source_account()
1448        .map(|t| t.text().to_string())
1449        .unwrap_or_default();
1450    out.push_str(&date);
1451    out.push_str(" pad ");
1452    out.push_str(&target);
1453    out.push(' ');
1454    out.push_str(&source);
1455    out.push('\n');
1456    emit_meta_entries_of(d.syntax(), out);
1457}
1458
1459fn emit_document(d: &ast::DocumentDirective, out: &mut String) {
1460    let date = d.date().map(|t| t.text().to_string()).unwrap_or_default();
1461    let account = d
1462        .account()
1463        .map(|t| t.text().to_string())
1464        .unwrap_or_default();
1465    let path = d.path().map(|s| s.text().to_string()).unwrap_or_default();
1466    out.push_str(&date);
1467    out.push_str(" document ");
1468    out.push_str(&account);
1469    out.push(' ');
1470    out.push_str(&path);
1471    // Trailing TAG / LINK tokens — typed AST has no accessor, so
1472    // walk direct-child tokens. Skip LEADING trivia (a blank line
1473    // before a non-first directive attaches its NEWLINE inside the
1474    // node) and stop at the first NEWLINE *after* the header content
1475    // begins; otherwise the tags/links are dropped when reformatting
1476    // any document past the first — the same bug as #1321 in the
1477    // transaction path.
1478    let mut seen_content = false;
1479    for el in d.syntax().children_with_tokens() {
1480        let rowan::NodeOrToken::Token(t) = el else {
1481            break;
1482        };
1483        match t.kind() {
1484            crate::SyntaxKind::TAG | crate::SyntaxKind::LINK => {
1485                out.push(' ');
1486                out.push_str(t.text());
1487                seen_content = true;
1488            }
1489            crate::SyntaxKind::NEWLINE if seen_content => break,
1490            // Leading trivia before the date: whitespace, blank-line
1491            // NEWLINEs, AND comment lines. A comment before a non-first
1492            // directive attaches inside this node (Directive-Terminator
1493            // Rule); skipping only WHITESPACE/NEWLINE would let it flip
1494            // `seen_content`, break at the comment's NEWLINE, and drop
1495            // the real header tags/links.
1496            k if k.is_trivia() => {}
1497            _ => seen_content = true,
1498        }
1499    }
1500    out.push('\n');
1501    emit_meta_entries_of(d.syntax(), out);
1502}
1503
1504fn emit_price(d: &ast::PriceDirective, out: &mut String) {
1505    let date = d.date().map(|t| t.text().to_string()).unwrap_or_default();
1506    let base = d
1507        .base_currency()
1508        .map(|t| t.text().to_string())
1509        .unwrap_or_default();
1510    let quote = d
1511        .quote_currency()
1512        .map(|t| t.text().to_string())
1513        .unwrap_or_default();
1514    out.push_str(&date);
1515    out.push_str(" price ");
1516    out.push_str(&base);
1517    out.push(' ');
1518    emit_amount_expression(d.syntax(), out);
1519    out.push(' ');
1520    out.push_str(&quote);
1521    out.push('\n');
1522    emit_meta_entries_of(d.syntax(), out);
1523}
1524
1525fn emit_balance(d: &ast::BalanceDirective, out: &mut String) {
1526    let date = d.date().map(|t| t.text().to_string()).unwrap_or_default();
1527    let account = d
1528        .account()
1529        .map(|t| t.text().to_string())
1530        .unwrap_or_default();
1531    let currency = d
1532        .currency()
1533        .map(|t| t.text().to_string())
1534        .unwrap_or_default();
1535    out.push_str(&date);
1536    out.push_str(" balance ");
1537    out.push_str(&account);
1538    out.push(' ');
1539    emit_amount_expression(d.syntax(), out);
1540    out.push(' ');
1541    out.push_str(&currency);
1542    // Optional `~ tolerance [CCY]` — walk raw tokens.
1543    if let Some((tolerance, tol_currency)) = balance_tolerance(d.syntax()) {
1544        out.push_str(" ~ ");
1545        out.push_str(&tolerance);
1546        if let Some(c) = tol_currency {
1547            out.push(' ');
1548            out.push_str(&c);
1549        }
1550    }
1551    out.push('\n');
1552    emit_meta_entries_of(d.syntax(), out);
1553}
1554
1555fn emit_custom(d: &ast::CustomDirective, out: &mut String) {
1556    let date = d.date().map(|t| t.text().to_string()).unwrap_or_default();
1557    let custom_type = d
1558        .custom_type()
1559        .map(|s| s.text().to_string())
1560        .unwrap_or_default();
1561    out.push_str(&date);
1562    out.push_str(" custom ");
1563    out.push_str(&custom_type);
1564    // Walk raw tokens after the type STRING and emit each value
1565    // with single-space separation. NUMBER + CURRENCY adjacent
1566    // counts as an Amount; emitted together with one space.
1567    let tokens: Vec<crate::SyntaxToken> = d
1568        .syntax()
1569        .children_with_tokens()
1570        .filter_map(rowan::NodeOrToken::into_token)
1571        .filter(|t| !is_trivia_kind(t.kind()))
1572        .collect();
1573    // `seen_type` skips the leading DATE + CUSTOM_KW + type-STRING
1574    // tokens (already emitted above as the directive header); once
1575    // it flips true, every subsequent non-trivia token is a value
1576    // argument and gets emitted with single-space separation. An
1577    // adjacent NUMBER + CURRENCY pair is glued with a single space
1578    // (canonical Amount shape); the CURRENCY is NOT eaten as a
1579    // standalone arg next iteration.
1580    //
1581    // Beancount custom directives accept any mix of value kinds
1582    // including DATE — a `custom "type" 2024-06-15 100.00 USD`
1583    // shape has a DATE in value position. The previous version
1584    // skipped every DATE after seen_type, silently dropping such
1585    // user-provided date arguments.
1586    let mut seen_type = false;
1587    let mut i = 0;
1588    while i < tokens.len() {
1589        let t = &tokens[i];
1590        if !seen_type {
1591            if t.kind() == crate::SyntaxKind::STRING {
1592                seen_type = true;
1593            }
1594            i += 1;
1595            continue;
1596        }
1597        out.push(' ');
1598        if t.kind() == crate::SyntaxKind::NUMBER {
1599            out.push_str(&canonical_number(t.text()));
1600            if matches!(
1601                tokens.get(i + 1).map(rowan::SyntaxToken::kind),
1602                Some(crate::SyntaxKind::CURRENCY)
1603            ) {
1604                out.push(' ');
1605                out.push_str(tokens[i + 1].text());
1606                i += 2;
1607                continue;
1608            }
1609        } else {
1610            out.push_str(t.text());
1611        }
1612        i += 1;
1613    }
1614    out.push('\n');
1615    emit_meta_entries_of(d.syntax(), out);
1616}
1617
1618// ---- Top-level non-dated directives -----------------------------
1619
1620fn emit_option(d: &ast::OptionDirective, out: &mut String) {
1621    let key = d.key().map(|s| s.text().to_string()).unwrap_or_default();
1622    let value = d.value().map(|s| s.text().to_string()).unwrap_or_default();
1623    out.push_str("option ");
1624    out.push_str(&key);
1625    out.push(' ');
1626    out.push_str(&value);
1627    out.push('\n');
1628}
1629
1630fn emit_include(d: &ast::IncludeDirective, out: &mut String) {
1631    let path = d.path().map(|s| s.text().to_string()).unwrap_or_default();
1632    out.push_str("include ");
1633    out.push_str(&path);
1634    out.push('\n');
1635}
1636
1637fn emit_plugin(d: &ast::PluginDirective, out: &mut String) {
1638    let module = d.module().map(|s| s.text().to_string()).unwrap_or_default();
1639    out.push_str("plugin ");
1640    out.push_str(&module);
1641    if let Some(config) = d.config() {
1642        out.push(' ');
1643        out.push_str(config.text());
1644    }
1645    out.push('\n');
1646}
1647
1648// ---- State directives (no metadata) -----------------------------
1649
1650fn emit_pushtag(d: &ast::PushtagDirective, out: &mut String) {
1651    let tag = d.tag().map(|t| t.text().to_string()).unwrap_or_default();
1652    out.push_str("pushtag ");
1653    out.push_str(&tag);
1654    out.push('\n');
1655}
1656
1657fn emit_poptag(d: &ast::PoptagDirective, out: &mut String) {
1658    let tag = d.tag().map(|t| t.text().to_string()).unwrap_or_default();
1659    out.push_str("poptag ");
1660    out.push_str(&tag);
1661    out.push('\n');
1662}
1663
1664fn emit_pushmeta(d: &ast::PushmetaDirective, out: &mut String) {
1665    let key = d.key().map(|t| t.text().to_string()).unwrap_or_default();
1666    out.push_str("pushmeta ");
1667    out.push_str(&key);
1668    // Walk the value tokens after META_KEY, single-space separated.
1669    let mut past_key = false;
1670    for el in d.syntax().children_with_tokens() {
1671        let rowan::NodeOrToken::Token(t) = el else {
1672            continue;
1673        };
1674        if !past_key {
1675            if t.kind() == crate::SyntaxKind::META_KEY {
1676                past_key = true;
1677            }
1678            continue;
1679        }
1680        if is_trivia_kind(t.kind()) {
1681            continue;
1682        }
1683        out.push(' ');
1684        if t.kind() == crate::SyntaxKind::NUMBER {
1685            out.push_str(&canonical_number(t.text()));
1686        } else {
1687            out.push_str(t.text());
1688        }
1689    }
1690    out.push('\n');
1691}
1692
1693fn emit_popmeta(d: &ast::PopmetaDirective, out: &mut String) {
1694    let key = d.key().map(|t| t.text().to_string()).unwrap_or_default();
1695    out.push_str("popmeta ");
1696    out.push_str(&key);
1697    out.push('\n');
1698}
1699
1700// ---- Transaction + Posting --------------------------------------
1701
1702fn emit_transaction(d: &ast::Transaction, align: PostingAlignment, out: &mut String) {
1703    let date = d.date().map(|t| t.text().to_string()).unwrap_or_default();
1704    out.push_str(&date);
1705    out.push(' ');
1706    out.push_str(&transaction_flag_string(d));
1707    if let Some(payee) = d.payee() {
1708        out.push(' ');
1709        out.push_str(payee.text());
1710    }
1711    if let Some(narration) = d.narration() {
1712        out.push(' ');
1713        out.push_str(narration.text());
1714    }
1715    // Header-region tags/links — emitted in source order
1716    // (typed `.tags()` / `.links()` accessors return each kind
1717    // grouped, which loses interleaving like `#a ^l #b`). Walk
1718    // direct-child tokens, stopping at the header-terminating
1719    // NEWLINE.
1720    //
1721    // `seen_content` guards against LEADING trivia: for any directive
1722    // after the first, the preceding blank line's NEWLINE attaches
1723    // inside this node before the date (the Directive-Terminator Rule).
1724    // The header terminator is the first NEWLINE *after* the date, not
1725    // a leading one — otherwise this loop would break immediately and
1726    // emit no header tags (#1321).
1727    let mut seen_content = false;
1728    for el in d.syntax().children_with_tokens() {
1729        let rowan::NodeOrToken::Token(t) = el else {
1730            break;
1731        };
1732        match t.kind() {
1733            crate::SyntaxKind::TAG | crate::SyntaxKind::LINK => {
1734                out.push(' ');
1735                out.push_str(t.text());
1736                seen_content = true;
1737            }
1738            crate::SyntaxKind::NEWLINE if seen_content => break,
1739            // Leading trivia before the date: whitespace, blank-line
1740            // NEWLINEs, AND comment lines (a comment before a non-first
1741            // directive attaches inside this node per the Directive-
1742            // Terminator Rule). Skipping only WHITESPACE/NEWLINE would
1743            // let a leading comment flip `seen_content`, break at the
1744            // comment's NEWLINE, and drop the real header tags/links.
1745            k if k.is_trivia() => {}
1746            // DATE / flag / STRING etc. — header content has begun.
1747            _ => seen_content = true,
1748        }
1749    }
1750    out.push('\n');
1751    // Body: a single source-order walk over the transaction's children,
1752    // emitting — in the order they appear — POSTING / META_ENTRY nodes, any
1753    // body-internal COMMENT lines (#1332: the formatter must not delete the
1754    // author's comments), and trailing body-line TAG / LINK continuation
1755    // tokens (valid Beancount per the body-line exemption).
1756    //
1757    // `seen_content` / `past_header` skip the header region exactly as the
1758    // header loop above does, so the header-trailing comment (spliced onto
1759    // the header line by `emit_directive`) and the header tags/links (already
1760    // emitted inline above) are not duplicated here. A leading blank-line
1761    // NEWLINE for any directive past the first is trivia and must not flip
1762    // `past_header` early (#1321).
1763    let mut past_header = false;
1764    let mut seen_content = false;
1765    for el in d.syntax().children_with_tokens() {
1766        match el {
1767            rowan::NodeOrToken::Node(n) => {
1768                // A POSTING / META_ENTRY node is definitively past the header.
1769                past_header = true;
1770                if let Some(p) = ast::Posting::cast(n.clone()) {
1771                    emit_posting(&p, align, out);
1772                } else if let Some(m) = ast::MetaEntry::cast(n) {
1773                    emit_meta_entry(&m, INDENT, out);
1774                }
1775            }
1776            rowan::NodeOrToken::Token(t) => {
1777                if !past_header {
1778                    match t.kind() {
1779                        crate::SyntaxKind::NEWLINE if seen_content => past_header = true,
1780                        k if k.is_trivia() => {}
1781                        // DATE / flag / STRING / header TAG / LINK: still header.
1782                        _ => seen_content = true,
1783                    }
1784                    continue;
1785                }
1786                // Body tokens: preserve comment-only lines and emit
1787                // continuation tags/links, each on its own indented line.
1788                match t.kind() {
1789                    crate::SyntaxKind::COMMENT | crate::SyntaxKind::PERCENT_COMMENT => {
1790                        out.push_str(INDENT);
1791                        out.push_str(t.text().trim_end_matches(['\n', '\r']));
1792                        out.push('\n');
1793                    }
1794                    crate::SyntaxKind::TAG | crate::SyntaxKind::LINK => {
1795                        out.push_str(INDENT);
1796                        out.push_str(t.text());
1797                        out.push('\n');
1798                    }
1799                    _ => {}
1800                }
1801            }
1802        }
1803    }
1804}
1805
1806fn transaction_flag_string(d: &ast::Transaction) -> String {
1807    use crate::cst::ast::TransactionFlagKind;
1808    match d.flag() {
1809        None => "*".to_string(),
1810        Some(f) => match f.classify() {
1811            TransactionFlagKind::Star | TransactionFlagKind::Txn => "*".to_string(),
1812            TransactionFlagKind::Pending => "!".to_string(),
1813            TransactionFlagKind::Hash => "#".to_string(),
1814            TransactionFlagKind::Letter | TransactionFlagKind::CurrencyLetter => {
1815                f.text().to_string()
1816            }
1817        },
1818    }
1819}
1820
1821fn emit_posting(p: &ast::Posting, align: PostingAlignment, out: &mut String) {
1822    // Posting-trailing comment (same-line, before the posting-line
1823    // NEWLINE) — capture upfront so we can splice it back in just
1824    // before that NEWLINE, preserving the user's attachment intent.
1825    let trailing = collect_trailing_comment(p.syntax());
1826    let posting_start = out.len();
1827
1828    out.push_str(INDENT);
1829    let mut col = INDENT.len();
1830    if let Some(flag) = p.flag() {
1831        out.push_str(flag.text());
1832        out.push(' ');
1833        col += flag.text().chars().count() + 1;
1834    }
1835    let account_text = p
1836        .account()
1837        .map(|a| a.text().to_string())
1838        .unwrap_or_default();
1839    out.push_str(&account_text);
1840    col += account_text.chars().count();
1841
1842    if let Some(amt) = p.amount() {
1843        // `amount_number_text` is the shared "does this render a number?"
1844        // predicate (see `compute_alignment`); a currency-only amount
1845        // returns `None` and prints no number.
1846        if let Some(value) = amount_number_text(&amt) {
1847            // Two stages of padding:
1848            //   1) Account end → start of number field (`number_col`).
1849            //      Fall back to 2 spaces when the LHS already exceeds
1850            //      the file-wide max (over-long account name).
1851            //   2) Inside the number field, left-pad to right-justify
1852            //      to `number_width`. Effect: the currency column
1853            //      lands at a single uniform position file-wide even
1854            //      when numbers have different widths or signs.
1855            let field_pad = align.number_col.saturating_sub(col).max(2);
1856            let justify_pad = align.number_width.saturating_sub(value.chars().count());
1857            for _ in 0..(field_pad + justify_pad) {
1858                out.push(' ');
1859            }
1860            out.push_str(&value);
1861            if let Some(c) = amt.currency() {
1862                out.push(' ');
1863                out.push_str(c.text());
1864            }
1865            if let Some(cs) = p.cost_spec() {
1866                out.push(' ');
1867                out.push_str(&format_cost_spec(&cs));
1868            }
1869            if let Some(pa) = p.price_annotation() {
1870                out.push(' ');
1871                out.push_str(&format_price_annotation(&pa));
1872            }
1873        }
1874    }
1875    out.push('\n');
1876    // Splice the trailing comment in BEFORE the posting-line
1877    // NEWLINE (the first '\n' in the emitted posting region).
1878    if let Some(c) = trailing
1879        && let Some(rel) = out[posting_start..].find('\n')
1880    {
1881        let mut splice = String::with_capacity(c.len() + 1);
1882        splice.push(' ');
1883        splice.push_str(&c);
1884        out.insert_str(posting_start + rel, &splice);
1885    }
1886    // Posting body: emit attached metadata AND posting-internal comment
1887    // lines in source order, indented 4 (deeper than the posting's 2).
1888    // Comment-only lines inside a posting attach as COMMENT tokens of the
1889    // POSTING node; walking children-with-tokens preserves them (#1337)
1890    // instead of dropping them. The posting's own header line is skipped via
1891    // the seen_content/past_header guard, so the same-line trailing comment
1892    // (spliced above) is not duplicated here.
1893    let mut past_header = false;
1894    let mut seen_content = false;
1895    for el in p.syntax().children_with_tokens() {
1896        match el {
1897            rowan::NodeOrToken::Node(n) => {
1898                // Header child nodes (AMOUNT / COST_SPEC / PRICE_ANNOTATION)
1899                // are emitted inline above and must NOT flip `past_header` —
1900                // only the posting-line NEWLINE does. Otherwise the same-line
1901                // trailing comment, which follows the AMOUNT node, would be
1902                // re-emitted here as a body comment. META_ENTRY nodes only
1903                // appear in the body, after `past_header` is already set.
1904                if let Some(m) = ast::MetaEntry::cast(n) {
1905                    emit_meta_entry(&m, "    ", out);
1906                }
1907            }
1908            rowan::NodeOrToken::Token(t) => {
1909                if !past_header {
1910                    match t.kind() {
1911                        crate::SyntaxKind::NEWLINE if seen_content => past_header = true,
1912                        k if k.is_trivia() => {}
1913                        _ => seen_content = true,
1914                    }
1915                    continue;
1916                }
1917                if matches!(
1918                    t.kind(),
1919                    crate::SyntaxKind::COMMENT | crate::SyntaxKind::PERCENT_COMMENT
1920                ) {
1921                    out.push_str("    ");
1922                    out.push_str(t.text().trim_end_matches(['\n', '\r']));
1923                    out.push('\n');
1924                }
1925            }
1926        }
1927    }
1928}
1929
1930/// Format an `AMOUNT` (units + currency) in canonical form. For
1931/// arithmetic shapes, emits the expression with single-space
1932/// separators (parens tight); for plain shapes, emits
1933/// `NUMBER CURRENCY` with thousands separators stripped.
1934fn format_amount(amt: &ast::Amount) -> String {
1935    let mut out = String::new();
1936    if amt.is_arithmetic() {
1937        emit_amount_subnode_expression(amt.syntax(), &mut out);
1938        if let Some(c) = amt.currency() {
1939            if !out.is_empty() {
1940                out.push(' ');
1941            }
1942            out.push_str(c.text());
1943        }
1944        return out;
1945    }
1946    if let Some(sign) = amt.sign()
1947        && sign.is_minus()
1948    {
1949        out.push('-');
1950    }
1951    if let Some(n) = amt.number() {
1952        out.push_str(&canonical_number(n.text()));
1953    }
1954    if let Some(c) = amt.currency() {
1955        if !out.is_empty() && !out.ends_with('-') {
1956            out.push(' ');
1957        }
1958        out.push_str(c.text());
1959    }
1960    out
1961}
1962
1963/// Canonical form for cost specs: `{cost CCY}` (single-brace
1964/// per-unit), `{{cost CCY}}` (double-brace total), `{# cost CCY}`
1965/// (per-unit + total via opener), or the in-brace `{N # T CCY}`
1966/// shape preserved as-is with single-space normalization.
1967///
1968/// Commas separating cost components (`{N CCY, DATE, "label"}`)
1969/// stay tight against the preceding token; every other adjacent
1970/// token pair is joined with a single space.
1971fn format_cost_spec(cs: &ast::CostSpec) -> String {
1972    let (open, close) = if cs.is_total() {
1973        ("{{", "}}")
1974    } else if cs.is_per_unit_plus_total() {
1975        ("{#", "}")
1976    } else {
1977        ("{", "}")
1978    };
1979    // Collect inner content tokens (skip opener/closer/whitespace),
1980    // then route through write_canonical_token_sequence so the spacing rule
1981    // is identical to balance/price/AMOUNT-subnode arithmetic — most
1982    // importantly, unary `+`/`-` stays tight (`{-500 USD}`, not
1983    // `{- 500 USD}`) and COMMA stays tight.
1984    let inner_tokens: Vec<crate::SyntaxToken> = cs
1985        .syntax()
1986        .children_with_tokens()
1987        .filter_map(rowan::NodeOrToken::into_token)
1988        .filter(|t| {
1989            !matches!(
1990                t.kind(),
1991                crate::SyntaxKind::L_BRACE
1992                    | crate::SyntaxKind::R_BRACE
1993                    | crate::SyntaxKind::L_DOUBLE_BRACE
1994                    | crate::SyntaxKind::R_DOUBLE_BRACE
1995                    | crate::SyntaxKind::L_BRACE_HASH
1996                    | crate::SyntaxKind::WHITESPACE
1997                    | crate::SyntaxKind::NEWLINE
1998            )
1999        })
2000        .collect();
2001    let mut inner = String::new();
2002    write_canonical_token_sequence(&inner_tokens, &mut inner);
2003    // The `{#` opener is a two-character marker; canonical form
2004    // separates it from the first inner token with a single space
2005    // (matching the rendering in this function's rustdoc). `{` and
2006    // `{{` don't get inner padding per the canonical-form spec.
2007    if cs.is_per_unit_plus_total() && !inner.is_empty() {
2008        format!("{open} {inner}{close}")
2009    } else {
2010        format!("{open}{inner}{close}")
2011    }
2012}
2013
2014/// Canonical price annotation: `@ amount` (per-unit) or
2015/// `@@ amount` (total).
2016fn format_price_annotation(pa: &ast::PriceAnnotation) -> String {
2017    let op = if pa.is_total() { "@@" } else { "@" };
2018    match pa.amount() {
2019        Some(a) => format!("{op} {}", format_amount(&a)),
2020        None => op.to_string(),
2021    }
2022}
2023
2024// ---- Helpers ---------------------------------------------------
2025
2026/// True for tokens that don't contribute content to the canonical
2027/// form: whitespace, newlines, every comment kind, and the
2028/// leading-file `BOM` token.
2029const fn is_trivia_kind(kind: crate::SyntaxKind) -> bool {
2030    matches!(
2031        kind,
2032        crate::SyntaxKind::WHITESPACE
2033            | crate::SyntaxKind::NEWLINE
2034            | crate::SyntaxKind::COMMENT
2035            | crate::SyntaxKind::PERCENT_COMMENT
2036            | crate::SyntaxKind::SHEBANG
2037            | crate::SyntaxKind::EMACS_DIRECTIVE
2038            | crate::SyntaxKind::BOM
2039    )
2040}
2041
/// Canonical text for a `NUMBER` token: every `,` (thousands separator) is
/// removed and all other characters — including the user's decimal places —
/// are kept as written. `1,000.00` → `1000.00`, `1.0` → `1.0`.
fn canonical_number(text: &str) -> String {
    text.chars().filter(|&ch| ch != ',').collect()
}
2052
2053/// Emit the arithmetic expression of a `PRICE` / `BALANCE`
2054/// directive: tokens from the first expression-starting token
2055/// (`NUMBER`, unary `+`/`-`, or `(`) up to (but not including) the
2056/// first `CURRENCY` at paren-depth 0. Spacing rules per
2057/// [`write_canonical_token_sequence`].
2058///
2059/// **Why the predicate must allow `PLUS` / `MINUS` / `L_PAREN`,
2060/// not just `NUMBER`.** A previous version skipped tokens until
2061/// it hit a `NUMBER`, which silently dropped leading unary signs
2062/// and opening parens — flipping the sign on inputs like
2063/// `2024-01-15 price USD -1.00 EUR` (formatted to `1.00 EUR`) and
2064/// corrupting parenthesized expressions like
2065/// `2024-01-15 balance Assets:A (1 + 2) USD` (formatted to
2066/// `1 + 2) USD USD`). Sign drift in BALANCE / PRICE is silent data
2067/// corruption — a balance assertion that previously asserted a
2068/// debit would assert a credit after a round-trip.
2069fn emit_amount_expression(node: &crate::SyntaxNode, out: &mut String) {
2070    let raw: Vec<crate::SyntaxToken> = node
2071        .children_with_tokens()
2072        .filter_map(rowan::NodeOrToken::into_token)
2073        .filter(|t| !is_trivia_kind(t.kind()))
2074        .skip_while(|t| {
2075            !matches!(
2076                t.kind(),
2077                crate::SyntaxKind::NUMBER
2078                    | crate::SyntaxKind::PLUS
2079                    | crate::SyntaxKind::MINUS
2080                    | crate::SyntaxKind::L_PAREN
2081            )
2082        })
2083        .collect();
2084    let mut depth: i32 = 0;
2085    let mut first_currency_idx: Option<usize> = None;
2086    for (i, t) in raw.iter().enumerate() {
2087        match t.kind() {
2088            crate::SyntaxKind::L_PAREN => depth += 1,
2089            crate::SyntaxKind::R_PAREN => depth -= 1,
2090            crate::SyntaxKind::CURRENCY if depth == 0 && first_currency_idx.is_none() => {
2091                first_currency_idx = Some(i);
2092            }
2093            _ => {}
2094        }
2095    }
2096    let end = first_currency_idx.unwrap_or(raw.len());
2097    write_canonical_token_sequence(&raw[..end], out);
2098}
2099
2100/// Emit an `AMOUNT` subnode's expression region: every non-trivia
2101/// token minus the trailing `CURRENCY` (caller re-emits the
2102/// currency itself). Used by [`format_amount`] for arithmetic
2103/// posting amounts like `-(1.00 + 2.00) USD`.
2104fn emit_amount_subnode_expression(node: &crate::SyntaxNode, out: &mut String) {
2105    let mut tokens: Vec<crate::SyntaxToken> = node
2106        .children_with_tokens()
2107        .filter_map(rowan::NodeOrToken::into_token)
2108        .filter(|t| !is_trivia_kind(t.kind()))
2109        .collect();
2110    if let Some(last) = tokens.last()
2111        && last.kind() == crate::SyntaxKind::CURRENCY
2112    {
2113        tokens.pop();
2114    }
2115    write_canonical_token_sequence(&tokens, out);
2116}
2117
2118/// Single dispatcher for the canonical spacing rules used by EVERY
2119/// token-sequence emit path: balance / price arithmetic, AMOUNT
2120/// subnodes, cost-spec interiors, and metadata values. There is no
2121/// separate path; each call site collects the relevant non-trivia
2122/// tokens and routes them through here so the rules cannot drift
2123/// between contexts.
2124///
2125/// Rules:
2126///
2127/// - single space between adjacent operands / binary operators
2128/// - no space after `(` or before `)` (parens stay tight)
2129/// - no space after a unary `+` / `-` (one that opens the run
2130///   or follows `(` or another operator)
2131/// - no space before `,` (commas in cost-spec component lists
2132///   stay tight against the preceding token)
2133///
2134/// **Adding a new `SyntaxKind` to the formatter implies thinking
2135/// about its effect on every call site of this function.** A new
2136/// operator-like kind added to `is_op` will silently change cost-
2137/// spec and metadata spacing too; a new bracket-like kind needs
2138/// its own rule. The corpus-level idempotence test
2139/// (`idempotence_corpus_sweep`) is the safety net that catches
2140/// drifts.
2141fn write_canonical_token_sequence(tokens: &[crate::SyntaxToken], out: &mut String) {
2142    let is_op = |k: crate::SyntaxKind| {
2143        matches!(
2144            k,
2145            crate::SyntaxKind::PLUS
2146                | crate::SyntaxKind::MINUS
2147                | crate::SyntaxKind::STAR
2148                | crate::SyntaxKind::SLASH
2149        )
2150    };
2151    let mut prev_kind: Option<crate::SyntaxKind> = None;
2152    let mut prev_was_unary = false;
2153    for t in tokens {
2154        let kind = t.kind();
2155        let is_unary = is_op(kind)
2156            && match prev_kind {
2157                None => true,
2158                Some(p) => p == crate::SyntaxKind::L_PAREN || is_op(p),
2159            };
2160        let need_space = match prev_kind {
2161            None => false,
2162            Some(prev) => {
2163                prev != crate::SyntaxKind::L_PAREN
2164                    && kind != crate::SyntaxKind::R_PAREN
2165                    && kind != crate::SyntaxKind::COMMA
2166                    && !prev_was_unary
2167            }
2168        };
2169        if need_space {
2170            out.push(' ');
2171        }
2172        if kind == crate::SyntaxKind::NUMBER {
2173            out.push_str(&canonical_number(t.text()));
2174        } else {
2175            out.push_str(t.text());
2176        }
2177        prev_kind = Some(kind);
2178        prev_was_unary = is_unary;
2179    }
2180}
2181
2182/// Extract a balance directive's optional tolerance — the
2183/// `NUMBER` after the first `TILDE`, plus an optional trailing
2184/// `CURRENCY` at paren-depth 0.
2185fn balance_tolerance(node: &crate::SyntaxNode) -> Option<(String, Option<String>)> {
2186    let mut past_tilde = false;
2187    let mut number: Option<String> = None;
2188    let mut currency: Option<String> = None;
2189    for el in node.children_with_tokens() {
2190        let rowan::NodeOrToken::Token(t) = el else {
2191            continue;
2192        };
2193        if !past_tilde {
2194            if t.kind() == crate::SyntaxKind::TILDE {
2195                past_tilde = true;
2196            }
2197            continue;
2198        }
2199        match t.kind() {
2200            crate::SyntaxKind::NUMBER if number.is_none() => {
2201                number = Some(canonical_number(t.text()));
2202            }
2203            crate::SyntaxKind::CURRENCY if number.is_some() && currency.is_none() => {
2204                currency = Some(t.text().to_string());
2205            }
2206            _ => {}
2207        }
2208    }
2209    number.map(|n| (n, currency))
2210}
2211
2212// ---- Metadata --------------------------------------------------
2213
2214/// Walk a directive's direct-child `META_ENTRY` nodes and emit
2215/// each on its own indented line in canonical form (`indent + KEY:
2216/// value\n`). Most directive types don't have a `.meta_entries()`
2217/// accessor on their typed wrapper; we walk the syntax node
2218/// directly to stay uniform.
2219fn emit_meta_entries_of(node: &crate::SyntaxNode, out: &mut String) {
2220    // Source-order walk so body-internal COMMENT lines are preserved
2221    // alongside the metadata entries (#1332). The header region (up to and
2222    // including the header-terminating NEWLINE) is skipped so the
2223    // header-trailing comment — spliced onto the header line by
2224    // `emit_directive` — is not duplicated here.
2225    let mut past_header = false;
2226    let mut seen_content = false;
2227    for el in node.children_with_tokens() {
2228        match el {
2229            rowan::NodeOrToken::Node(n) => {
2230                past_header = true;
2231                if let Some(entry) = MetaEntry::cast(n) {
2232                    emit_meta_entry(&entry, INDENT, out);
2233                }
2234            }
2235            rowan::NodeOrToken::Token(t) => {
2236                if !past_header {
2237                    match t.kind() {
2238                        crate::SyntaxKind::NEWLINE if seen_content => past_header = true,
2239                        k if k.is_trivia() => {}
2240                        _ => seen_content = true,
2241                    }
2242                    continue;
2243                }
2244                if matches!(
2245                    t.kind(),
2246                    crate::SyntaxKind::COMMENT | crate::SyntaxKind::PERCENT_COMMENT
2247                ) {
2248                    out.push_str(INDENT);
2249                    out.push_str(t.text().trim_end_matches(['\n', '\r']));
2250                    out.push('\n');
2251                }
2252            }
2253        }
2254    }
2255}
2256
2257/// Canonical emit for a single `META_ENTRY`. Walks non-trivia
2258/// tokens, prints them with single-space separation, and
2259/// normalizes numbers via [`canonical_number`]. The `META_KEY`
2260/// token already includes the trailing colon (e.g. `note:`); the
2261/// value side gets the same NUMBER + CURRENCY gluing rule the
2262/// rest of the formatter uses elsewhere.
2263///
2264/// Two semantically-equivalent inputs (e.g. `foo: "bar"` and
2265/// `foo:    "bar"`) produce byte-identical output — the
2266/// gofmt-style invariant the file rustdoc promises.
2267fn emit_meta_entry(m: &MetaEntry, indent: &str, out: &mut String) {
2268    out.push_str(indent);
2269    // Split the META_ENTRY's non-trivia tokens into [META_KEY,
2270    // value*]. The META_KEY token already includes the trailing
2271    // colon (e.g. `note:`); the value tokens go through
2272    // write_canonical_token_sequence so the spacing rules — unary +/-
2273    // tight, COMMA tight, paren-tight, NUMBER canonicalized — are
2274    // shared with the balance/price/cost-spec/posting-amount paths.
2275    let content: Vec<crate::SyntaxToken> = m
2276        .syntax()
2277        .children_with_tokens()
2278        .filter_map(rowan::NodeOrToken::into_token)
2279        .filter(|t| {
2280            !matches!(
2281                t.kind(),
2282                crate::SyntaxKind::WHITESPACE | crate::SyntaxKind::NEWLINE
2283            )
2284        })
2285        .collect();
2286    let mut iter = content.iter();
2287    if let Some(key) = iter.next() {
2288        out.push_str(key.text());
2289    }
2290    let value_tokens: Vec<crate::SyntaxToken> = iter.cloned().collect();
2291    if !value_tokens.is_empty() {
2292        out.push(' ');
2293        write_canonical_token_sequence(&value_tokens, out);
2294    }
2295    out.push('\n');
2296}
2297
2298#[cfg(test)]
2299mod tests {
2300    use super::*;
2301
2302    #[test]
2303    fn empty_input_yields_single_newline() {
2304        assert_eq!(format_source(""), "\n");
2305    }
2306
2307    #[test]
2308    fn open_directive_canonical() {
2309        let src = "2024-01-15   open    Assets:Cash\n";
2310        assert_eq!(format_source(src), "2024-01-15 open Assets:Cash\n");
2311    }
2312
2313    #[test]
2314    fn open_with_currencies_and_booking_canonical() {
2315        // The currency constraint list is comma-separated; emitting spaces
2316        // produced invalid beancount (#1405).
2317        let src = "2024-01-15 open Assets:Brokerage USD,EUR \"STRICT\"\n";
2318        assert_eq!(
2319            format_source(src),
2320            "2024-01-15 open Assets:Brokerage USD,EUR \"STRICT\"\n"
2321        );
2322    }
2323
2324    /// Regression for #1405: `format` must keep the open currency list
2325    /// comma-separated, not rewrite it space-separated (invalid syntax), and
2326    /// the result must be idempotent.
2327    #[test]
2328    fn open_currency_list_stays_comma_separated() {
2329        let src = "2026-01-01 open Assets:Wallet USD,EUR\n";
2330        let once = format_source(src);
2331        assert_eq!(once, "2026-01-01 open Assets:Wallet USD,EUR\n");
2332        assert_eq!(format_source(&once), once, "format must be idempotent");
2333    }
2334
2335    #[test]
2336    fn close_directive_canonical() {
2337        let src = "2024-12-31 close Assets:Cash\n";
2338        assert_eq!(format_source(src), "2024-12-31 close Assets:Cash\n");
2339    }
2340
2341    #[test]
2342    fn commodity_directive_canonical() {
2343        let src = "2024-01-01 commodity HOOL\n";
2344        assert_eq!(format_source(src), "2024-01-01 commodity HOOL\n");
2345    }
2346
2347    #[test]
2348    fn blank_lines_between_directives_preserved() {
2349        // #1325: the formatter preserves the author's inter-directive
2350        // blank lines rather than normalizing to exactly one (matching
2351        // Python bean-format and the rest of the beancount lineage).
2352
2353        // Grouped (no blank in source) stays grouped — not double-spaced.
2354        let grouped = "2024-01-01 open Assets:A\n2024-01-02 open Assets:B\n";
2355        assert_eq!(format_source(grouped), grouped);
2356
2357        // One blank is preserved as one.
2358        let one = "2024-01-01 open Assets:A\n\n2024-01-02 open Assets:B\n";
2359        assert_eq!(format_source(one), one);
2360
2361        // Two blanks are preserved as two (not collapsed).
2362        let two = "2024-01-01 open Assets:A\n\n\n2024-01-02 open Assets:B\n";
2363        assert_eq!(format_source(two), two);
2364
2365        // A whitespace-only "blank" line still counts as one blank line
2366        // (its trailing whitespace is stripped, leaving an empty line).
2367        let ws_blank = "2024-01-01 open Assets:A\n   \n2024-01-02 open Assets:B\n";
2368        assert_eq!(
2369            format_source(ws_blank),
2370            "2024-01-01 open Assets:A\n\n2024-01-02 open Assets:B\n"
2371        );
2372    }
2373
2374    #[test]
2375    fn trailing_newline_always_present() {
2376        let src = "2024-01-01 open Assets:A";
2377        let formatted = format_source(src);
2378        assert!(formatted.ends_with('\n'));
2379        assert!(!formatted.ends_with("\n\n"));
2380    }
2381
2382    #[test]
2383    fn idempotent_on_canonical_input() {
2384        let src = "2024-01-01 open Assets:A\n\n2024-01-02 close Assets:A\n";
2385        let once = format_source(src);
2386        let twice = format_source(&once);
2387        assert_eq!(once, twice);
2388    }
2389
2390    #[test]
2391    fn note_canonical() {
2392        let src = "2024-01-15   note   Assets:Cash   \"a note\"\n";
2393        assert_eq!(
2394            format_source(src),
2395            "2024-01-15 note Assets:Cash \"a note\"\n"
2396        );
2397    }
2398
2399    #[test]
2400    fn event_canonical() {
2401        let src = "2024-01-15  event  \"location\"   \"NYC\"\n";
2402        assert_eq!(
2403            format_source(src),
2404            "2024-01-15 event \"location\" \"NYC\"\n"
2405        );
2406    }
2407
2408    #[test]
2409    fn query_canonical() {
2410        let src = "2024-01-15 query \"q1\" \"SELECT account\"\n";
2411        assert_eq!(
2412            format_source(src),
2413            "2024-01-15 query \"q1\" \"SELECT account\"\n"
2414        );
2415    }
2416
2417    #[test]
2418    fn pad_canonical() {
2419        let src = "2024-01-15  pad   Assets:A   Equity:Opening\n";
2420        assert_eq!(
2421            format_source(src),
2422            "2024-01-15 pad Assets:A Equity:Opening\n"
2423        );
2424    }
2425
2426    #[test]
2427    fn document_with_tags_and_links_canonical() {
2428        let src = "2024-06-01 document Assets:Bank \"stmt.pdf\" #q1 ^scan42 #urgent\n";
2429        assert_eq!(
2430            format_source(src),
2431            "2024-06-01 document Assets:Bank \"stmt.pdf\" #q1 ^scan42 #urgent\n"
2432        );
2433    }
2434
2435    #[test]
2436    fn issue_1321_document_tags_links_idempotent_across_directives() {
2437        // Same class as the transaction case, in `document` directives:
2438        // the 2nd+ document's trailing tags/links were dropped on a
2439        // reformat (found by the #1323 corpus idempotence check). Assert
2440        // the fixed-point property: re-formatting must not change (and
2441        // must not drop the tags/links of the second document).
2442        let src = "\
24432013-05-18 document Assets:Bank \"/a.pdf\" #tag1 ^link1
24442013-05-19 document Assets:Bank \"/b.pdf\" #tag2 ^link2
2445";
2446        let once = format_source(src);
2447        assert_eq!(format_source(&once), once, "format must be idempotent");
2448        assert!(
2449            once.contains("#tag2") && once.contains("^link2"),
2450            "the second document's tags/links must survive formatting; got:\n{once}"
2451        );
2452    }
2453
2454    #[test]
2455    fn issue_1321_header_tags_links_idempotent_across_transactions() {
2456        // Header tags/links must stay on the header line for EVERY
2457        // transaction, not just the first. Regression for #1321 where
2458        // the 2nd+ transaction's header tags/links got migrated to
2459        // continuation lines.
2460        let src = "\
24612024-01-15 * \"x\" #tag1 ^link1 #tag2 ^link2
2462  Assets:Cash    -1.00 USD
2463  Expenses:Misc   1.00 USD
2464
24652024-01-16 * \"x\" #tag1 ^link1 #tag2 ^link2
2466  Assets:Cash    -1.00 USD
2467  Expenses:Misc   1.00 USD
2468";
2469        assert_eq!(
2470            format_source(src),
2471            src,
2472            "format must be a no-op (idempotent)"
2473        );
2474    }
2475
2476    #[test]
2477    fn issue_1321_comment_before_transaction_keeps_header_tags() {
2478        // A comment line before a transaction is leading trivia attached
2479        // inside the transaction node (Directive-Terminator Rule), exactly
2480        // like a blank line. Skipping only WHITESPACE/NEWLINE let the
2481        // comment flip `seen_content`, break at the comment's NEWLINE, and
2482        // migrate the real header tags/links to continuation lines. The
2483        // header tags/links must stay on the header line. (Found by the
2484        // Copilot review of the #1321 fix.)
2485        let src = "\
24862024-01-15 * \"first\" #h1 ^l1
2487  Assets:Cash    -1.00 USD
2488  Expenses:Misc   1.00 USD
2489
2490; a comment before the second transaction
24912024-01-16 * \"second\" #tag1 ^link1
2492  Assets:Cash    -2.00 USD
2493  Expenses:Misc   2.00 USD
2494";
2495        assert_eq!(
2496            format_source(src),
2497            src,
2498            "a leading comment must not migrate header tags/links to continuation lines"
2499        );
2500    }
2501
2502    #[test]
2503    fn issue_1321_comment_before_document_keeps_tags() {
2504        // Document-directive variant of the comment-trivia case above.
2505        let src = "\
25062013-05-18 document Assets:Bank \"/a.pdf\" #tag1 ^link1
2507; a comment before the second document
25082013-05-19 document Assets:Bank \"/b.pdf\" #tag2 ^link2
2509";
2510        let once = format_source(src);
2511        assert_eq!(format_source(&once), once, "format must be idempotent");
2512        assert!(
2513            once.contains("\"/b.pdf\" #tag2 ^link2"),
2514            "the second document's tags/links must stay on its header line; got:\n{once}"
2515        );
2516    }
2517
2518    #[test]
2519    fn issue_1332_body_comments_in_metadata_preserved() {
2520        // The formatter must NOT delete comment-only lines inside a
2521        // directive body (#1332). Here two commented-out `; price:` lines
2522        // sit between metadata entries in a `commodity` body; they must
2523        // survive, interleaved in source order, and the result is idempotent.
2524        let src = "\
25252023-06-04 commodity EAM-VEUR ; cSpell: word VEUR
2526  name: \"Vanguard FTSE Developed Europe UCITS ETF EUR Dist\"
2527  ; price: \"EUR:alphavantage/price:VEUR.AS:EUR\"
2528  ; price: \"EUR:yahoo/VEUR.AS\"
2529  price: \"EUR:pricehist.beanprice.yahoo/VEUR.AS\"
2530";
2531        assert_eq!(
2532            format_source(src),
2533            src,
2534            "body comments must be preserved verbatim"
2535        );
2536        assert_eq!(format_source(&format_source(src)), format_source(src));
2537    }
2538
2539    #[test]
2540    fn issue_1332_body_comments_between_postings_preserved() {
2541        // Same class, inside a transaction body: a comment-only line between
2542        // postings must survive (in source order, 2-space indent). Asserted
2543        // via preservation + idempotence rather than an exact match, since
2544        // amount alignment is also canonicalized.
2545        let src = "\
25462024-01-15 * \"Cafe\" \"Latte\"
2547  Expenses:Coffee   4.50 USD
2548  ; was 5.00 before the discount
2549  Assets:Checking
2550";
2551        let out = format_source(src);
2552        assert!(
2553            out.contains("\n  ; was 5.00 before the discount\n"),
2554            "the body comment must be preserved on its own indented line; got:\n{out}"
2555        );
2556        // Order: the comment stays between the two postings.
2557        let coffee = out.find("Expenses:Coffee").unwrap();
2558        let comment = out.find("; was 5.00").unwrap();
2559        let checking = out.find("Assets:Checking").unwrap();
2560        assert!(
2561            coffee < comment && comment < checking,
2562            "comment must stay between postings:\n{out}"
2563        );
2564        assert_eq!(format_source(&out), out, "format must be idempotent");
2565    }
2566
2567    #[test]
2568    fn issue_1335_org_headers_and_grouped_comments_preserved() {
2569        // The formatter must not delete unparsable content (#1335).
2570        // Org-mode `*` section headers parse into ERROR_NODEs, and comments
2571        // grouped with them get swallowed into the same node — previously all
2572        // dropped. They must survive, and the result must be idempotent.
2573        let src = "\
2574* Section A
2575;; comment between headers
2576;; second line
2577* Section B
25782013-01-01 open Assets:X
2579";
2580        let out = format_source(src);
2581        // Use the exact `;;` needles: a single-`;` substring would still match
2582        // `;; ...` even if one `;` were dropped, weakening the regression.
2583        for needle in [
2584            "* Section A",
2585            ";; comment between headers",
2586            ";; second line",
2587            "* Section B",
2588            "2013-01-01 open Assets:X",
2589        ] {
2590            assert!(
2591                out.contains(needle),
2592                "lost {needle:?} on format; got:\n{out}"
2593            );
2594        }
2595        assert_eq!(format_source(&out), out, "format must be idempotent");
2596    }
2597
    #[test]
    fn issue_1335_org_header_then_directive_keeps_header() {
        // A lone org header directly before a directive: the header is an
        // ERROR_NODE and must be kept in the output. The final assertion
        // additionally checks that re-formatting the output is a no-op.
        // NOTE(review): this comment previously mentioned a comment that
        // "attaches to the directive", but the fixture below contains no
        // comment line — confirm whether one was meant to be included.
        let src = "* Accounts\n2013-01-01 open Assets:X\n";
        let out = format_source(src);
        assert!(
            out.contains("* Accounts"),
            "org header dropped; got:\n{out}"
        );
        assert_eq!(format_source(&out), out);
    }
2611
2612    #[test]
2613    fn issue_1335_blank_lines_around_org_header_preserved() {
2614        // An ERROR_NODE is a top-level content block: the author's blank line
2615        // between an org header and the following directive is preserved (it
2616        // is not flushed), and the result is idempotent.
2617        let src = "* Accounts\n\n2013-01-01 open Assets:X\n";
2618        assert_eq!(
2619            format_source(src),
2620            src,
2621            "blank around org header must be kept"
2622        );
2623        assert_eq!(format_source(&format_source(src)), format_source(src));
2624    }
2625
2626    #[test]
2627    fn issue_1337_posting_internal_comments_preserved() {
2628        // A comment on its own line inside a posting attaches as a COMMENT
2629        // token of the POSTING node; it must be preserved (#1337), not
2630        // dropped, and stay between its posting and the next.
2631        let src = "\
26322024-01-15 * \"x\"
2633  Assets:A   1.00 USD
2634    ; posting-internal note
2635  Assets:B
2636";
2637        let out = format_source(src);
2638        assert!(
2639            out.contains("; posting-internal note"),
2640            "posting-internal comment dropped; got:\n{out}"
2641        );
2642        let a = out.find("Assets:A").unwrap();
2643        let c = out.find("; posting-internal note").unwrap();
2644        let b = out.find("Assets:B").unwrap();
2645        assert!(a < c && c < b, "comment must stay between postings:\n{out}");
2646        assert_eq!(format_source(&out), out, "format must be idempotent");
2647    }
2648
2649    #[test]
2650    fn price_canonical_strips_thousands_separators() {
2651        let src = "2024-01-15 price USD  1,234.56 EUR\n";
2652        assert_eq!(format_source(src), "2024-01-15 price USD 1234.56 EUR\n");
2653    }
2654
2655    #[test]
2656    fn price_arithmetic_canonicalizes_spacing() {
2657        let src = "2024-01-15 price USD 1/2 EUR\n";
2658        assert_eq!(format_source(src), "2024-01-15 price USD 1 / 2 EUR\n");
2659    }
2660
2661    #[test]
2662    fn balance_canonical() {
2663        let src = "2024-01-15  balance  Assets:Cash   100.00  USD\n";
2664        assert_eq!(
2665            format_source(src),
2666            "2024-01-15 balance Assets:Cash 100.00 USD\n"
2667        );
2668    }
2669
2670    #[test]
2671    fn balance_with_tolerance_canonical() {
2672        let src = "2024-01-15 balance Assets:Cash 100.00 USD ~ 0.01 USD\n";
2673        assert_eq!(
2674            format_source(src),
2675            "2024-01-15 balance Assets:Cash 100.00 USD ~ 0.01 USD\n"
2676        );
2677    }
2678
2679    #[test]
2680    fn balance_arithmetic_canonical() {
2681        let src = "2024-01-15 balance Assets:Cash  0.25 + 0.75  USD\n";
2682        assert_eq!(
2683            format_source(src),
2684            "2024-01-15 balance Assets:Cash 0.25 + 0.75 USD\n"
2685        );
2686    }
2687
2688    #[test]
2689    fn custom_canonical() {
2690        let src = "2024-01-01 custom \"budget\" Expenses:Food 500.00 USD\n";
2691        assert_eq!(
2692            format_source(src),
2693            "2024-01-01 custom \"budget\" Expenses:Food 500.00 USD\n"
2694        );
2695    }
2696
2697    #[test]
2698    fn option_canonical() {
2699        let src = "option   \"title\"   \"My Ledger\"\n";
2700        assert_eq!(format_source(src), "option \"title\" \"My Ledger\"\n");
2701    }
2702
2703    #[test]
2704    fn include_canonical() {
2705        let src = "include  \"other.beancount\"\n";
2706        assert_eq!(format_source(src), "include \"other.beancount\"\n");
2707    }
2708
2709    #[test]
2710    fn plugin_canonical_with_config() {
2711        let src = "plugin  \"beancount.plugins.unrealized\"  \"Unrealized\"\n";
2712        assert_eq!(
2713            format_source(src),
2714            "plugin \"beancount.plugins.unrealized\" \"Unrealized\"\n"
2715        );
2716    }
2717
2718    #[test]
2719    fn plugin_canonical_without_config() {
2720        let src = "plugin   \"my.plugin\"\n";
2721        assert_eq!(format_source(src), "plugin \"my.plugin\"\n");
2722    }
2723
2724    #[test]
2725    fn pushtag_poptag_canonical() {
2726        // No blank line in the source — preserved as grouped (#1325).
2727        let src = "pushtag  #active\npoptag  #active\n";
2728        assert_eq!(format_source(src), "pushtag #active\npoptag #active\n");
2729    }
2730
2731    #[test]
2732    fn pushmeta_popmeta_canonical() {
2733        // No blank line in the source — preserved as grouped (#1325).
2734        let src = "pushmeta location: \"NYC\"\npopmeta location:\n";
2735        assert_eq!(
2736            format_source(src),
2737            "pushmeta location: \"NYC\"\npopmeta location:\n"
2738        );
2739    }
2740
2741    // ---- Transaction tests ------------------------------------
2742
    #[test]
    fn transaction_minimal_two_postings_aligns_amounts() {
        let src = "\
2024-01-15 * \"Coffee\"
  Assets:Cash       -5.00 USD
  Expenses:Coffee    5.00 USD
";
        // Reading the expected output below (0-based columns):
        // - Longest amount-bearing account is `Expenses:Coffee` (15
        //   chars); with the 2-space indent it ends at column 17.
        // - Widest number is `-5.00` (5 chars). It starts at column 19,
        //   i.e. two columns past the longest account.
        // - Posting 1: `Assets:Cash` ends at column 13, followed by 6
        //   spaces, then `-5.00` (fills the number field, no left-pad).
        // - Posting 2: `Expenses:Coffee` ends at column 17, followed by
        //   3 spaces, then `5.00` (4 chars, right-aligned in the field).
        // - Both numbers end at the same column, so `USD` lines up.
        let expected = "\
2024-01-15 * \"Coffee\"
  Assets:Cash      -5.00 USD
  Expenses:Coffee   5.00 USD
";
        assert_eq!(format_source(src), expected);
    }
2763
2764    /// Regression for #1290: an amount-less posting (the common elided
2765    /// balancing leg) must NOT widen the number column, even when its
2766    /// account is longer than every amount-bearing account. `bean-format`
2767    /// computes the column only from number-bearing lines, so counting
2768    /// `Expenses:Food` here would make `rledger format` and `bean-format`
2769    /// disagree and never converge on round-trip.
2770    #[test]
2771    fn transaction_elided_posting_does_not_widen_amount_column() {
2772        let src = "\
27732024-01-15 * \"Coffee\"
2774  Assets:Cash  -5.00 USD
2775  Expenses:Food
2776";
2777        // Only Assets:Cash (11) bears an amount; Expenses:Food (13) is
2778        // elided and is ignored for alignment. number_col = 2+11+2 = 15.
2779        let expected = "\
27802024-01-15 * \"Coffee\"
2781  Assets:Cash  -5.00 USD
2782  Expenses:Food
2783";
2784        assert_eq!(format_source(src), expected);
2785        // Idempotent: re-formatting the output is a no-op.
2786        assert_eq!(format_source(expected), expected);
2787    }
2788
2789    /// Regression for #1290 using the reporter's exact fixture: a long
2790    /// elided account (`Expenses:Thingamabobs`) alongside a short
2791    /// amount-bearing one (`Assets:Money`). Pre-fix the number was
2792    /// pushed right to clear the long account; `bean-format` keeps it
2793    /// two spaces after `Assets:Money`. Also confirms the thousands
2794    /// separator is stripped.
2795    #[test]
2796    fn transaction_long_elided_account_matches_bean_format() {
2797        let src = "\
27982024-07-20 * \"Commas should stay\"
2799  Assets:Money  -1,024 USD
2800  Expenses:Thingamabobs
2801";
2802        let expected = "\
28032024-07-20 * \"Commas should stay\"
2804  Assets:Money  -1024 USD
2805  Expenses:Thingamabobs
2806";
2807        assert_eq!(format_source(src), expected);
2808        assert_eq!(format_source(expected), expected);
2809    }
2810
2811    /// Regression for the currency-only gap (#1307, found in review): a
2812    /// currency-only posting (`... USD`, no number) renders no number,
2813    /// so — like an elided posting — it must not widen the alignment
2814    /// column even when its account is the longest. Only `Assets:Bank`
2815    /// bears a number here, so the number stays two spaces after it. The
2816    /// assertion checks the numbered line directly, independent of how
2817    /// the currency-only line itself renders.
2818    #[test]
2819    fn transaction_currency_only_posting_does_not_widen_amount_column() {
2820        let out = format_source(
2821            "2024-01-15 * \"x\"\n  Assets:Bank  -5.00 USD\n  Assets:LongCashReserve USD\n",
2822        );
2823        assert!(
2824            out.contains("  Assets:Bank  -5.00 USD"),
2825            "number column must align to the numbered posting, not the longer \
2826             currency-only one; got:\n{out}"
2827        );
2828    }
2829
2830    #[test]
2831    fn transaction_payee_and_narration() {
2832        let src =
2833            "2024-01-15 * \"Starbucks\" \"Coffee\"\n  Assets:Cash -5.00 USD\n  Expenses:Coffee\n";
2834        let out = format_source(src);
2835        assert!(
2836            out.contains("2024-01-15 * \"Starbucks\" \"Coffee\"\n"),
2837            "got: {out}"
2838        );
2839    }
2840
2841    #[test]
2842    fn transaction_pending_flag() {
2843        let src = "2024-01-15 ! \"Pending\"\n  Assets:Cash -5.00 USD\n  Expenses:Misc\n";
2844        let out = format_source(src);
2845        assert!(out.starts_with("2024-01-15 ! \"Pending\"\n"), "got: {out}");
2846    }
2847
2848    #[test]
2849    fn transaction_txn_keyword_normalized_to_star() {
2850        // The `txn` keyword form is canonical-form equivalent to `*`.
2851        let src = "2024-01-15 txn \"x\"\n  Assets:Cash -1.00 USD\n  Expenses:Misc\n";
2852        let out = format_source(src);
2853        assert!(out.starts_with("2024-01-15 * \"x\"\n"), "got: {out}");
2854    }
2855
2856    #[test]
2857    fn transaction_header_tags_and_links() {
2858        let src =
2859            "2024-01-15 * \"x\" #tag1 ^link1 #tag2\n  Assets:Cash -1.00 USD\n  Expenses:Misc\n";
2860        let out = format_source(src);
2861        assert!(
2862            out.starts_with("2024-01-15 * \"x\" #tag1 ^link1 #tag2\n"),
2863            "got: {out}"
2864        );
2865    }
2866
2867    #[test]
2868    fn transaction_auto_balance_posting_no_amount() {
2869        let src = "2024-01-15 * \"x\"\n  Assets:Cash  -5.00 USD\n  Expenses:Misc\n";
2870        let out = format_source(src);
2871        // The auto-balance posting has no amount; should just be
2872        // the indented account name.
2873        assert!(out.contains("\n  Expenses:Misc\n"), "got: {out}");
2874    }
2875
2876    #[test]
2877    fn transaction_posting_with_cost_spec() {
2878        let src = "2024-01-15 * \"buy\"\n  Assets:Brokerage  10 HOOL {500.00 USD}\n  Assets:Cash  -5000.00 USD\n";
2879        let out = format_source(src);
2880        assert!(out.contains("10 HOOL {500.00 USD}"), "got: {out}");
2881    }
2882
2883    #[test]
2884    fn transaction_posting_with_total_cost_spec() {
2885        let src = "2024-01-15 * \"buy\"\n  Assets:Brokerage  10 HOOL {{5000.00 USD}}\n  Assets:Cash  -5000.00 USD\n";
2886        let out = format_source(src);
2887        assert!(out.contains("10 HOOL {{5000.00 USD}}"), "got: {out}");
2888    }
2889
2890    #[test]
2891    fn transaction_posting_with_per_unit_price() {
2892        let src = "2024-01-15 * \"buy\"\n  Assets:Brokerage  10 HOOL @ 500.00 USD\n  Assets:Cash  -5000.00 USD\n";
2893        let out = format_source(src);
2894        assert!(out.contains("10 HOOL @ 500.00 USD"), "got: {out}");
2895    }
2896
2897    #[test]
2898    fn transaction_posting_with_total_price() {
2899        let src = "2024-01-15 * \"buy\"\n  Assets:Brokerage  10 HOOL @@ 5000.00 USD\n  Assets:Cash  -5000.00 USD\n";
2900        let out = format_source(src);
2901        assert!(out.contains("10 HOOL @@ 5000.00 USD"), "got: {out}");
2902    }
2903
2904    #[test]
2905    fn transaction_posting_with_flag() {
2906        let src = "2024-01-15 * \"x\"\n  ! Assets:Cash  -5.00 USD\n  Expenses:Misc  5.00 USD\n";
2907        let out = format_source(src);
2908        assert!(out.contains("\n  ! Assets:Cash"), "got: {out}");
2909    }
2910
2911    #[test]
2912    fn transaction_negative_amount() {
2913        let src = "2024-01-15 * \"x\"\n  Assets:Cash -5.00 USD\n  Expenses:Misc 5.00 USD\n";
2914        let out = format_source(src);
2915        assert!(out.contains("-5.00 USD"), "got: {out}");
2916        assert!(out.contains(" 5.00 USD"), "got: {out}");
2917    }
2918
2919    #[test]
2920    fn transaction_strips_thousands_separators_in_postings() {
2921        let src = "2024-01-15 * \"x\"\n  Assets:Cash -1,000.00 USD\n  Expenses:Misc 1,000.00 USD\n";
2922        let out = format_source(src);
2923        assert!(out.contains("-1000.00 USD"), "got: {out}");
2924        assert!(!out.contains("1,000"), "got: {out}");
2925    }
2926
2927    #[test]
2928    fn transaction_arithmetic_amount() {
2929        let src =
2930            "2024-01-15 * \"x\"\n  Assets:Cash  -(1.00 + 2.00) USD\n  Expenses:Misc 3.00 USD\n";
2931        let out = format_source(src);
2932        // The arithmetic expression should render with single
2933        // spaces around binary ops and tight parens.
2934        assert!(
2935            out.contains("(1.00 + 2.00) USD") || out.contains("-(1.00 + 2.00) USD"),
2936            "got: {out}"
2937        );
2938    }
2939
2940    #[test]
2941    fn transaction_idempotent() {
2942        let src = "\
29432024-01-15 * \"Coffee\"
2944  Assets:Cash       -5.00 USD
2945  Expenses:Coffee    5.00 USD
2946";
2947        let once = format_source(src);
2948        let twice = format_source(&once);
2949        assert_eq!(once, twice);
2950    }
2951
2952    #[test]
2953    fn transaction_file_wide_alignment_across_transactions() {
2954        let src = "\
29552024-01-15 * \"x\"
2956  Assets:Cash -5.00 USD
2957  Expenses:Misc 5.00 USD
2958
29592024-01-16 * \"y\"
2960  Liabilities:CreditCard:Visa  -100.00 USD
2961  Expenses:Big  100.00 USD
2962";
2963        let out = format_source(src);
2964        // Cross-posting invariant: the currency column (USD here)
2965        // lands at the same column on every posting line, even when
2966        // individual numbers differ in width or sign. The number
2967        // field is right-justified so the currency column is uniform.
2968        let usd_cols: Vec<usize> = out
2969            .lines()
2970            .filter(|l| l.starts_with("  ") && l.contains(" USD"))
2971            .filter_map(|l| l.find("USD"))
2972            .collect();
2973        assert!(
2974            usd_cols.len() >= 4,
2975            "expected ≥4 posting lines, got {usd_cols:?} in {out}"
2976        );
2977        let first = usd_cols[0];
2978        assert!(
2979            usd_cols.iter().all(|&c| c == first),
2980            "expected USD column uniform at {first}, got {usd_cols:?} in:\n{out}"
2981        );
2982    }
2983
2984    #[test]
2985    fn transaction_posting_metadata_indented_four() {
2986        let src =
2987            "2024-01-15 * \"x\"\n  Assets:Cash -5.00 USD\n    foo: \"bar\"\n  Expenses:Misc\n";
2988        let out = format_source(src);
2989        assert!(out.contains("\n    foo: \"bar\"\n"), "got: {out}");
2990    }
2991
2992    // ---- Code-review regression tests -----------------------------
2993    //
2994    // Each test pins a bug surfaced by the high-effort code review of
2995    // PR #1284 and verified at runtime against the unfixed formatter.
2996
2997    #[test]
2998    fn cost_spec_per_unit_plus_total_opener_preserved() {
2999        // Bug: format_cost_spec only branched on is_total() and emitted
3000        // `{` for the `{#` opener too, dropping the `#` marker and
3001        // changing semantics from per-unit-plus-total to plain
3002        // per-unit cost.
3003        let src = "2024-01-01 * \"buy\"\n  Assets:Brokerage 10 HOOL {# 500.00 USD}\n  Assets:Cash -5000.00 USD\n";
3004        let out = format_source(src);
3005        assert!(
3006            out.contains("{# 500.00 USD}"),
3007            "expected `{{#` opener preserved; got:\n{out}"
3008        );
3009        assert!(!out.contains("{500.00 USD}"), "got:\n{out}");
3010    }
3011
3012    #[test]
3013    fn cost_spec_comma_stays_tight_to_prev_token() {
3014        // Bug: format_cost_spec's catch-all arm inserted a space
3015        // before every non-trivia token including COMMA, producing
3016        // `{500.00 USD , 2024-01-15}` instead of the canonical
3017        // `{500.00 USD, 2024-01-15}`.
3018        let src = "2024-01-01 * \"buy\"\n  Assets:Brokerage 10 HOOL {500.00 USD, 2024-01-15}\n  Assets:Cash -5000.00 USD\n";
3019        let out = format_source(src);
3020        assert!(
3021            out.contains("{500.00 USD, 2024-01-15}"),
3022            "comma must stay tight to USD; got:\n{out}"
3023        );
3024        assert!(
3025            !out.contains("USD ,"),
3026            "no space allowed before comma; got:\n{out}"
3027        );
3028    }
3029
3030    #[test]
3031    fn custom_directive_preserves_date_value_arguments() {
3032        // Bug: emit_custom's post-seen_type match skipped every DATE
3033        // token, silently dropping legitimate date-typed value
3034        // arguments. The leading directive date is already skipped
3035        // via the seen_type=false phase.
3036        let src = "2024-01-01 custom \"budget\" \"name\" 2024-06-15 100.00 USD\n";
3037        let out = format_source(src);
3038        assert!(
3039            out.contains("2024-06-15"),
3040            "value-position DATE must survive; got: {out}"
3041        );
3042    }
3043
3044    #[test]
3045    fn file_level_adjacent_comments_stay_tight() {
3046        // Bug: format_node's top-level walk inserted a blank `\n`
3047        // separator before every emitted item including comments,
3048        // breaking section-header blocks like `; ====\n; HEADER\n; ====`
3049        // by injecting blanks between every adjacent comment line.
3050        let src = "; ====\n; HEADER\n; ====\n2024-01-01 open Assets:A\n";
3051        let expected = "; ====\n; HEADER\n; ====\n2024-01-01 open Assets:A\n";
3052        assert_eq!(format_source(src), expected);
3053    }
3054
3055    #[test]
3056    fn metadata_internal_whitespace_normalized() {
3057        // Bug: emit_meta_entries_of passed META_ENTRY source text
3058        // through verbatim, so `foo: "bar"` and `foo:    "bar"` —
3059        // identical typed ASTs — produced different formatter
3060        // output, violating the gofmt-style invariant the rustdoc
3061        // declares.
3062        let a = "2024-01-01 open Assets:Bank\n  starting: \"foo\"\n";
3063        let b = "2024-01-01 open Assets:Bank\n  starting:    \"foo\"\n";
3064        assert_eq!(format_source(a), format_source(b));
3065    }
3066
3067    #[test]
3068    fn metadata_number_thousands_separator_stripped() {
3069        // Same invariant: numbers inside metadata values share the
3070        // canonical thousands-separator policy with posting numbers
3071        // (otherwise the same file would emit inconsistent numeric
3072        // forms in postings vs. metadata).
3073        let src = "2024-01-01 open Assets:Bank\n  starting_balance: 1,000.00 USD\n";
3074        let out = format_source(src);
3075        assert!(
3076            out.contains("1000.00 USD"),
3077            "thousands-sep should strip in metadata too; got: {out}"
3078        );
3079        assert!(!out.contains("1,000"), "got: {out}");
3080    }
3081
3082    #[test]
3083    fn bare_cr_line_endings_normalized_to_lf_before_parse() {
3084        // Bug: the lexer doesn't treat bare CR as a line terminator,
3085        // so a classic-Mac-authored `directive\r…\rdirective\r`
3086        // parsed as one broken directive and the rest were silently
3087        // dropped. format_source normalizes line endings BEFORE
3088        // parsing so bare CR (and CRLF) are treated as LF.
3089        let src = "2024-01-01 open Assets:A\r2024-01-02 open Assets:B\r";
3090        let out = format_source(src);
3091        assert!(
3092            out.contains("2024-01-01 open Assets:A"),
3093            "first directive lost: {out:?}"
3094        );
3095        assert!(
3096            out.contains("2024-01-02 open Assets:B"),
3097            "second directive lost on bare-CR input: {out:?}"
3098        );
3099    }
3100
3101    #[test]
3102    fn crlf_input_canonicalizes_to_lf() {
3103        // CRLF and bare CR both fold to LF on the way through the
3104        // canonical pass (the canonical form is LF-only).
3105        let src = "2024-01-01 open Assets:A\r\n2024-01-02 open Assets:B\r\n";
3106        let out = format_source(src);
3107        assert!(
3108            !out.contains('\r'),
3109            "canonical output must be LF-only: {out:?}"
3110        );
3111        assert!(out.contains("2024-01-01 open Assets:A\n"), "got: {out:?}");
3112        assert!(out.contains("2024-01-02 open Assets:B\n"), "got: {out:?}");
3113    }
3114
3115    #[test]
3116    fn metadata_value_with_unary_minus_stays_tight() {
3117        // Bug: emit_meta_entry's tokenized walk inserted a space
3118        // after a unary `+`/`-`, breaking `key: -5.00 USD` →
3119        // `key: - 5.00 USD`. Routed through write_canonical_token_sequence
3120        // so unary detection matches the balance/price/posting paths.
3121        let src = "2024-01-01 open Assets:Bank\n  threshold: -5.00 USD\n";
3122        let out = format_source(src);
3123        assert!(
3124            out.contains("threshold: -5.00 USD"),
3125            "unary minus must stay tight in metadata; got: {out}"
3126        );
3127        assert!(
3128            !out.contains("- 5.00"),
3129            "no space after unary minus; got: {out}"
3130        );
3131    }
3132
3133    #[test]
3134    fn metadata_value_with_unary_plus_stays_tight() {
3135        let src = "2024-01-01 open Assets:Bank\n  min: +1.00 USD\n";
3136        let out = format_source(src);
3137        assert!(out.contains("min: +1.00 USD"), "got: {out}");
3138        assert!(!out.contains("+ 1.00"), "got: {out}");
3139    }
3140
3141    #[test]
3142    fn cost_spec_negative_cost_stays_tight() {
3143        // Bug: format_cost_spec catch-all had no unary-operator
3144        // handling. `{-500 USD}` formatted to `{- 500 USD}`. Now
3145        // routes through write_canonical_token_sequence.
3146        let src = "2024-01-01 * \"x\"\n  Assets:Brokerage 10 HOOL {-500 USD}\n  Assets:Cash -5000.00 USD\n";
3147        let out = format_source(src);
3148        assert!(
3149            out.contains("{-500 USD}"),
3150            "negative cost spec must stay tight; got:\n{out}"
3151        );
3152        assert!(!out.contains("{- "), "got:\n{out}");
3153    }
3154
3155    #[test]
3156    fn cost_spec_arithmetic_with_unary_stays_tight() {
3157        // `{500 * -2 USD}` formerly emitted `{500 * - 2 USD}` because
3158        // the cost-spec catch-all didn't understand unary +/-.
3159        let src = "2024-01-01 * \"x\"\n  Assets:Brokerage 10 HOOL {500 * -2 USD}\n  Assets:Cash -1000.00 USD\n";
3160        let out = format_source(src);
3161        assert!(
3162            out.contains("{500 * -2 USD}"),
3163            "cost-spec arithmetic unary must stay tight; got:\n{out}"
3164        );
3165    }
3166
3167    // ---- Property tests -------------------------------------------
3168    //
3169    // Two invariants the rustdoc's gofmt-style promise depends on,
3170    // pinned over a hand-curated input matrix:
3171    //
3172    // - **Idempotence:** `format_source(format_source(x)) == format_source(x)`.
3173    // - **Round-trip stability for canonicalize_directives:** the
3174    //   synthesize-then-canonicalize shim produces text that, when
3175    //   parsed back, yields the same Directive count and zero parse
3176    //   errors.
3177    //
3178    // The matrix covers every directive kind plus the high-risk
3179    // edge cases the prior reviews surfaced (unary +/- in metadata,
3180    // cost-spec arithmetic, CRLF, bare CR, multi-line strings,
3181    // comments containing quotes, non-Latin accounts). When the
3182    // upstream compatibility corpus is fetched into
3183    // `tests/compatibility/files/` the per-file sweep at the bottom
3184    // also runs; otherwise the file-based test is skipped.
3185
    /// Shared fixture table for the property tests in this module.
    ///
    /// Each entry is a `(fixture name, source text)` pair. The
    /// `idempotence_matrix` test formats every source twice and
    /// compares the results; the
    /// `canonicalize_directives_roundtrips_every_synthesized_directive`
    /// test parses every source and round-trips its typed
    /// directives. In both of those tests the name is only used to
    /// label a failing fixture in the assertion message.
    ///
    /// Adding a fixture that yields zero typed directives (or that
    /// fails to parse) also requires bumping
    /// `ROUNDTRIP_KNOWN_ZERO_DIRECTIVE_FIXTURES`, because the
    /// round-trip test derives its coverage floor from this table's
    /// length minus that constant.
    const IDEMPOTENCE_MATRIX: &[(&str, &str)] = &[
        ("empty", ""),
        ("only_comment", "; header comment\n"),
        ("only_directive", "2024-01-01 open Assets:Cash\n"),
        (
            "two_open_directives",
            "2024-01-01 open Assets:A\n2024-01-02 open Assets:B\n",
        ),
        (
            "transaction_with_cost_and_price",
            "2024-01-15 * \"buy\"\n  Assets:Brokerage 10 HOOL {500.00 USD} @ 510.00 USD\n  Assets:Cash -5000.00 USD\n",
        ),
        (
            "transaction_with_per_unit_plus_total_cost",
            "2024-01-15 * \"x\"\n  Assets:Brokerage 10 HOOL {# 500.00 USD}\n  Assets:Cash -5000.00 USD\n",
        ),
        (
            "transaction_with_arithmetic_amount",
            "2024-01-15 * \"x\"\n  Assets:Cash  -(1.00 + 2.00) USD\n  Expenses:Misc 3.00 USD\n",
        ),
        (
            "balance_with_arithmetic_and_tolerance",
            "2024-01-15 balance Assets:Cash 0.25 + 0.75 USD ~ 0.01 USD\n",
        ),
        // Regression for Copilot #2: a previous emit_amount_expression
        // skipped tokens until the first NUMBER, which dropped a
        // leading unary `-` and silently flipped the sign — a
        // balance assertion that asserted a debit would assert a
        // credit after a round-trip. These fixtures pin the
        // sign / paren preservation explicitly.
        (
            "balance_leading_unary_minus",
            "2024-01-15 balance Assets:A -1.00 USD\n",
        ),
        (
            "balance_leading_parenthesized_expression",
            "2024-01-15 balance Assets:A (1 + 2) USD\n",
        ),
        (
            "price_leading_unary_minus",
            "2024-01-15 price USD -1.00 EUR\n",
        ),
        (
            "price_with_thousands_separator",
            "2024-01-15 price USD 1,234.56 EUR\n",
        ),
        (
            "metadata_unary_minus",
            "2024-01-01 open Assets:Bank\n  threshold: -5.00 USD\n",
        ),
        (
            "metadata_arithmetic",
            "2024-01-01 open Assets:Bank\n  total: 1000 + 500 USD\n",
        ),
        (
            "cost_spec_with_comma_and_date",
            "2024-01-15 * \"x\"\n  Assets:Brokerage 10 HOOL {500.00 USD, 2024-01-15}\n  Assets:Cash -5000.00 USD\n",
        ),
        (
            "cost_spec_with_negative",
            "2024-01-15 * \"x\"\n  Assets:Brokerage 10 HOOL {-500 USD}\n  Assets:Cash 5000.00 USD\n",
        ),
        (
            "transaction_with_tags_and_links",
            "2024-01-15 * \"x\" #tag1 ^link1 #tag2\n  Assets:Cash -1.00 USD\n  Expenses:Misc 1.00 USD\n",
        ),
        (
            "custom_with_date_value",
            "2024-01-01 custom \"budget\" \"name\" 2024-06-15 100.00 USD\n",
        ),
        (
            "non_latin_account_name",
            "2024-01-15 * \"x\"\n  Активы:Банк -5.00 USD\n  Expenses:Misc 5.00 USD\n",
        ),
        (
            "section_header_comments",
            "; ====\n; HEADER\n; ====\n2024-01-01 open Assets:A\n",
        ),
        (
            "multiline_note_string",
            "2024-01-15 note Assets:Bank \"line 1\nline 2\"\n",
        ),
        (
            "comment_containing_quote",
            "; comment with \"a quote\n2024-01-01 open Assets:A\n",
        ),
        (
            "crlf_input",
            "2024-01-01 open Assets:A\r\n2024-01-02 open Assets:B\r\n",
        ),
        (
            "bare_cr_input",
            "2024-01-01 open Assets:A\r2024-01-02 open Assets:B\r",
        ),
        (
            "file_with_trailing_newlines",
            "2024-01-01 open Assets:A\n\n\n",
        ),
        ("file_without_trailing_newline", "2024-01-01 open Assets:A"),
        // Regression for Copilot #1: collect_trailing_comment
        // previously returned None for a directive with no
        // header-terminating NEWLINE token, which silently dropped
        // a same-line trailing comment at EOF when the file lacked
        // a trailing newline. The canonical formatter restores the
        // trailing newline, but the dropped comment was already
        // gone.
        (
            "trailing_comment_no_final_newline",
            "2024-01-15 open Assets:A ; trailing",
        ),
        (
            "posting_with_trailing_comment",
            "2024-01-15 * \"x\"\n  Assets:Cash -5.00 USD ; pocket\n  Expenses:Misc 5.00 USD\n",
        ),
        (
            "balance_assertion_with_meta",
            "2024-01-15 balance Assets:Cash 100.00 USD\n  source: \"bank\"\n",
        ),
        (
            "options_and_includes",
            "option \"title\" \"My Ledger\"\ninclude \"sub.beancount\"\nplugin \"my.plugin\" \"cfg\"\n",
        ),
        // ---- per-variant coverage ---------------------------------
        ("close_directive", "2024-12-31 close Assets:Cash\n"),
        ("commodity_directive", "2024-01-01 commodity HOOL\n"),
        ("note_directive", "2024-01-15 note Assets:Cash \"a note\"\n"),
        ("event_directive", "2024-01-15 event \"location\" \"NYC\"\n"),
        (
            "query_directive",
            "2024-01-15 query \"q1\" \"SELECT account\"\n",
        ),
        ("pad_directive", "2024-01-15 pad Assets:A Equity:Opening\n"),
        (
            "document_directive",
            "2024-06-01 document Assets:Bank \"stmt.pdf\" #q1\n",
        ),
        // Note: `#!` and `#+` anywhere on a line, not just at
        // line start, open the lexer's SHEBANG / EMACS_DIRECTIVE
        // tokens. The fixture places `#+` mid-line and tails it
        // with an unbalanced `"`: an incorrect state machine that
        // gated the opener on `at_line_start` would stay in Code
        // when it hit the `#+`, then flip to InString on the next
        // `"` and trap there for the remainder of the file. The
        // lexer-agreement property test catches that divergence,
        // and the round-trip body runs too because the parser
        // treats the mid-line EMACS_DIRECTIVE as same-line
        // trailing trivia under the directive-terminator rule.
        (
            "emacs_directive_mid_line_with_quote",
            "2024-01-15 open Assets:A #+stray \"q\n",
        ),
        ("pushtag_directive", "pushtag #active\n"),
        ("poptag_directive", "poptag #active\n"),
        ("pushmeta_directive", "pushmeta location: \"NYC\"\n"),
        ("popmeta_directive", "popmeta location:\n"),
    ];
3342
    /// Number of fixtures in [`IDEMPOTENCE_MATRIX`] that legitimately
    /// produce zero typed directives — comment-only / empty /
    /// pragma-only inputs. The round-trip property test skips these
    /// (they have nothing to emit), but every OTHER fixture MUST
    /// exercise the body. Bumping this constant when adding such a
    /// fixture is the only manual maintenance the coverage floor
    /// needs; otherwise the floor (`IDEMPOTENCE_MATRIX.len() -
    /// ROUNDTRIP_KNOWN_ZERO_DIRECTIVE_FIXTURES`) tracks the matrix
    /// automatically.
    ///
    /// Today's zero-directive fixtures (skipped by the round-trip
    /// body), verified by an exhaustive probe against the live
    /// parser. The list below adds up to the constant's value:
    /// 2 + 1 + 4 + 1 = 8.
    ///
    /// - `empty`, `only_comment` — no directives at all.
    /// - `bare_cr_input` — the parser does not recognize bare CR
    ///   (without a following LF) as a directive terminator, so
    ///   the file's two would-be directives never surface as
    ///   structured tokens. The fixture's purpose is the
    ///   line-ending state-machine pass, not the round-trip body.
    /// - `pushtag_directive`, `poptag_directive`,
    ///   `pushmeta_directive`, `popmeta_directive` — pragma
    ///   directives don't surface as `Directive` variants on the
    ///   typed-AST side (the parser also rejects them today, so
    ///   they produce parse errors and the skip-on-errors guard
    ///   triggers).
    /// - `options_and_includes` — option / include / plugin lines
    ///   live on separate `ParseResult` collections, not on
    ///   `.directives`.
    ///
    /// Note: `comment_containing_quote` and
    /// `emacs_directive_mid_line_with_quote` BOTH exercise the
    /// body — each is paired with a parseable directive on the
    /// same line or an adjacent line, and the trivia token
    /// (comment / `EMACS_DIRECTIVE`) attaches as same-line or
    /// inter-directive trivia under the directive-terminator
    /// rule. Their purpose is the state-machine / lexer agreement
    /// property on a comment with an unbalanced `"`, not the
    /// zero-directive case.
    const ROUNDTRIP_KNOWN_ZERO_DIRECTIVE_FIXTURES: usize = 8;
3383
3384    #[test]
3385    fn lf_to_crlf_outside_strings_preserves_string_interior() {
3386        // Bug: a flat in_string-only state machine would re-inject
3387        // CRLF inside multi-line strings, mutating the user's bytes.
3388        let s = "2024-01-15 note Assets:Bank \"line 1\nline 2\"\n";
3389        let out = lf_to_crlf_outside_strings(s);
3390        assert!(out.contains("line 1\nline 2"), "got: {out:?}");
3391        assert!(out.ends_with("\r\n"), "got: {out:?}");
3392    }
3393
3394    #[test]
3395    fn lf_to_crlf_outside_strings_handles_comment_with_quote() {
3396        // Bug: an unbalanced `"` inside a `;` comment formerly flipped
3397        // in_string=true for the rest of the file, leaving every
3398        // subsequent newline as LF.
3399        let s = "; comment with \"a quote\n2024-01-01 open Assets:A\n";
3400        let out = lf_to_crlf_outside_strings(s);
3401        assert_eq!(
3402            out,
3403            "; comment with \"a quote\r\n2024-01-01 open Assets:A\r\n",
3404        );
3405    }
3406
3407    #[test]
3408    fn lf_to_crlf_outside_strings_handles_percent_comment_with_quote() {
3409        let s = "% percent \"quote\n2024-01-01 open Assets:A\n";
3410        let out = lf_to_crlf_outside_strings(s);
3411        assert_eq!(out, "% percent \"quote\r\n2024-01-01 open Assets:A\r\n");
3412    }
3413
3414    #[test]
3415    fn crlf_to_lf_preserves_crlf_inside_strings() {
3416        // Bug fix mirror: a Windows-authored multi-line string had
3417        // its CRLF folded to LF by the pre-parse normalizer too,
3418        // which silently mutated the user's bytes.
3419        let s = "2024-01-15 note Assets:Bank \"line1\r\nline2\"\r\n";
3420        let normalized = crlf_to_lf_outside_strings(s);
3421        // Outside the string, the trailing CRLF folds to LF; inside
3422        // the string, CRLF stays CRLF (user's bytes preserved).
3423        assert!(
3424            normalized.contains("\"line1\r\nline2\""),
3425            "got: {:?}",
3426            &*normalized
3427        );
3428        assert!(normalized.ends_with('\n') && !normalized.ends_with("\r\n"));
3429    }
3430
3431    #[test]
3432    fn idempotence_matrix() {
3433        // The gofmt invariant in the file rustdoc: f(f(x)) == f(x)
3434        // on every accepted input. Each fixture below covers one
3435        // axis of the canonical-form spec; together they exercise
3436        // every directive kind and every spacing rule shared via
3437        // write_canonical_token_sequence.
3438        for (name, src) in IDEMPOTENCE_MATRIX {
3439            let once = format_source(src);
3440            let twice = format_source(&once);
3441            assert_eq!(
3442                once, twice,
3443                "idempotence broken on fixture `{name}`\n--- once ---\n{once}\n--- twice ---\n{twice}",
3444            );
3445        }
3446    }
3447
3448    #[test]
3449    fn canonicalize_directives_roundtrips_every_synthesized_directive() {
3450        // For each canonical-form fixture: parse → take the typed
3451        // directives → run them through canonicalize_directives →
3452        // re-parse the canonical text → assert the parser reports
3453        // zero errors and the directive count is preserved.
3454        //
3455        // This is the proper end-to-end test of the two-pass shim
3456        // the FFI format.entry and rledger add/extract commands all
3457        // depend on. Without it, a future Directive variant added
3458        // to rustledger-core without matching coverage in
3459        // cst::format would silently round-trip to truncated text.
3460        //
3461        // Counter + assertion guards against silent-skip: if the
3462        // guard at the top of the loop ever filters too many
3463        // fixtures (e.g. a parser regression that drops directives
3464        // from previously-clean fixtures), the test fails instead
3465        // of silently passing with zero coverage.
3466        use rustledger_core::format::FormatConfig;
3467        let cfg = FormatConfig::default();
3468        let mut exercised = 0usize;
3469        for (name, src) in IDEMPOTENCE_MATRIX {
3470            let parsed = crate::parse(src);
3471            if parsed.errors.is_empty() && !parsed.directives.is_empty() {
3472                let dirs: Vec<&rustledger_core::Directive> =
3473                    parsed.directives.iter().map(|s| &s.value).collect();
3474                let formatted = super::canonicalize_directives(dirs.iter().copied(), &cfg)
3475                    .unwrap_or_else(|e| {
3476                        panic!("canonicalize_directives error on fixture `{name}`: {e}")
3477                    });
3478                let reparsed = crate::parse(&formatted);
3479                assert!(
3480                    reparsed.errors.is_empty(),
3481                    "round-trip parse errors on fixture `{name}`:\n--- formatted ---\n{formatted}\n--- errors ---\n{:?}",
3482                    reparsed.errors,
3483                );
3484                assert_eq!(
3485                    parsed.directives.len(),
3486                    reparsed.directives.len(),
3487                    "directive count drifted on fixture `{name}`\n--- formatted ---\n{formatted}",
3488                );
3489                exercised += 1;
3490            }
3491        }
3492        let expected = IDEMPOTENCE_MATRIX
3493            .len()
3494            .saturating_sub(ROUNDTRIP_KNOWN_ZERO_DIRECTIVE_FIXTURES);
3495        assert!(
3496            exercised >= expected,
3497            "only {exercised} fixtures exercised the round-trip body, \
3498             expected at least {expected} (= IDEMPOTENCE_MATRIX.len() - \
3499             {ROUNDTRIP_KNOWN_ZERO_DIRECTIVE_FIXTURES}). A parser \
3500             regression or a broken fixture is silently dropping coverage."
3501        );
3502    }
3503
3504    /// `SHEBANG` / `EMACS_DIRECTIVE` lines (`#!…` / `#+…` at line
3505    /// start) also count as comments for the LSP-CRLF state
3506    /// machine. A stray quote inside such a line used to flip
3507    /// `in_string=true` for the rest of the file just like the
3508    /// `;` / `%` comment case the round-3 fix covered.
3509    #[test]
3510    fn lf_to_crlf_outside_strings_handles_emacs_directive_with_quote() {
3511        let s = "#+title: \"My Book\n2024-01-01 open Assets:A\n";
3512        let out = lf_to_crlf_outside_strings(s);
3513        assert_eq!(out, "#+title: \"My Book\r\n2024-01-01 open Assets:A\r\n");
3514    }
3515
3516    #[test]
3517    fn lf_to_crlf_outside_strings_handles_shebang_with_quote() {
3518        let s = "#!shebang \"quote\n2024-01-01 open Assets:A\n";
3519        let out = lf_to_crlf_outside_strings(s);
3520        assert_eq!(out, "#!shebang \"quote\r\n2024-01-01 open Assets:A\r\n");
3521    }
3522
3523    /// `#` NOT at line start is a TAG / HASH token; the state
3524    /// machine must NOT treat it as a comment opener.
3525    #[test]
3526    fn lf_to_crlf_outside_strings_hash_mid_line_is_not_comment() {
3527        let s = "2024-01-15 * \"x\" #tag1\n  Assets:A 1 USD\n";
3528        let out = lf_to_crlf_outside_strings(s);
3529        // Every LF outside strings becomes CRLF — including the
3530        // one ending the tag-bearing line.
3531        assert!(out.contains("#tag1\r\n"), "got: {out:?}");
3532        assert!(out.ends_with("\r\n"), "got: {out:?}");
3533    }
3534
3535    /// Regression for Copilot #2 inline review on PR #1284: a
3536    /// previous `emit_amount_expression` dropped leading unary
3537    /// signs and parens, flipping the sign on
3538    /// `2024-01-15 balance Assets:A
3539    /// -1.00 USD` to `1.00 USD` — silent data corruption (a debit
3540    /// asserted as a credit). Byte-exact pins on every shape.
3541    #[test]
3542    fn balance_price_preserve_leading_unary_and_parens() {
3543        // Bare leading minus on balance.
3544        let src = "2024-01-15 balance Assets:A -1.00 USD\n";
3545        assert_eq!(
3546            format_source(src),
3547            "2024-01-15 balance Assets:A -1.00 USD\n"
3548        );
3549
3550        // Bare leading minus on price (sign flip would change
3551        // every quote on the user's commodity).
3552        let src = "2024-01-15 price USD -1.00 EUR\n";
3553        assert_eq!(format_source(src), "2024-01-15 price USD -1.00 EUR\n");
3554
3555        // Leading parenthesized expression. The previous code
3556        // dropped the `(`, which made the trailing `)` unbalanced
3557        // AND made the first-CURRENCY scan find the wrong token.
3558        let src = "2024-01-15 balance Assets:A (1 + 2) USD\n";
3559        assert_eq!(
3560            format_source(src),
3561            "2024-01-15 balance Assets:A (1 + 2) USD\n"
3562        );
3563
3564        // Leading minus on a parenthesized arithmetic expression.
3565        let src = "2024-01-15 balance Assets:A -(1 + 2) USD\n";
3566        assert_eq!(
3567            format_source(src),
3568            "2024-01-15 balance Assets:A -(1 + 2) USD\n"
3569        );
3570    }
3571
3572    /// Regression for Copilot #1 inline review on PR #1284:
3573    /// `collect_trailing_comment` used `?` on the header-terminating
3574    /// NEWLINE, silently dropping same-line trailing comments at
3575    /// EOF when the file had no final newline. The canonical
3576    /// formatter restores the trailing newline, but the dropped
3577    /// comment was already gone — a real-world case for editors
3578    /// that don't insert a trailing newline on save.
3579    #[test]
3580    fn trailing_comment_preserved_at_eof_without_newline() {
3581        let src = "2024-01-15 open Assets:A ; trailing";
3582        assert_eq!(format_source(src), "2024-01-15 open Assets:A ; trailing\n");
3583    }
3584
3585    #[test]
3586    fn try_format_source_returns_ok_on_clean_input() {
3587        let src = "2024-01-15 open Assets:Cash\n";
3588        let out = super::try_format_source(src).expect("clean input should format");
3589        assert_eq!(out, super::format_source(src));
3590    }
3591
3592    #[test]
3593    fn try_format_source_returns_err_on_parse_error() {
3594        // Bare `unparsable` text triggers parser errors. The
3595        // helper must surface them instead of silently emitting
3596        // canonical text around a broken file.
3597        let src = "this is not a directive at all\n";
3598        let err = super::try_format_source(src).expect_err("garbage should error");
3599        assert!(!err.is_empty(), "errors must not be empty");
3600    }
3601
3602    #[test]
3603    fn cr_outside_strings_present_distinguishes_in_string_cr() {
3604        // CR inside a multi-line string literal must NOT count —
3605        // the formatter wouldn't fold it.
3606        let in_string_only = "2024-01-15 note Assets:Bank \"line1\r\nline2\"\n";
3607        assert!(!super::cr_outside_strings_present(in_string_only));
3608
3609        // CR outside any string literal (CRLF line terminator)
3610        // counts — that's what crlf_to_lf_outside_strings would
3611        // fold.
3612        let crlf_terminator = "2024-01-01 open Assets:A\r\n";
3613        assert!(super::cr_outside_strings_present(crlf_terminator));
3614
3615        // No `\r` at all — fast path.
3616        let lf_only = "2024-01-01 open Assets:A\n";
3617        assert!(!super::cr_outside_strings_present(lf_only));
3618
3619        // CR inside a `;` comment is outside any string and counts.
3620        // (Beancount lexer's comment regex excludes the newline, so
3621        // the comment region ends at `\r`; either way, the predicate
3622        // says "yes, the formatter would fold this byte".)
3623        let comment_with_cr = "; comment with \"quote\rstuff\n";
3624        assert!(super::cr_outside_strings_present(comment_with_cr));
3625    }
3626
3627    #[test]
3628    fn canonicalize_directives_directive_count_mismatch_is_reported() {
3629        // Drive the new DirectiveCountMismatch error variant.
3630        // Today's Directive variants all round-trip with matching
3631        // counts, so this test pins the Display rendering of the
3632        // variant (the user-facing message). The positive-count-
3633        // match path is exercised by
3634        // `canonicalize_directives_positive_count_check` below.
3635        let err = super::CanonicalizeError::DirectiveCountMismatch {
3636            input: 3,
3637            reparsed: 2,
3638        };
3639        let msg = format!("{err}");
3640        assert!(msg.contains("3 directive(s)"), "got: {msg}");
3641        assert!(msg.contains("2 survived"), "got: {msg}");
3642        assert!(msg.contains("rledger bug"), "got: {msg}");
3643    }
3644
    /// Single source of truth for the variant → fixture mapping
    /// used by both the compile-time exhaustiveness check
    /// ([`_directive_variant_fixture_coverage`]) and the runtime
    /// semantic check
    /// ([`directive_variant_fixture_names_resolve_in_matrix`]).
    ///
    /// Each tuple is `(VariantName, fixture_name)`:
    ///
    /// - `VariantName` is the string the runtime check uses to
    ///   confirm the fixture parses to that variant;
    /// - `fixture_name` is what the compile-time match returns for
    ///   the same variant, and must name an entry of
    ///   `IDEMPOTENCE_MATRIX`.
    ///
    /// A future `Directive::Hedge` variant only ships with
    /// canonical-form coverage if BOTH a new arm is added to the
    /// compile-time match AND a row here names a fixture that
    /// actually produces a `Hedge` on parse.
    const DIRECTIVE_VARIANT_FIXTURE_MAP: &[(&str, &str)] = &[
        ("Transaction", "transaction_with_cost_and_price"),
        ("Balance", "balance_with_arithmetic_and_tolerance"),
        ("Open", "only_directive"),
        ("Close", "close_directive"),
        ("Commodity", "commodity_directive"),
        ("Pad", "pad_directive"),
        ("Event", "event_directive"),
        ("Query", "query_directive"),
        ("Note", "note_directive"),
        ("Document", "document_directive"),
        ("Price", "price_with_thousands_separator"),
        ("Custom", "custom_with_date_value"),
    ];
3673
3674    /// Lookup helper: variant tag string → fixture name. Used by
3675    /// the compile-time match below. Panics if the variant is not
3676    /// in the map (which would be an internal-consistency bug, not
3677    /// a user-facing case).
3678    const fn fixture_for_variant(tag: &str) -> &'static str {
3679        let mut i = 0;
3680        while i < DIRECTIVE_VARIANT_FIXTURE_MAP.len() {
3681            let (v, f) = DIRECTIVE_VARIANT_FIXTURE_MAP[i];
3682            // const_str equality: compare byte slices.
3683            let v_bytes = v.as_bytes();
3684            let t_bytes = tag.as_bytes();
3685            if v_bytes.len() == t_bytes.len() {
3686                let mut k = 0;
3687                let mut eq = true;
3688                while k < v_bytes.len() {
3689                    if v_bytes[k] != t_bytes[k] {
3690                        eq = false;
3691                        break;
3692                    }
3693                    k += 1;
3694                }
3695                if eq {
3696                    return f;
3697                }
3698            }
3699            i += 1;
3700        }
3701        panic!("DIRECTIVE_VARIANT_FIXTURE_MAP missing entry for variant tag");
3702    }
3703
3704    /// Compile-time check that every `rustledger_core::Directive`
3705    /// variant has at least one source-text fixture in
3706    /// [`IDEMPOTENCE_MATRIX`] exercising its emit path. The
3707    /// function NEVER runs — its body is an exhaustive `match` over
3708    /// the `Directive` enum. Adding a new variant breaks
3709    /// compilation unless the author adds a match arm referencing
3710    /// `fixture_for_variant("NewVariantName")`, AND adds a row to
3711    /// [`DIRECTIVE_VARIANT_FIXTURE_MAP`] naming the fixture. The
3712    /// runtime test then confirms the fixture parses to a directive
3713    /// of that variant.
3714    ///
3715    /// The non-`Directive` pragma-style directives (Pushtag,
3716    /// Poptag, Pushmeta, Popmeta, options, includes, plugins)
3717    /// don't appear in the typed `Directive` enum; they're covered
3718    /// by separate fixtures whose names map directly into
3719    /// `IDEMPOTENCE_MATRIX`.
3720    #[allow(dead_code)]
3721    fn _directive_variant_fixture_coverage(d: &rustledger_core::Directive) -> &'static str {
3722        match d {
3723            rustledger_core::Directive::Transaction(_) => fixture_for_variant("Transaction"),
3724            rustledger_core::Directive::Balance(_) => fixture_for_variant("Balance"),
3725            rustledger_core::Directive::Open(_) => fixture_for_variant("Open"),
3726            rustledger_core::Directive::Close(_) => fixture_for_variant("Close"),
3727            rustledger_core::Directive::Commodity(_) => fixture_for_variant("Commodity"),
3728            rustledger_core::Directive::Pad(_) => fixture_for_variant("Pad"),
3729            rustledger_core::Directive::Event(_) => fixture_for_variant("Event"),
3730            rustledger_core::Directive::Query(_) => fixture_for_variant("Query"),
3731            rustledger_core::Directive::Note(_) => fixture_for_variant("Note"),
3732            rustledger_core::Directive::Document(_) => fixture_for_variant("Document"),
3733            rustledger_core::Directive::Price(_) => fixture_for_variant("Price"),
3734            rustledger_core::Directive::Custom(_) => fixture_for_variant("Custom"),
3735        }
3736    }
3737
3738    #[test]
3739    fn directive_variant_fixture_names_resolve_in_matrix() {
3740        // Runtime mirror of the compile-time match above:
3741        //
3742        //   (1) every fixture name appears in IDEMPOTENCE_MATRIX;
3743        //   (2) parsing that fixture produces AT LEAST one
3744        //       directive of the variant the map row names.
3745        //
3746        // Without check (2) the compile-time match is satisfied by
3747        // any fixture-name string — a future contributor adding
3748        // a row `("Hedge", "only_comment")` would compile, the
3749        // lookup would resolve, and Hedge would ship with zero
3750        // canonical-form coverage. The semantic check rejects that
3751        // by parsing the named fixture and inspecting the
3752        // directive variant.
3753        use rustledger_core::Directive;
3754        fn matches_variant(d: &Directive, expected: &str) -> bool {
3755            matches!(
3756                (d, expected),
3757                (Directive::Transaction(_), "Transaction")
3758                    | (Directive::Balance(_), "Balance")
3759                    | (Directive::Open(_), "Open")
3760                    | (Directive::Close(_), "Close")
3761                    | (Directive::Commodity(_), "Commodity")
3762                    | (Directive::Pad(_), "Pad")
3763                    | (Directive::Event(_), "Event")
3764                    | (Directive::Query(_), "Query")
3765                    | (Directive::Note(_), "Note")
3766                    | (Directive::Document(_), "Document")
3767                    | (Directive::Price(_), "Price")
3768                    | (Directive::Custom(_), "Custom")
3769            )
3770        }
3771        for (variant, name) in DIRECTIVE_VARIANT_FIXTURE_MAP {
3772            let (_, src) = IDEMPOTENCE_MATRIX
3773                .iter()
3774                .find(|(n, _)| *n == *name)
3775                .unwrap_or_else(|| {
3776                    panic!(
3777                        "fixture `{name}` is named by \
3778                     DIRECTIVE_VARIANT_FIXTURE_MAP but missing from \
3779                     IDEMPOTENCE_MATRIX"
3780                    )
3781                });
3782            let parsed = crate::parse(src);
3783            let found = parsed
3784                .directives
3785                .iter()
3786                .any(|s| matches_variant(&s.value, variant));
3787            assert!(
3788                found,
3789                "fixture `{name}` is mapped to `Directive::{variant}` by \
3790                 DIRECTIVE_VARIANT_FIXTURE_MAP, but parsing it produced \
3791                 no directive of that variant (got {:?}). This silently \
3792                 leaves the variant without canonical-form coverage.",
3793                parsed
3794                    .directives
3795                    .iter()
3796                    .map(|s| std::mem::discriminant(&s.value))
3797                    .collect::<Vec<_>>()
3798            );
3799        }
3800    }
3801
3802    /// Coverage-mirror check: every `matrix_name` half of the
3803    /// `MIRROR_PAIRS` table in the file-pair integration test
3804    /// (`crates/rustledger-parser/tests/format_compat.rs`) must
3805    /// exist as an entry in [`IDEMPOTENCE_MATRIX`]. The
3806    /// integration test asserts the symmetric half (every
3807    /// `file_pair_name` exists as a directory under `cases/`).
3808    /// Together the two checks guarantee that retiring a
3809    /// bug-class fixture from EITHER side forces an edit to
3810    /// `MIRROR_PAIRS` - which surfaces in review and prevents
3811    /// the silent one-sided drop the README's "two audience" split
3812    /// design would otherwise admit.
3813    ///
3814    /// Hand-maintained copy of the matrix half of the table.
3815    /// Editing `MIRROR_PAIRS` in the integration test requires
3816    /// editing this list too; the test below fires otherwise.
3817    #[test]
3818    fn idempotence_matrix_mirrors_format_compat_pairs() {
3819        const MIRROR_PAIRS_MATRIX_HALF: &[&str] = &[
3820            "balance_leading_unary_minus",
3821            "balance_leading_parenthesized_expression",
3822            "price_leading_unary_minus",
3823            "cost_spec_with_negative",
3824            "cost_spec_with_comma_and_date",
3825            "transaction_with_per_unit_plus_total_cost",
3826            "metadata_unary_minus",
3827            "metadata_arithmetic",
3828            "non_latin_account_name",
3829            "posting_with_trailing_comment",
3830            "multiline_note_string",
3831            "comment_containing_quote",
3832            "transaction_with_tags_and_links",
3833            "custom_with_date_value",
3834            "options_and_includes",
3835            "balance_assertion_with_meta",
3836            "crlf_input",
3837        ];
3838        let matrix_names: std::collections::BTreeSet<&str> =
3839            IDEMPOTENCE_MATRIX.iter().map(|(name, _)| *name).collect();
3840        let missing: Vec<&&str> = MIRROR_PAIRS_MATRIX_HALF
3841            .iter()
3842            .filter(|name| !matrix_names.contains(*name))
3843            .collect();
3844        assert!(
3845            missing.is_empty(),
3846            "IDEMPOTENCE_MATRIX is missing the matrix-half of MIRROR_PAIRS: {missing:?}. \
3847             Either re-add the entry to IDEMPOTENCE_MATRIX, or edit MIRROR_PAIRS in \
3848             tests/format_compat.rs to retire the pair from BOTH sides.",
3849        );
3850    }
3851
3852    /// Property test: the `SourceState` classification used by the
3853    /// line-ending helpers must agree with the lexer's
3854    /// classification on every byte of a corpus of fixtures.
3855    ///
3856    /// Concretely: for every byte offset in every fixture, the
3857    /// state machine's `InString` periods MUST line up with the
3858    /// lexer's STRING token spans, and its `InComment` periods MUST
3859    /// line up with the union of COMMENT / SHEBANG /
3860    /// `EMACS_DIRECTIVE` token spans. A divergence — e.g. the lexer
3861    /// gains a new comment lexeme that the state machine treats as
3862    /// code — fails this test instead of silently mutating user
3863    /// bytes inside the new lexeme on a line-ending round-trip.
3864    #[test]
3865    fn source_state_classification_agrees_with_lexer() {
3866        use crate::logos_lexer::{Token, tokenize_lossless};
3867
3868        for (name, src) in IDEMPOTENCE_MATRIX {
3869            // Run the lexer to get authoritative classification of
3870            // each token. Build a per-byte map of expected state.
3871            let tokens = tokenize_lossless(src);
3872            let mut expected = vec![SourceState::Code; src.len()];
3873            for (token, span) in &tokens {
3874                let classify = match token {
3875                    Token::String(_) => Some(SourceState::InString),
3876                    Token::Comment(_) | Token::Shebang(_) | Token::EmacsDirective(_) => {
3877                        Some(SourceState::InComment)
3878                    }
3879                    _ => None,
3880                };
3881                if let Some(state) = classify {
3882                    for byte in &mut expected[span.start..span.end] {
3883                        *byte = state;
3884                    }
3885                }
3886            }
3887
3888            // Run the state-machine classifier and compare per
3889            // byte. We skip ONLY the exact bytes where a
3890            // transition fires — the lexer includes those bytes
3891            // inside the resulting token while the state machine
3892            // tags them with the PRE-transition state (the
3893            // 'opener' is still Code, the closing LF is still
3894            // InComment). Tracking the transition indices
3895            // explicitly (rather than skipping every `"`/`;`/`%`
3896            // / newline byte) means a state-machine bug at any
3897            // non-transition `"`/`;`/`%` byte — e.g. inside a
3898            // comment or string — surfaces as a real failure
3899            // instead of being silently masked.
3900            let (actual, transitions) = classify_source_bytes_with_transitions(src);
3901
3902            for (i, (&want, &got)) in expected.iter().zip(actual.iter()).enumerate() {
3903                if transitions.contains(&i) {
3904                    continue;
3905                }
3906                assert_eq!(
3907                    want,
3908                    got,
3909                    "state-machine / lexer disagreement on fixture `{name}` \
3910                     at byte {i} ({:?}): lexer said {want:?}, state machine said {got:?}",
3911                    src.as_bytes()[i] as char
3912                );
3913            }
3914        }
3915    }
3916
3917    /// Walk `s` through the same state-machine logic the
3918    /// line-ending helpers use, returning a per-byte classification
3919    /// AND the set of byte indices where a state transition
3920    /// fired. The transition indices are the ONLY bytes where the
3921    /// state machine and the lexer can legitimately disagree (the
3922    /// off-by-one at opener / closer / terminator); callers
3923    /// comparing against the lexer should skip exactly those
3924    /// indices and assert agreement everywhere else.
3925    fn classify_source_bytes_with_transitions(
3926        s: &str,
3927    ) -> (Vec<SourceState>, std::collections::HashSet<usize>) {
3928        let (body, bom_len) = match s.strip_prefix('\u{FEFF}') {
3929            Some(rest) => (rest, '\u{FEFF}'.len_utf8()),
3930            None => (s, 0),
3931        };
3932        let mut out: Vec<SourceState> = vec![SourceState::Code; s.len()];
3933        let mut transitions = std::collections::HashSet::new();
3934        let mut chars = body.char_indices().peekable();
3935        let mut state = SourceState::Code;
3936        let mut prev_was_backslash = false;
3937        while let Some((rel_i, ch)) = chars.next() {
3938            let i = bom_len + rel_i;
3939            let peek = chars.peek().map(|&(_, c)| c);
3940            // Classify THIS byte under the state BEFORE advancing.
3941            for byte in &mut out[i..i + ch.len_utf8()] {
3942                *byte = state;
3943            }
3944            let prev_state = state;
3945            let next_state = advance_source_state(ch, peek, state, &mut prev_was_backslash);
3946            // Record only OPENING transitions and the comment-
3947            // closing newline, where the state machine and lexer
3948            // legitimately disagree on this single byte:
3949            //   - Code → InString : opening `"` is Code-side but
3950            //     the lexer puts it inside the STRING token.
3951            //   - Code → InComment: opening `;` / `%` / `#!` /
3952            //     `#+` is Code-side but the lexer puts it inside
3953            //     the COMMENT / SHEBANG / EMACS_DIRECTIVE token.
3954            //   - InComment → Code: the `\n` ending the comment is
3955            //     classified InComment by the state machine but
3956            //     sits OUTSIDE the comment token (the lexer's
3957            //     `[^\n\r]*` excludes it).
3958            // The InString → Code transition (closing `"`) is NOT
3959            // a disagreement: the state machine still tags that
3960            // byte as InString (pre-transition), and the lexer
3961            // includes the closing `"` inside the STRING token.
3962            // Skipping it would silently mask a real bug.
3963            if next_state != state {
3964                let opening = matches!(prev_state, SourceState::Code)
3965                    && matches!(next_state, SourceState::InString | SourceState::InComment);
3966                let comment_close = matches!(prev_state, SourceState::InComment)
3967                    && matches!(next_state, SourceState::Code);
3968                if opening || comment_close {
3969                    transitions.insert(i);
3970                    // For a `#!` or `#+` opener the lexer's token
3971                    // span begins at the `#`, so the second byte
3972                    // (`!` / `+`) is also a "before the lexer's
3973                    // token start" byte the state machine tags as
3974                    // Code. Record it too.
3975                    if matches!(ch, '#') && matches!(peek, Some('!' | '+')) {
3976                        transitions.insert(i + 1);
3977                    }
3978                }
3979            }
3980            state = next_state;
3981        }
3982        (out, transitions)
3983    }
3984
3985    #[test]
3986    fn canonicalize_directives_positive_count_check() {
3987        // Pin the success path of the count check: pass a real
3988        // multi-directive input through canonicalize_directives and
3989        // assert that the output round-trips to the SAME directive
3990        // count. Without this test, a regression that always
3991        // returned CountMismatch (e.g. `==` instead of `!=` on the
3992        // count comparison) would be caught only on production
3993        // calls, not in CI. Together with the Display test above,
3994        // this gives coverage of both arms of the count guard.
3995        use rustledger_core::format::FormatConfig;
3996        let cfg = FormatConfig::default();
3997        let src = "2024-01-01 open Assets:Cash\n2024-01-02 open Assets:Bank\n2024-01-03 close Assets:Cash\n";
3998        let parsed = crate::parse(src);
3999        assert_eq!(
4000            parsed.directives.len(),
4001            3,
4002            "fixture must parse to 3 directives"
4003        );
4004        let dirs: Vec<&rustledger_core::Directive> =
4005            parsed.directives.iter().map(|s| &s.value).collect();
4006        let formatted = super::canonicalize_directives(dirs.iter().copied(), &cfg)
4007            .expect("canonicalize_directives should succeed on this input");
4008        let reparsed = crate::parse(&formatted);
4009        assert_eq!(
4010            reparsed.directives.len(),
4011            3,
4012            "count check accepted but round-trip dropped directives: {formatted}"
4013        );
4014    }
4015
4016    // ---- format_node_range -----------------------------------------
4017
4018    /// Parse `source` via the same pipeline `format_source` uses
4019    /// so the resulting `SyntaxNode`'s `TextRange`s are in the
4020    /// same byte frame `format_node_range`'s `range` argument
4021    /// is expected to use (post-BOM-strip, post-CRLF-to-LF).
4022    /// Returns the syntax node + the normalized source text so
4023    /// tests can compute byte offsets by `.find()`.
4024    fn parse_for_range(source: &str) -> (crate::SyntaxNode, String) {
4025        let (stripped, _bom) = crate::bom::strip_leading(source);
4026        let normalized = crlf_to_lf_outside_strings(stripped).to_string();
4027        let sf = SourceFile::parse(&normalized);
4028        (sf.syntax().clone(), normalized)
4029    }
4030
4031    fn ts(n: usize) -> rowan::TextSize {
4032        rowan::TextSize::try_from(n).expect("offset fits TextSize")
4033    }
4034
4035    /// For any selection covering the whole file, the result text
4036    /// equals `format_node(node)`. Pins the round-trip invariant
4037    /// the design rests on: range formatting is the whole-file
4038    /// formatter restricted to a range, not a parallel canonical
4039    /// form.
4040    #[test]
4041    fn format_node_range_full_range_matches_format_node() {
4042        let source = "\
40432024-01-01 open Assets:Bank USD
40442024-01-15 * \"Coffee\"
4045  Assets:Bank  -5.00 USD
4046  Expenses:Food
40472024-01-31 close Assets:Bank
4048";
4049        let (node, src) = parse_for_range(source);
4050        let full = rowan::TextRange::new(ts(0), ts(src.len()));
4051        let (snap, formatted) =
4052            format_node_range(&node, full).expect("full range must include all directives");
4053        assert_eq!(
4054            snap,
4055            rowan::TextRange::new(ts(0), ts(src.len())),
4056            "snap range should be the whole file's textual span"
4057        );
4058        assert_eq!(formatted, format_node(&node));
4059    }
4060
4061    /// A selection that hits only inter-directive whitespace
4062    /// (no directive intersected, no top-level comment
4063    /// intersected) returns `None` — the caller surfaces this
4064    /// as an empty `Vec<TextEdit>`.
4065    #[test]
4066    fn format_node_range_trivia_only_returns_none() {
4067        // The phase-2.0 Directive-Terminator Rule puts every
4068        // inter-directive blank line on the next directive's
4069        // leading trivia, so any byte index between two
4070        // directives is INSIDE the next directive's text_range.
4071        // The only way to reach a truly trivia-only selection
4072        // is a source that has no directives at all (file is
4073        // pure whitespace). That is the case worth pinning —
4074        // the LSP handler maps `None` to an empty
4075        // `Vec<TextEdit>`, which is exactly the right "nothing
4076        // to format" response for a whitespace-only buffer.
4077        let (empty, _) = parse_for_range("\n\n\n");
4078        let sel = rowan::TextRange::new(ts(0), ts(3));
4079        assert!(format_node_range(&empty, sel).is_none());
4080    }
4081
4082    /// Selecting only the first directive's content (the
4083    /// transaction) snaps to that directive and the second
4084    /// directive is left out of both the snap and the output.
4085    #[test]
4086    fn format_node_range_single_directive() {
4087        let source = "\
40882024-01-01 open Assets:Bank USD
40892024-01-15 * \"Coffee\"
4090  Assets:Bank  -5.00 USD
4091  Expenses:Food
4092";
4093        let (node, src) = parse_for_range(source);
4094        // Position the selection inside the `open` line. Use
4095        // the byte offset of the word `open` so the test is
4096        // robust to whitespace changes in the fixture.
4097        let open_byte = src.find("open").expect("fixture contains 'open'");
4098        let sel = rowan::TextRange::new(ts(open_byte), ts(open_byte + "open".len()));
4099        let (snap, formatted) = format_node_range(&node, sel).expect("intersects 1 directive");
4100
4101        // Snap should start at byte 0 (the open directive's
4102        // text_range starts at the file's start) and end at
4103        // the open directive's terminating newline.
4104        let open_end = src.find('\n').expect("first directive has terminator") + 1;
4105        assert_eq!(snap.start(), ts(0));
4106        assert_eq!(snap.end(), ts(open_end));
4107        // Output is exactly the open directive's canonical form
4108        // + its `\n` terminator. No second-directive content.
4109        assert_eq!(formatted, "2024-01-01 open Assets:Bank USD\n");
4110    }
4111
4112    /// Multi-directive selection: the author's inter-directive
4113    /// blank lines are preserved (a blank stays a blank; grouped
4114    /// stays grouped), matching whole-file formatting (#1325).
4115    #[test]
4116    fn format_node_range_multi_directive_preserves_blank_lines() {
4117        // #1325: range formatting preserves the author's inter-directive
4118        // blank lines, identically to whole-file formatting. A source
4119        // with a blank between the two directives keeps it...
4120        let spaced = "\
41212024-01-01 open Assets:Bank USD
4122
41232024-01-31 close Assets:Bank
4124";
4125        let (node, src) = parse_for_range(spaced);
4126        let sel = rowan::TextRange::new(ts(0), ts(src.len()));
4127        let (snap, formatted) = format_node_range(&node, sel).expect("intersects 2 directives");
4128        assert_eq!(snap, rowan::TextRange::new(ts(0), ts(src.len())));
4129        assert_eq!(formatted, spaced, "the blank separator must be preserved");
4130
4131        // ...and a grouped source (no blank) stays grouped, rather than
4132        // having a separator inserted.
4133        let grouped = "\
41342024-01-01 open Assets:Bank USD
41352024-01-31 close Assets:Bank
4136";
4137        let (node2, src2) = parse_for_range(grouped);
4138        let sel2 = rowan::TextRange::new(ts(0), ts(src2.len()));
4139        let (_, formatted2) = format_node_range(&node2, sel2).expect("intersects 2 directives");
4140        assert_eq!(formatted2, grouped, "grouped directives must stay grouped");
4141    }
4142
4143    #[test]
4144    fn format_node_range_first_directive_in_snap_keeps_leading_blank() {
4145        // Regression (Copilot review of #1325): when the selection
4146        // covers only the SECOND directive, its predecessor sits outside
4147        // the snap, but the blank line between them is the second
4148        // directive's leading trivia and therefore inside the snapped
4149        // range. Range formatting must re-emit it, not silently delete
4150        // the blank line above the selection.
4151        let source = "2024-01-01 open Assets:Bank USD\n\n2024-01-31 close Assets:Bank\n";
4152        let (node, src) = parse_for_range(source);
4153        // Cursor inside the second (close) directive only.
4154        let close_byte = src.find("close").expect("fixture has 'close'");
4155        let cursor = rowan::TextRange::new(ts(close_byte), ts(close_byte));
4156        let (snap, formatted) = format_node_range(&node, cursor).expect("intersects close");
4157        // The leading blank is preserved in the replacement text...
4158        assert_eq!(formatted, "\n2024-01-31 close Assets:Bank\n");
4159        // ...so applying the edit leaves the blank line intact.
4160        let mut result = src;
4161        result.replace_range(
4162            usize::from(snap.start())..usize::from(snap.end()),
4163            &formatted,
4164        );
4165        assert_eq!(
4166            result, source,
4167            "range-formatting the second directive must not delete the blank above it"
4168        );
4169    }
4170
4171    /// Cursor-only (zero-width) selection inside a directive
4172    /// snaps to that directive. The cursor convention: inside
4173    /// or at the directive's start byte counts as inside;
4174    /// boundary at the directive's end belongs to the next
4175    /// child.
4176    #[test]
4177    fn format_node_range_cursor_inside_directive() {
4178        let source = "\
41792024-01-01 open Assets:Bank USD
41802024-01-31 close Assets:Bank
4181";
4182        let (node, src) = parse_for_range(source);
4183        // Cursor on the `c` of `close` (line 2 of the fixture).
4184        let close_byte = src.find("close").expect("fixture has 'close'");
4185        let cursor = rowan::TextRange::new(ts(close_byte), ts(close_byte));
4186        let (snap, formatted) = format_node_range(&node, cursor).expect("intersects close");
4187        // Snap starts at the close directive's text_range start.
4188        // Per Directive-Terminator Rule the second directive
4189        // OWNS the leading inter-directive trivia — so snap
4190        // starts immediately after the first directive's
4191        // terminator newline.
4192        let close_dir_start = src
4193            .find("\n2024-01-31")
4194            .map(|n| n + 1)
4195            .expect("close directive starts on its own line");
4196        assert_eq!(snap.start(), ts(close_dir_start));
4197        assert_eq!(snap.end(), ts(src.len()));
4198        assert_eq!(formatted, "2024-01-31 close Assets:Bank\n");
4199    }
4200
4201    /// Cursor exactly at the start of a directive snaps to
4202    /// that directive (start-boundary inclusion rule).
4203    #[test]
4204    fn format_node_range_cursor_at_directive_start_includes_directive() {
4205        let source = "\
42062024-01-01 open Assets:Bank USD
42072024-01-31 close Assets:Bank
4208";
4209        let (node, _src) = parse_for_range(source);
4210        // Cursor at byte 0 = start of first directive.
4211        let cursor = rowan::TextRange::new(ts(0), ts(0));
4212        let (_snap, formatted) = format_node_range(&node, cursor).expect("intersects open");
4213        // Only the OPEN should be formatted, not the close.
4214        assert!(formatted.starts_with("2024-01-01 open"));
4215        assert!(!formatted.contains("close"));
4216    }
4217
4218    /// Selection containing a top-level standalone comment
4219    /// (file-leading or between-directive comment that the
4220    /// trivia attachment policy puts on `SOURCE_FILE`) includes
4221    /// the comment in both the snap and the output.
4222    #[test]
4223    fn format_node_range_includes_top_level_comments() {
4224        let source = "\
4225; header
42262024-01-01 open Assets:Bank USD
4227";
4228        let (node, src) = parse_for_range(source);
4229        let sel = rowan::TextRange::new(ts(0), ts(src.len()));
4230        let (snap, formatted) = format_node_range(&node, sel).expect("intersects both");
4231        assert_eq!(snap, rowan::TextRange::new(ts(0), ts(src.len())));
4232        // Header comment, then directive on the next line. No
4233        // canonical blank between a file-level comment group
4234        // and a directive (matches format_node's policy).
4235        assert_eq!(formatted, "; header\n2024-01-01 open Assets:Bank USD\n");
4236    }
4237
4238    /// A selection that lands entirely inside an `ERROR_NODE`
4239    /// (no Directive intersected) returns None. Matches
4240    /// `format_node`'s policy of skipping `ERROR_NODE` children
4241    /// at the top level.
4242    #[test]
4243    fn format_node_range_error_node_only_returns_none() {
4244        // `}}}` at top level isn't a directive — the parser
4245        // wraps it in an ERROR_NODE.
4246        let source = "}}}\n";
4247        let (node, src) = parse_for_range(source);
4248        let sel = rowan::TextRange::new(ts(0), ts(src.len()));
4249        assert!(format_node_range(&node, sel).is_none());
4250    }
4251
4252    /// Past-EOF selection still works: the snap clamps to the
4253    /// last child that intersects within the file. (rowan's
4254    /// `TextRange` is bounded by usize but `format_node_range`
4255    /// doesn't validate `range` against file length — bytes past
4256    /// EOF can never intersect any child, so the rule is
4257    /// degenerate but well-defined.)
4258    #[test]
4259    fn format_node_range_past_eof_clamps() {
4260        let source = "2024-01-01 open Assets:Bank USD\n";
4261        let (node, src) = parse_for_range(source);
4262        let past_eof = rowan::TextRange::new(ts(src.len()), ts(src.len() + 1000));
4263        // The cursor / range is past EOF — no child intersects.
4264        assert!(format_node_range(&node, past_eof).is_none());
4265        // But a range that STRADDLES EOF still snaps to the
4266        // last intersecting directive.
4267        let straddle = rowan::TextRange::new(ts(0), ts(src.len() + 1000));
4268        let (snap, formatted) = format_node_range(&node, straddle).expect("intersects open");
4269        assert_eq!(snap, rowan::TextRange::new(ts(0), ts(src.len())));
4270        assert_eq!(formatted, "2024-01-01 open Assets:Bank USD\n");
4271    }
4272
4273    /// A cursor inside a posting (sub-directive position) snaps
4274    /// up to the enclosing transaction — the design pins
4275    /// "round to top-level directive boundaries, no finer."
4276    #[test]
4277    fn format_node_range_cursor_in_posting_snaps_to_transaction() {
4278        let source = "\
42792024-01-15 * \"Coffee\"
4280  Assets:Bank  -5.00 USD
4281  Expenses:Food
4282";
4283        let (node, src) = parse_for_range(source);
4284        // Position the cursor on the `B` of `Bank` in the
4285        // first posting.
4286        let bank_byte = src.find("Bank").expect("fixture has Bank");
4287        let cursor = rowan::TextRange::new(ts(bank_byte), ts(bank_byte));
4288        let (snap, _formatted) = format_node_range(&node, cursor).expect("intersects transaction");
4289        // Snap covers the WHOLE transaction (start of file
4290        // through final posting's newline).
4291        assert_eq!(snap.start(), ts(0));
4292        assert_eq!(snap.end(), ts(src.len()));
4293    }
4294
4295    /// Selection straddling an `ERROR_NODE` between two valid
4296    /// directives: snap range would cover the union (including
4297    /// `ERROR_NODE` bytes), so `format_node_range` returns
4298    /// `None` instead of silently deleting the error content.
4299    ///
4300    /// This is the deliberate divergence from `format_node`'s
4301    /// whole-file policy. `format_source(broken_source)` does
4302    /// drop `ERROR_NODE` content — but that path's callers
4303    /// (`rledger format` CLI, FFI `format.entry`) opt into
4304    /// content loss by invoking the canonical-form pipeline. The
4305    /// per-handler LSP `textDocument/rangeFormatting` path has no
4306    /// such opt-in, so it refuses to delete user content the
4307    /// parser couldn't classify. See the function's rustdoc for
4308    /// the per-handler asymmetry rationale.
4309    #[test]
4310    fn format_node_range_bails_when_snap_covers_error_node() {
4311        let source = "\
43122024-01-01 open Assets:Bank USD
4313}}}garbage{{{
43142024-01-31 close Assets:Bank
4315";
4316        let (node, src) = parse_for_range(source);
4317        let sel = rowan::TextRange::new(ts(0), ts(src.len()));
4318        assert!(
4319            format_node_range(&node, sel).is_none(),
4320            "selection covering both directives + ERROR_NODE between them must bail \
4321             to avoid silently deleting the garbage line — got Some output",
4322        );
4323    }
4324
4325    /// Selection that intersects only the FIRST valid directive
4326    /// in a broken file (no `ERROR_NODE` byte in the snap range)
4327    /// still formats. Pins that the `ERROR_NODE` bail is precisely
4328    /// scoped to the snap range, not to "the file has any
4329    /// `ERROR_NODE` at all".
4330    #[test]
4331    fn format_node_range_formats_directive_when_snap_does_not_cover_error_node() {
4332        let source = "\
43332024-01-01 open Assets:Bank USD
4334}}}garbage{{{
43352024-01-31 close Assets:Bank
4336";
4337        let (node, src) = parse_for_range(source);
4338        // Selection covers ONLY the open directive (first line +
4339        // its terminator). The ERROR_NODE on line 1 sits at byte
4340        // offset == open_end (length of first line including \n)
4341        // onward, OUTSIDE the snap range.
4342        let open_end = src.find('\n').expect("first directive has newline") + 1;
4343        let sel = rowan::TextRange::new(ts(0), ts(open_end));
4344        let (snap, formatted) =
4345            format_node_range(&node, sel).expect("selection covers only the open");
4346        assert_eq!(snap.start(), ts(0));
4347        assert_eq!(snap.end(), ts(open_end));
4348        assert_eq!(formatted, "2024-01-01 open Assets:Bank USD\n");
4349    }
4350
4351    /// `format_node_with_alignment(node, compute_alignment(sf))` is
4352    /// byte-identical to `format_node(node)`. Pins the cache
4353    /// contract: passing the correct alignment is a pure
4354    /// optimization, NOT a behavior change.
4355    #[test]
4356    fn format_node_equals_format_node_with_alignment() {
4357        let fixtures: &[(&str, &str)] = &[
4358            ("empty", ""),
4359            ("open only", "2024-01-01 open Assets:Bank USD\n"),
4360            (
4361                "single txn",
4362                "\
43632024-01-15 * \"Coffee\"
4364  Assets:Bank  -5.00 USD
4365  Expenses:Food
4366",
4367            ),
4368            (
4369                "multi txn varying widths",
4370                "\
43712024-01-15 * \"A\"
4372  Assets:Bank  -5.00 USD
4373  Expenses:Food
43742024-02-15 * \"B\"
4375  Assets:Investment:Long:Path  -123456.78 USD
4376  Expenses:Tax  100.00 USD
4377",
4378            ),
4379        ];
4380        for (label, source) in fixtures {
4381            let (node, _src) = parse_for_range(source);
4382            let source_file = SourceFile::cast(node.clone()).unwrap();
4383            let alignment = compute_alignment(&source_file);
4384            assert_eq!(
4385                format_node(&node),
4386                format_node_with_alignment(&node, alignment),
4387                "format_node_with_alignment must match format_node for {label}",
4388            );
4389        }
4390    }
4391
4392    /// `format_node_range_with_alignment(node, range, compute_alignment(sf))`
4393    /// matches `format_node_range(node, range)` byte-identically.
4394    /// Same shape as the previous test, for the range path.
4395    #[test]
4396    fn format_node_range_matches_format_node_range_with_alignment() {
4397        let source = "\
43982024-01-15 * \"A\"
4399  Assets:Bank  -5.00 USD
4400  Expenses:Food
44012024-02-15 * \"B\"
4402  Assets:Investment:Long:Path  -123456.78 USD
4403  Expenses:Tax  100.00 USD
4404";
4405        let (node, src) = parse_for_range(source);
4406        let source_file = SourceFile::cast(node.clone()).unwrap();
4407        let alignment = compute_alignment(&source_file);
4408        // Pin the equivalence on three ranges: whole file,
4409        // cursor inside the first transaction, cursor inside the
4410        // second.
4411        let sels = [
4412            rowan::TextRange::new(ts(0), ts(src.len())),
4413            rowan::TextRange::new(ts(0), ts(10)),
4414            rowan::TextRange::new(ts(src.len() - 10), ts(src.len())),
4415        ];
4416        for sel in sels {
4417            let uncached = format_node_range(&node, sel);
4418            let cached = format_node_range_with_alignment(&node, sel, alignment);
4419            assert_eq!(
4420                uncached, cached,
4421                "format_node_range_with_alignment must match \
4422                 format_node_range for range {sel:?}",
4423            );
4424        }
4425    }
4426
4427    /// The cached `ParseResult::alignment` value matches what
4428    /// `format_node` would compute on the parsed tree. End-to-end
4429    /// regression: an LSP caller passing `parse_result.alignment`
4430    /// to `format_node_with_alignment` produces the same output
4431    /// as the bare `format_node` (uncached path).
4432    #[test]
4433    fn parse_result_alignment_drives_identical_format_output() {
4434        let source = "\
44352024-01-15 * \"Coffee\"
4436  Assets:Bank  -5.00 USD
4437  Expenses:Food
4438";
4439        let parse_result = crate::parse(source);
4440        let node = parse_result.syntax_node();
4441        assert_eq!(
4442            format_node(&node),
4443            format_node_with_alignment(&node, parse_result.alignment),
4444            "ParseResult::alignment must drive identical format output to format_node",
4445        );
4446    }
4447
4448    /// `format_source_with_parsed(parse(s), s) == format_source(s)`
4449    /// byte-identical across a representative fixture set including
4450    /// CRLF and BOM-prefixed sources. This is the load-bearing
4451    /// equivalence for the LSP `format_document` / FFI
4452    /// `format.source` / WASM `ParsedLedger::format` migrations:
4453    /// they swap `format_source(source)` for
4454    /// `format_source_with_parsed(parse_result, source)` on the
4455    /// assumption that the two produce the same output. Without
4456    /// this test, a future converter or formatter change that
4457    /// silently diverged the two paths would break canonical-form
4458    /// expectations in production.
4459    #[test]
4460    fn format_source_with_parsed_matches_format_source() {
4461        let fixtures: &[(&str, &str)] = &[
4462            ("empty", ""),
4463            ("comment only", "; hello\n"),
4464            (
4465                "single transaction LF",
4466                "\
44672024-01-15 * \"Coffee\"
4468  Assets:Bank  -5.00 USD
4469  Expenses:Food
4470",
4471            ),
4472            (
4473                "multi transaction varying widths LF",
4474                "\
44752024-01-15 * \"A\"
4476  Assets:Bank  -5.00 USD
4477  Expenses:Food
44782024-02-15 * \"B\"
4479  Assets:Investment:Long:Path  -123456.78 USD
4480  Expenses:Tax  100.00 USD
4481",
4482            ),
4483            (
4484                "arithmetic amounts LF",
4485                "\
44862024-01-15 * \"Split\"
4487  Assets:Bank  -10.00 + 5.00 USD
4488  Expenses:Misc
4489",
4490            ),
4491            (
4492                "CRLF source",
4493                "2024-01-15 * \"Coffee\"\r\n  Assets:Bank  -5.00 USD\r\n  Expenses:Food\r\n",
4494            ),
4495            ("BOM-prefixed", "\u{FEFF}2024-01-01 open Assets:Bank USD\n"),
4496            // BOM + CRLF — Windows-authored ledger with a BOM
4497            // prefix. `format_source` BOM-strips + CRLF→LF
4498            // normalizes before parsing. The cache path consumes
4499            // a CST that's BOM-stripped but NOT CRLF-normalized.
4500            // Byte-identity holds because the formatter rebuilds
4501            // canonical output from typed values (no trivia
4502            // passthrough).
4503            (
4504                "BOM + CRLF combination",
4505                "\u{FEFF}2024-01-15 * \"Coffee\"\r\n  Assets:Bank  -5.00 USD\r\n  Expenses:Food\r\n",
4506            ),
4507            // Parse-error file — exercises the fallback. Without
4508            // the `errors.is_empty()` guard, the cache path would
4509            // emit text for ERROR_NODE-wrapped content while
4510            // `format_source` would drop those bytes; identity
4511            // would fail. The fallback delegates to
4512            // `format_source(source)` so identity holds.
4513            (
4514                "parse errors (exercises fallback)",
4515                "2024-01-15 * \"x\"\n  Assets:Bank  -5.00 USD\n}}}garbage\n",
4516            ),
4517            // Bare-`\r` (classic Mac) line terminators. The
4518            // `format_source` path normalizes bare-CR to LF via
4519            // `crlf_to_lf_outside_strings`, then parses cleanly.
4520            // `parse_via_cst` does NOT normalize bare-CR, so the
4521            // CST sees broken syntax and `parse_result.errors`
4522            // is non-empty — the fallback fires. Byte-identity
4523            // holds via the same `format_source` delegation.
4524            (
4525                "bare CR line terminators (exercises fallback)",
4526                "2024-01-01 open Assets:Bank USD\r2024-01-02 open Assets:Cash USD\r",
4527            ),
4528        ];
4529        for (label, source) in fixtures {
4530            let parse_result = crate::parse(source);
4531            let baseline = format_source(source);
4532            let cached = format_source_with_parsed(&parse_result, source);
4533            assert_eq!(
4534                cached, baseline,
4535                "format_source_with_parsed must match format_source for {label}: \
4536                 baseline {baseline:?}, cached {cached:?}",
4537            );
4538        }
4539    }
4540
4541    /// Mismatched-pair safety: in debug builds, passing a
4542    /// length-mismatched `(parse_result, source)` pair panics via
4543    /// the `debug_assert_eq!`. Release builds silently emit text
4544    /// for the wrong buffer (the producer-only invariant is the
4545    /// caller's responsibility, documented in
4546    /// `ParseResult::alignment`).
4547    #[cfg(debug_assertions)]
4548    #[test]
4549    #[should_panic(expected = "source` whose length doesn't match")]
4550    fn format_source_with_parsed_panics_on_length_mismatch() {
4551        let parse_result = crate::parse("2024-01-01 open Assets:Bank USD\n");
4552        // Different length — debug_assert fires.
4553        let _ = format_source_with_parsed(&parse_result, "different");
4554    }
4555}
rustledger_parser/cst/format.rs

rustledger_parser/cst/
format.rs