rustledger_parser/cst/format.rs
1//! Opinionated CST-backed formatter (phase 4.1 of #1262).
2//!
3//! [`format_source`] is a pure function `&str → String`: it
4//! reparses the input into a CST and emits text in one canonical
5//! form per AST shape. Two semantically-equivalent inputs produce
6//! byte-identical output; idempotence (`f(f(x)) == f(x)`) follows
7//! trivially.
8//!
9//! Replaces the pre-#1262 source-level formatter that took
10//! `(source, ParseResult, FormatConfig)` and re-emitted via the
11//! AST-driven `rustledger_core::format` path. Typed-directive
12//! synthesis (`rustledger_core::format::format_directives`) still
13//! lives in `rustledger-core` for callers that build a directive
14//! from scratch (e.g., `rledger add`, importer extract, FFI
15//! `format.entry`) — that's a different shape of input and is
16//! out of scope here.
17//!
18//! # Typed-directive emit: known coupling
19//!
20//! The typed-directive path is a two-pass shim: callers run
21//! `core::format::format_directives` to get bean-format-style text,
22//! then run that text back through [`format_source`] for the
23//! canonical pass. This keeps the FINAL byte sequence single-
24//! sourced (always emitted by this module), but it means
25//! `core::format` is permanently load-bearing as a parser-clean
26//! intermediate and every canonical-form rule needs the legacy
27//! emitter to produce SOMETHING the new parser accepts.
28//!
29//! Call sites (`rustledger-ffi-wasi::router::canonical_format_directives`,
30//! `rustledger::cmd::add_cmd::canonical_format_directive`,
31//! `rustledger::cmd::extract_cmd`) all guard the round-trip with
32//! an explicit `parse(&raw)` step that bails on parse errors, so a
33//! divergence between the two emitters surfaces as a hard error
34//! instead of silently dropping content.
35//!
36//! The eventual fix is a typed-directive emit path on this module
37//! (`format_directive(&Directive) -> String`) that bypasses the
38//! source-string round-trip. Tracked in a follow-up issue.
39//!
40//! # Canonical form (locked in the PR-decision comment on #1262)
41//!
42//! - Indent inside a directive body: 2 spaces. Tabs converted.
43//! - Blank lines between directives: preserved from the source
44//! (#1325). Grouped directives (consecutive `open`s, a `price`
45//! feed) stay grouped; the formatter does not insert or collapse
46//! blank lines, matching Python `bean-format`.
47//! - Blank lines inside a directive: 0.
48//! - Number lexical form: thousands separators dropped; user
49//! decimal-place count preserved.
50//! - Comment content: verbatim.
51//! - Comment positions: normalized to the attachment slot
52//! (header-trailing / inter-directive / body-internal /
53//! posting-trailing).
54//! - Cost spec spacing: `{cost CCY}` (no inner padding).
55//! - Tag/link order on a transaction header: source order, after
56//! the strings.
57//! - Trailing newline at EOF: always exactly one.
58//! - Line endings: LF; CRLF inputs normalized.
59//! - Leading BOM: dropped.
60//!
61//! No `FormatConfig` parameter. One canonical form, no knobs.
62
63use crate::cst::ast::{self, AstNode, AstToken, MetaEntry, SourceFile};
64
/// File-wide column targets used to lay out posting amounts.
///
/// The layout follows bean-format's two-axis scheme. Every posting's
/// number (or arithmetic expression) is right-justified inside a
/// fixed-width **number field** that begins at column `number_col`
/// and spans `number_width` characters. Narrower numbers receive
/// leading spaces, which keeps the currency column — the one right
/// after the field — at the same position on every posting in the
/// file, whatever the width or sign of each individual number.
///
/// - `number_col` = INDENT + max(account width with optional `flag `) + 2
/// - `number_width` = max rendered width of any posting's number /
///   arithmetic expression (sign included)
///
/// The type is `Copy` and `Default`. The all-zero default is the
/// alignment for files that contain no postings (no transactions,
/// or transactions with no AMOUNT). It is `#[non_exhaustive]` so a
/// later column-derivation rule can add fields without breaking
/// downstream consumers.
///
/// **Name choice.** The name spells out the semantic purpose
/// (posting layout column widths) so that the public path
/// `rustledger_parser::format::PostingAlignment` does not collide
/// with any future generic "alignment" type (text justification,
/// memory layout, etc.).
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
#[non_exhaustive]
pub struct PostingAlignment {
    /// 0-indexed column where the right-justified number field
    /// begins.
    pub number_col: usize,
    /// Width of the number field. Numbers narrower than this are
    /// left-padded with spaces to keep the currency column uniform.
    pub number_width: usize,
}
100
/// Indent for directive bodies (postings, metadata): exactly two
/// spaces, as required by the module's canonical-form rule
/// "Indent inside a directive body: 2 spaces".
const INDENT: &str = "  ";
103
104/// Format a Beancount source file in opinionated canonical form.
105///
106/// Reparses internally — callers that already have a CST in hand
107/// and want to avoid the double-parse can use [`format_node`].
108///
109/// Returns canonical text; output always ends with exactly one
110/// trailing newline (even for an empty file, where the output is
111/// just `"\n"`).
112///
113/// **Line-ending normalization runs BEFORE parsing.** The lexer
114/// does not treat bare `\r` as a line terminator, so a classic-
115/// Mac-authored `directive\r…\rdirective\r` would otherwise parse
116/// as a single broken directive and the rest of the user's ledger
117/// would be silently dropped. We normalize `\r\n` and bare `\r`
118/// to `\n` first, then parse — matching the canonical-form
119/// promise that line endings are LF-only on output.
120#[must_use]
121pub fn format_source(source: &str) -> String {
122 let (stripped, _had_bom) = crate::bom::strip_leading(source);
123 let normalized = crlf_to_lf_outside_strings(stripped);
124 let parsed = SourceFile::parse(&normalized);
125 format_node(parsed.syntax())
126}
127
128/// Like [`format_source`] but reuses the caller's
129/// [`crate::ParseResult`] instead of re-parsing `source`.
130///
131/// Skips both expensive pre-passes the bare `format_source` runs
132/// every call: the lex+parse from `SourceFile::parse(&normalized)`,
133/// and the `O(N_postings)` `compute_alignment` walk. Both pieces
134/// are already on `parse_result` (in `syntax_root` and
135/// `alignment` respectively, populated by `parse_via_cst`). For
136/// any consumer that already holds a `ParseResult` — the LSP
137/// `format_document` handler, the FFI `format.source` endpoint,
138/// the WASM `ParsedLedger::format` bridge — this entry skips two
139/// redundant traversals of the file.
140///
141/// **Output equivalence with `format_source`.** Pinned by
142/// `parse_result_alignment_cache::format_source_with_parsed_matches_format_source_under_fallback`
143/// (the fallback exercises broken sources) and
144/// `cst::format::tests::format_source_with_parsed_matches_format_source`
145/// (the cache path exercises clean sources) across LF / CRLF /
146/// BOM / parse-error / mixed-line-ending fixtures. The cache-
147/// path equivalence holds because the formatter rebuilds output
148/// from each directive's typed values rather than echoing
149/// trivia, so the CRLF-vs-LF difference in the underlying CST
150/// trivia never reaches the output. The fallback path is
151/// byte-trivially equivalent (it IS `format_source`).
152///
153/// **CRLF re-injection is still the caller's responsibility.**
154/// Same as `format_source`: this function always returns LF;
155/// LSP consumers that need to preserve CRLF for Windows-
156/// authored files call [`lf_to_crlf_outside_strings`] on the
157/// returned text.
158///
159/// **Parse-error fallback.** When `parse_result.errors` is
160/// non-empty, this function delegates to `format_source(source)`
161/// — losing the cache benefit but preserving byte-identity for
162/// inputs whose CST diverges from what `format_source`'s
163/// pre-parse normalization would produce. Concretely: bare-`\r`
164/// (classic Mac) line terminators are normalized to LF by
165/// `format_source` before parsing, but `parse_via_cst` does NOT
166/// normalize them — so the cached CST treats them as broken
167/// content and `parse_result.errors` is non-empty. The fallback
168/// path keeps the byte-identity claim total instead of
169/// "holds-only-when-clean".
170///
171/// **Stale `parse_result` is the caller's responsibility.** The
172/// producer-side cache invariant (see
173/// [`crate::ParseResult::alignment`] rustdoc) says
174/// `parse_result` must come from a fresh `parse(source)` with
175/// the same `source`. A `debug_assert_eq!` compares the CST's
176/// text length against `source.len() - bom_offset` to catch the
177/// most common mismatched-pair class (different documents have
178/// different lengths) in debug builds; release builds skip the
179/// check. Identical-length mismatches still pass silently —
180/// the rustdoc-level contract remains the source of truth.
181///
182/// # Panics
183///
184/// Panics if `parse_result.syntax_root` is not a `SOURCE_FILE`
185/// (always true for results produced by [`crate::parse`]).
186///
187/// In debug builds, panics on a `(parse_result, source)`
188/// length-mismatch via `debug_assert_eq!`. Release builds
189/// silently emit possibly-wrong output (the producer-only
190/// invariant is the caller's responsibility).
191#[must_use]
192pub fn format_source_with_parsed(parse_result: &crate::ParseResult, source: &str) -> String {
193 // Parse-error fallback. See the function rustdoc for the
194 // rationale: `parse_via_cst` does not run the same input
195 // normalization `format_source` does (no CRLF/bare-CR
196 // normalize), so for sources containing bare-`\r` line
197 // terminators the cached CST is wrong-shaped and the cache
198 // path would diverge from `format_source`. Delegating
199 // preserves byte-identity unconditionally.
200 if !parse_result.errors.is_empty() {
201 return format_source(source);
202 }
203 let node = parse_result.syntax_node();
204 // Defensive length check (debug-only). Catches the most
205 // common form of `(parse_result, source)` mismatched pair —
206 // different documents with different lengths. The CST's
207 // text range is BOM-stripped, so we add back the BOM bytes
208 // if the parser saw one.
209 //
210 // Computed outside the `debug_assert_eq!` to avoid clippy's
211 // `debug_assert_with_mut_call` (`syntax_node()` does an Arc
212 // bump, which clippy treats as state mutation in a debug
213 // context).
214 let cst_len =
215 usize::from(node.text_range().len()) + if parse_result.has_leading_bom { 3 } else { 0 };
216 debug_assert_eq!(
217 cst_len,
218 source.len(),
219 "format_source_with_parsed called with a `source` whose length doesn't \
220 match the CST stored in `parse_result`. The two arguments came from \
221 different documents — the cache path will emit text for the wrong \
222 buffer. See `ParseResult::alignment` rustdoc for the producer-only \
223 invariant.",
224 );
225 format_node_with_alignment(&node, parse_result.alignment)
226}
227
228/// Like [`format_source`], but returns the parse errors instead
229/// of silently formatting around them.
230///
231/// `format_source` is intentionally infallible — the canonical
232/// formatter must still emit *something* for a file the parser
233/// could only recover from. Tooling that wants to refuse to
234/// rewrite a file with parse errors (the `rledger format` CLI,
235/// the LSP `format` handler) previously had to call `parse`
236/// out-of-band, inspect `errors`, then call `format_source` on
237/// the SAME input — a contract two functions cooperated on
238/// implicitly, and the kind of pairing a future caller could
239/// easily forget. This helper makes the contract explicit.
240///
241/// Returns `Ok(formatted)` if and only if `parse(source).errors`
242/// would be empty. Otherwise returns the parse errors verbatim,
243/// in the same order the parser emitted them.
244///
245/// # Errors
246///
247/// Returns `Err(Vec<ParseError>)` containing every parse error
248/// the underlying [`parse`](crate::parse) call would surface for
249/// `source`. The caller decides whether to abort, render the
250/// errors, or fall back to a non-canonical pass.
251pub fn try_format_source(source: &str) -> Result<String, Vec<crate::ParseError>> {
252 let result = crate::parse(source);
253 if !result.errors.is_empty() {
254 return Err(result.errors);
255 }
256 // Reuse the parse + alignment we already produced for the
257 // error gate instead of letting `format_source` re-parse +
258 // re-walk every posting. Byte-identical output pinned by
259 // `format_source_with_parsed_matches_format_source`.
260 Ok(format_source_with_parsed(&result, source))
261}
262
263/// Convert every `\n` line terminator OUTSIDE string literals back
264/// to `\r\n`, leaving `\n` characters inside strings (and inside
265/// comments… see below) untouched.
266///
267/// The canonical form emitted by [`format_source`] is LF-only.
268/// Editors that round-trip Windows-authored files want to see CRLF
269/// echoed back on every line. This helper bridges the two by
270/// walking the canonical output with the shared `SourceState`
271/// state machine. The walker respects:
272///
273/// - String literals: bytes pass through verbatim. The user's
274/// original line endings inside a multi-line narration / note /
275/// document string are preserved.
276/// - Line comments (`;`, `%`, `#!`, `#+`): the comment's
277/// terminating newline IS a real structural line terminator, so
278/// it gets converted to CRLF; bytes inside the comment region
279/// (which can include arbitrary characters, notably stray `"`)
280/// pass through without flipping the in-string state. `#!` and
281/// `#+` open a comment at any column — the lexer's
282/// `SHEBANG` / `EMACS_DIRECTIVE` regexes carry no line-start
283/// anchor, and the state machine matches that classification.
284///
285/// The helper lives in this module rather than the LSP crate
286/// because its correctness depends on the lexer's `STRING` and
287/// comment rules. Keep it co-located with the formatter so a
288/// lexer change forces a co-evaluation here.
289#[must_use]
290pub fn lf_to_crlf_outside_strings(s: &str) -> String {
291 let mut out = String::with_capacity(s.len() + s.matches('\n').count());
292 // BOM is data, not classification input. We re-prepend it
293 // verbatim and let the body start fresh in Code state. The
294 // sibling crlf_to_lf_outside_strings does the same so the two
295 // walkers handle a leading-BOM file identically.
296 let (body, bom) = match s.strip_prefix('\u{FEFF}') {
297 Some(rest) => (rest, "\u{FEFF}"),
298 None => (s, ""),
299 };
300 out.push_str(bom);
301 let mut chars = body.chars().peekable();
302 let mut state = SourceState::Code;
303 let mut prev_was_backslash = false;
304 while let Some(ch) = chars.next() {
305 let peek = chars.peek().copied();
306 match state {
307 SourceState::InString => out.push(ch),
308 SourceState::InComment | SourceState::Code => {
309 if ch == '\n' {
310 out.push_str("\r\n");
311 } else {
312 out.push(ch);
313 }
314 }
315 }
316 state = advance_source_state(ch, peek, state, &mut prev_was_backslash);
317 }
318 out
319}
320
321/// Render typed Beancount `Directive`s in the canonical form
322/// emitted by [`format_source`].
323///
324/// Two-pass pipeline:
325///
326/// 1. Synthesize a source string via the typed-directive emitter
327/// in `rustledger_core::format::format_directives`. That
328/// emitter is `Directive → text`; its output is bean-format-
329/// style, parser-clean, and used here purely as an
330/// intermediate.
331/// 2. Re-parse the synthesized text. If the legacy emitter
332/// produced something the new parser cannot fully accept,
333/// return [`CanonicalizeError::ReparseFailed`] rather than
334/// silently emitting the recoverable subset — that silent-loss
335/// failure mode is what the older `crates/rustledger/tests/
336/// format_compat.rs` (deleted in phase 4.1, distinct from the
337/// phase 4.2 file-pair suite at `crates/rustledger-parser/
338/// tests/format_compat/`) used to guard against. The new file-
339/// pair suite exercises `format_source`, not this two-pass
340/// shim; a future change to `canonicalize_directives`'s error
341/// semantics needs its own dedicated regression test.
342/// 3. Run the re-parsed text through [`format_source`] for the
343/// canonical pass.
344///
345/// Single source of truth for the synthesize → canonicalize
346/// shim. Every consumer that builds a typed `Directive` in memory
347/// and wants canonical text — `rledger add`, `rledger extract`,
348/// the FFI `format.entry` / `format.entries` endpoints — should
349/// call this function instead of reinventing the pipeline.
350pub fn canonicalize_directives<'a, I>(
351 directives: I,
352 config: &rustledger_core::format::FormatConfig,
353) -> Result<String, CanonicalizeError>
354where
355 I: IntoIterator<Item = &'a rustledger_core::Directive>,
356 I::IntoIter: ExactSizeIterator,
357{
358 // Take the count off the ExactSizeIterator without
359 // collecting — the legacy emitter only walks the iterator
360 // once, so we don't need to materialize a Vec just to know
361 // how many directives the caller passed.
362 let iter = directives.into_iter();
363 let input_count = iter.len();
364 let raw = rustledger_core::format::format_directives(iter, config);
365 let parse_result = crate::parse(&raw);
366 if !parse_result.errors.is_empty() {
367 return Err(CanonicalizeError::ReparseFailed {
368 errors: parse_result
369 .errors
370 .iter()
371 .map(ToString::to_string)
372 .collect(),
373 });
374 }
375 // Count check covers the only Directive variants we have
376 // today (12, all of which surface on parse_result.directives).
377 // If a future `rustledger_core::Directive` variant is added
378 // that the parser routes to a different `ParseResult`
379 // collection (e.g., a typed Pushtag whose legacy text the
380 // parser puts on a `pragmas` field), this check needs to
381 // include that field too — otherwise a perfectly healthy
382 // round-trip would always report DirectiveCountMismatch. The
383 // compile-time `_directive_variant_fixture_coverage` match
384 // pins the variant set we're committed to here; any new
385 // variant breaks that match and surfaces this same
386 // maintenance need.
387 let reparsed_count = parse_result.directives.len();
388 if reparsed_count != input_count {
389 return Err(CanonicalizeError::DirectiveCountMismatch {
390 input: input_count,
391 reparsed: reparsed_count,
392 });
393 }
394 Ok(format_source(&raw))
395}
396
/// Failure modes of [`canonicalize_directives`].
///
/// The enum is `#[non_exhaustive]`: a later variant (say a
/// `CanonicalizationTimeout` for an async path, or a guard for a
/// new canonical-form rule) can then be added without a
/// SemVer-breaking change. Consumers must include a `_ => …` arm.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub enum CanonicalizeError {
    /// Re-parsing the synthesized intermediate reported errors.
    /// Only the rendered messages are kept, so callers can show a
    /// diagnostic; the intermediate text is not retained because
    /// it is internal and outside the caller's control.
    ReparseFailed {
        /// One rendered message per parse error found in the
        /// intermediate text. Capped at the parser's own error
        /// limit so this field is bounded.
        errors: Vec<String>,
    },
    /// The synthesized intermediate parsed without errors, yet
    /// the directive count differs from the input. The legacy
    /// emitter and the new parser then disagree about what counts
    /// as a directive — typically a future
    /// `rustledger_core::Directive` variant whose legacy text the
    /// CST parser silently swallows as comments / error-recovery
    /// trivia. Absent this guard the call would round-trip to
    /// truncated text without reporting anything.
    DirectiveCountMismatch {
        /// How many directives the caller supplied.
        input: usize,
        /// How many directives the parser recovered from the
        /// synthesized text.
        reparsed: usize,
    },
}

impl std::fmt::Display for CanonicalizeError {
    /// Renders a one-line, user-facing message. For
    /// `ReparseFailed` at most the first three parse errors are
    /// shown, joined by `"; "`, alongside the total count.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::DirectiveCountMismatch { input, reparsed } => write!(
                f,
                "the canonical formatter could not emit {input} directive(s) \
                 without loss ({reparsed} survived the round-trip). This is \
                 an rledger bug; please report it with the input directives.",
            ),
            Self::ReparseFailed { errors } => {
                let total = errors.len();
                // `total.min(3)` never exceeds the slice length,
                // so the range below cannot go out of bounds.
                let preview = errors[..total.min(3)].join("; ");
                write!(
                    f,
                    "canonical formatter failed to re-parse the synthesized \
                     directive text ({} error(s)): {}",
                    total, preview
                )
            }
        }
    }
}

impl std::error::Error for CanonicalizeError {}
458
459/// Replace CRLF and bare-CR line terminators with LF, but ONLY
460/// outside string literals.
461///
462/// String literals (`"…"`) can contain raw `\r` and `\n` per the
463/// lexer's `STRING` rule; folding CR inside a string would mutate
464/// the user's data. Uses the shared `SourceState` state machine
465/// to track string / comment boundaries.
466///
467/// Cheap fast path: if the input contains no `\r`, returns the
468/// source slice borrowed (no allocation). Used by
469/// [`format_source`] before parsing so the lexer never has to see
470/// legacy line endings. Exposed publicly under [`crlf_to_lf_outside_strings`]
471/// for tooling (CLI `--diff`, format-equivalence checks) that
472/// needs the same string-aware normalization.
473pub fn crlf_to_lf_outside_strings(src: &str) -> std::borrow::Cow<'_, str> {
474 if !src.contains('\r') {
475 return std::borrow::Cow::Borrowed(src);
476 }
477 // Re-prepend the BOM verbatim and let the body start fresh in
478 // Code state. The state machine no longer needs line-start
479 // tracking — the lexer's `SHEBANG` / `EMACS_DIRECTIVE` regexes
480 // have no line-start anchor, so `#!`/`#+` open a comment at
481 // any column, and the state machine mirrors that.
482 let (body, bom) = match src.strip_prefix('\u{FEFF}') {
483 Some(rest) => (rest, "\u{FEFF}"),
484 None => (src, ""),
485 };
486 let mut out = String::with_capacity(src.len());
487 out.push_str(bom);
488 let mut chars = body.chars().peekable();
489 let mut state = SourceState::Code;
490 let mut prev_was_backslash = false;
491 while let Some(ch) = chars.next() {
492 let peek = chars.peek().copied();
493 match state {
494 SourceState::InString => out.push(ch),
495 _ => {
496 if ch == '\r' {
497 out.push('\n');
498 if peek == Some('\n') {
499 chars.next();
500 }
501 } else {
502 out.push(ch);
503 }
504 }
505 }
506 state = advance_source_state(ch, peek, state, &mut prev_was_backslash);
507 }
508 std::borrow::Cow::Owned(out)
509}
510
511/// `true` iff `src` contains at least one `\r` byte OUTSIDE a
512/// string literal — i.e. the byte sequence the canonical
513/// formatter would fold to `\n` via
514/// [`crlf_to_lf_outside_strings`].
515///
516/// This is the explicit predicate companion to the Cow return of
517/// [`crlf_to_lf_outside_strings`]. Tooling that only needs to
518/// know whether the fold would change bytes (the CLI `--diff`
519/// "CR-bearing line endings folded" cause line, the LSP
520/// did-the-formatter-touch-this guard) should call this instead
521/// of matching on `Cow::Owned`, which conflates allocation with
522/// semantic change. A future optimization that pre-allocated the
523/// Cow even on a no-op fold would silently invert that
524/// match-on-Cow guard; this predicate keeps the question
525/// answered by the bytes, not by allocation behavior.
526#[must_use]
527pub fn cr_outside_strings_present(src: &str) -> bool {
528 if !src.contains('\r') {
529 return false;
530 }
531 let body = src.strip_prefix('\u{FEFF}').unwrap_or(src);
532 let mut chars = body.chars().peekable();
533 let mut state = SourceState::Code;
534 let mut prev_was_backslash = false;
535 while let Some(ch) = chars.next() {
536 let peek = chars.peek().copied();
537 if matches!(state, SourceState::Code | SourceState::InComment) && ch == '\r' {
538 return true;
539 }
540 state = advance_source_state(ch, peek, state, &mut prev_was_backslash);
541 }
542 false
543}
544
/// Walker state, advanced one character at a time, for the
/// line-ending passes that have to honor string-literal and
/// comment boundaries.
///
/// A plain `is_in_string` flag would not do for the line-ending
/// helpers: a quote character that appears inside a `;` / `%` /
/// `#!` / `#+` comment is data, not a string delimiter.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum SourceState {
    /// Ordinary code. A `"` starts a string; `;` / `%` / `#!` /
    /// `#+` starts a comment; anything else is plain bytes.
    Code,
    /// Between the quotes of `"…"`. Bytes pass through and an
    /// unescaped `"` leaves the string.
    InString,
    /// Within `;…\n`, `%…\n`, `#!…\n`, or `#+…\n`. Bytes pass
    /// through until an LF or CR is reached.
    InComment,
}
562
563/// One-step state transition shared by both line-ending helpers.
564///
565/// Returns the state AFTER consuming `ch`. The string-escape
566/// bookkeeping (`prev_was_backslash`) updates in place. Comment
567/// opener detection covers all four line-comment lexemes: `;` and
568/// `%` open a comment unconditionally; `#!` and `#+` open one at
569/// any column — the lexer's `#![^\n\r]*` / `#\+[^\n\r]*` regexes
570/// have NO line-start anchor, so a mid-line `#!` or `#+` is still
571/// a `SHEBANG` / `EMACS_DIRECTIVE` token. A `#` followed by
572/// anything else is a `TAG` / `HASH` token, not a comment.
573const fn advance_source_state(
574 ch: char,
575 peek: Option<char>,
576 state: SourceState,
577 prev_was_backslash: &mut bool,
578) -> SourceState {
579 match state {
580 SourceState::InString => {
581 let is_close = ch == '"' && !*prev_was_backslash;
582 *prev_was_backslash = ch == '\\' && !*prev_was_backslash;
583 if is_close {
584 SourceState::Code
585 } else {
586 SourceState::InString
587 }
588 }
589 SourceState::InComment => {
590 if matches!(ch, '\n' | '\r') {
591 SourceState::Code
592 } else {
593 SourceState::InComment
594 }
595 }
596 SourceState::Code => {
597 let is_hash_line_comment = ch == '#' && matches!(peek, Some('!' | '+'));
598 if ch == '"' {
599 *prev_was_backslash = false;
600 SourceState::InString
601 } else if matches!(ch, ';' | '%') || is_hash_line_comment {
602 SourceState::InComment
603 } else {
604 SourceState::Code
605 }
606 }
607 }
608}
609
610/// Format a `SOURCE_FILE` syntax node in opinionated canonical form.
611///
612/// The bare-node entry for callers that already parsed the CST
613/// (typically LSP formatting providers). Output rules are the
614/// same as [`format_source`].
615///
616/// Internally runs [`compute_alignment`] on `node` to derive the
617/// file-wide column targets. Hot paths that hold a precomputed
618/// `PostingAlignment` (e.g., via [`crate::ParseResult::alignment`]) should
619/// call [`format_node_with_alignment`] instead to skip the
620/// per-call walk. Equivalence pinned by
621/// `format_node_equals_format_node_with_alignment` in this file's
622/// tests.
623#[must_use]
624pub fn format_node(node: &crate::SyntaxNode) -> String {
625 let source_file =
626 SourceFile::cast(node.clone()).expect("format_node called on non-SOURCE_FILE node");
627 let alignment = compute_alignment(&source_file);
628 format_node_with_alignment(node, alignment)
629}
630
631/// Like [`format_node`] but skips the per-call
632/// [`compute_alignment`] walk by accepting a precomputed
633/// `PostingAlignment`.
634///
635/// The cache pattern: parse → take `ParseResult::alignment` (the
636/// pre-computed file-wide alignment, populated by `parse_via_cst`)
637/// → call this function. Subsequent formatting calls on the same
638/// `ParseResult` pay only the per-call emit cost, not the
639/// `O(N_postings)` pre-pass.
640///
641/// `alignment` MUST match what `compute_alignment(&SourceFile::cast(node).unwrap())` would
642/// return for the given `node` — passing a mismatched alignment
643/// is allowed but produces output with non-canonical column
644/// widths. Use `PostingAlignment::default()` for files known to have no
645/// postings (no transactions, or transactions with no AMOUNT).
646///
647/// # Panics
648///
649/// Panics if `node`'s kind is not `SOURCE_FILE`.
650#[must_use]
651pub fn format_node_with_alignment(node: &crate::SyntaxNode, alignment: PostingAlignment) -> String {
652 // Precondition check (debug-only). The bare `format_node`
653 // delegate already validated the kind via the
654 // `SourceFile::cast` it performs for `compute_alignment`, so
655 // for the most common call path (bare → with_alignment) the
656 // debug_assert is a redundant no-op in release. External
657 // direct callers of this entry point (FFI, future LSP
658 // handlers calling `format_node_with_alignment` with a
659 // `parse_result.alignment` cache) get the panic in debug
660 // builds; in release, a wrong-kind `node` produces empty or
661 // malformed output rather than panicking — acceptable for
662 // a precondition that's guaranteed by the call's typed
663 // contract.
664 debug_assert_eq!(
665 node.kind(),
666 crate::SyntaxKind::SOURCE_FILE,
667 "format_node_with_alignment called on non-SOURCE_FILE node (got {:?})",
668 node.kind(),
669 );
670 let mut out = String::new();
671 // Walk every direct child in source order so file-level comments
672 // (file-leading per phase-2.0 trivia attachment, plus file-
673 // trailing) interleave correctly with directives. Inter-directive
674 // and same-line trailing comments live INSIDE the next/owning
675 // directive and surface from `emit_directive`'s leading-trivia
676 // pass.
677 //
678 // Blank-line policy at the top level: PRESERVE the author's blank
679 // lines between directives rather than normalizing to exactly one.
680 // Between two directives, emit as many blank lines as the source
681 // had — including zero, so deliberately grouped runs (consecutive
682 // `open`s, a dense `price` feed) stay grouped instead of being
683 // double-spaced (#1325). This matches Python `bean-format` and the
684 // rest of the beancount formatter lineage (fava,
685 // beancount-language-server, beancount-mode), all of which leave
686 // blank-line structure untouched and only realign amounts.
687 //
688 // Adjacent file-level comments still stay tight as a group (so a
689 // `; ====\n; HEADER\n; ====` section header keeps its visual
690 // grouping), and a comment group sitting against a directive on
691 // either side stays flush.
692 let mut prev_was_directive = false;
693 for el in node.children_with_tokens() {
694 match el {
695 rowan::NodeOrToken::Node(n) => {
696 if let Some(directive) = ast::Directive::cast(n.clone()) {
697 if prev_was_directive {
698 for _ in 0..leading_blank_lines(directive.syntax()) {
699 out.push('\n');
700 }
701 }
702 emit_directive(&directive, alignment, &mut out);
703 prev_was_directive = true;
704 } else if n.kind() == crate::SyntaxKind::ERROR_NODE {
705 // Preserve unparsable content verbatim (#1335): `format`
706 // must never delete the author's text. Org-mode `*`
707 // section headers (and any comments grouped with them)
708 // parse into ERROR_NODEs; emit them as-is rather than
709 // dropping them. Treated like a directive for spacing — an
710 // ERROR_NODE is a top-level content block, so the author's
711 // blank lines around it (before it, and before the next
712 // directive) are preserved, not flushed.
713 if prev_was_directive {
714 for _ in 0..leading_blank_lines(&n) {
715 out.push('\n');
716 }
717 }
718 emit_error_node(&n, &mut out);
719 prev_was_directive = true;
720 }
721 // Any other non-directive node: nothing to emit.
722 }
723 rowan::NodeOrToken::Token(t) => {
724 if matches!(
725 t.kind(),
726 crate::SyntaxKind::COMMENT
727 | crate::SyntaxKind::PERCENT_COMMENT
728 | crate::SyntaxKind::SHEBANG
729 | crate::SyntaxKind::EMACS_DIRECTIVE
730 ) {
731 out.push_str(t.text().trim_end_matches(['\n', '\r']));
732 out.push('\n');
733 prev_was_directive = false;
734 }
735 }
736 }
737 }
738 if !out.ends_with('\n') {
739 out.push('\n');
740 }
741 out
742}
743
744/// Format the subset of `node`'s top-level children that intersect
745/// `range`, returning the snapped byte range and the canonical-form
746/// replacement text.
747///
748/// This is the building block for the LSP `textDocument/rangeFormatting`
749/// provider: the client sends a `Range`, the server snaps it up to
750/// the smallest set of top-level structural nodes (directives or
751/// standalone comments) that intersect the selection, formats those
752/// nodes the same way [`format_node`] formats the whole file, and
753/// returns a single `TextEdit` replacing the snapped range. The
754/// alternative — formatting a substring of the source — would have
755/// to either invent a partial canonical form (creating a second
756/// truth alongside the whole-file canonical form, the failure mode
757/// that bit #1252) or refuse to format anything that crosses a
758/// structural boundary. Snapping up to top-level boundaries is the
759/// only choice that lets the same canonical-form rules apply.
760///
761/// **Frame.** `range` is in the *CST* byte frame — the same frame
762/// the syntax node's `TextRange`s use. The LSP handler is
763/// responsible for shifting `bom_offset` at the input/output
764/// boundary (mirrors the [`super::super::SyntaxNode`] /
765/// `selection_range` handler convention; see
766/// `ParseResult::syntax_root` rustdoc for the rationale).
767///
768/// **Behavior.**
769///
770/// - If `range` intersects no top-level Directive or standalone
771/// COMMENT/SHEBANG/EMACS token, returns `None`. The LSP handler
772/// surfaces `None` directly (serialized as `null` per LSP, not
773/// as `[]`); the client treats it as "nothing to format".
774/// - If the computed snap range would cover any top-level
775/// `ERROR_NODE` byte, returns `None`. **Range formatting refuses
776/// to delete user content the parser couldn't classify.** This
777/// diverges from [`format_node`], which silently drops
778/// `ERROR_NODE` children on the whole-file path; the rationale
779/// is the per-handler asymmetry the LSP exposes — the user
780/// pressing "Format Selection" expects either a clean
781/// reformat or a no-op, never a silent partial delete of an
782/// in-progress directive. Tooling that genuinely wants to drop
783/// broken regions can still call [`format_node`] on the same
784/// node.
785/// - Otherwise returns `Some((snap, text))` where `snap` is the
786/// union of the included children's text ranges (so it begins at
787/// the first included child's start and ends at the last
788/// included child's end, including each child's leading-trivia
789/// prefix per the phase-2.0 Directive-Terminator Rule) and
790/// `text` is the canonical-form replacement.
791/// - Cursor-only selection (`range.is_empty()`): the child at the
792/// cursor is included if the cursor is strictly inside it OR is
793/// exactly at the child's start. Boundary at the child's end
794/// belongs to the next child, not the previous one — matches
795/// the standard "end-of-line cursor is start-of-next-line"
796/// convention.
797///
798/// **Posting alignment.** The pre-pass uses the FULL `SourceFile`, not
799/// the selected subset. A selection that formats one transaction
800/// in a file with many other transactions inherits the file's
801/// alignment columns, so the formatted output stays visually
802/// aligned with un-formatted postings elsewhere. The opposite
803/// policy (per-selection alignment) would create a jarring
804/// visual jump every time the user re-formats a sub-range.
805///
806/// **Round-trip invariant.** For any `range` that contains every
807/// top-level child, the returned text equals the result of
808/// [`format_node`] on the same node. Pinned by
809/// `format_node_range_full_range_matches_format_node` in this
810/// file's test module.
811///
812/// # Panics
813///
814/// Panics if `node`'s kind is not `SOURCE_FILE` — same precondition
815/// as [`format_node`].
816#[must_use]
817pub fn format_node_range(
818 node: &crate::SyntaxNode,
819 range: rowan::TextRange,
820) -> Option<(rowan::TextRange, String)> {
821 let source_file =
822 SourceFile::cast(node.clone()).expect("format_node_range called on non-SOURCE_FILE node");
823 // File-wide alignment pre-pass: see rustdoc above for the
824 // rationale. The selected subset always uses the full file's
825 // alignment columns. Hot paths with a precomputed `PostingAlignment`
826 // should call `format_node_range_with_alignment` instead.
827 let alignment = compute_alignment(&source_file);
828 format_node_range_with_alignment(node, range, alignment)
829}
830
831/// Like [`format_node_range`] but skips the per-call
832/// [`compute_alignment`] walk by accepting a precomputed
833/// `PostingAlignment`.
834///
835/// The cache pattern is identical to
836/// [`format_node_with_alignment`]: parse → take
837/// `ParseResult::alignment` → call this function. The hot path the
838/// cache addresses is the LSP `textDocument/rangeFormatting`
839/// fallback (CST-snap path that fires on parse-error files), which
840/// can be invoked per-keystroke through format-on-type clients.
841/// Without the cache the per-call cost is
842/// `O(N_postings_in_file)`; with the cache it's
843/// `O(N_cst_nodes covered by range)`.
844///
845/// `alignment` MUST match what `compute_alignment(&SourceFile::cast(node).unwrap())` would
846/// return for the given `node`; pinned by
847/// `format_node_range_matches_format_node_range_with_alignment`. Same
848/// `range` semantics, `ERROR_NODE` policy, snap rules, and
849/// `# Panics` precondition as [`format_node_range`].
850#[must_use]
851pub fn format_node_range_with_alignment(
852 node: &crate::SyntaxNode,
853 range: rowan::TextRange,
854 alignment: PostingAlignment,
855) -> Option<(rowan::TextRange, String)> {
856 // Precondition check (debug-only). Same rationale as
857 // `format_node_with_alignment`: the bare delegate already
858 // validated the kind, so the most common call path (bare →
859 // with_alignment) gets no release-build cost from this
860 // assert. External direct callers — the LSP range_formatting
861 // fallback, FFI, future format-on-type — get a debug-build
862 // panic; release-build wrong-kind input produces no output
863 // (rather than panicking).
864 debug_assert_eq!(
865 node.kind(),
866 crate::SyntaxKind::SOURCE_FILE,
867 "format_node_range_with_alignment called on non-SOURCE_FILE node (got {:?})",
868 node.kind(),
869 );
870
871 // First pass: identify the included children and the snap range.
872 // We pick:
873 // - Directive nodes whose `text_range` intersects `range`
874 // - top-level COMMENT/PERCENT_COMMENT/SHEBANG/EMACS_DIRECTIVE
875 // tokens whose range intersects `range`
876 // ERROR_NODE and other non-Directive nodes are skipped (matches
877 // `format_node`); a selection that lands only on them returns
878 // None below.
879 let mut snap_start: Option<rowan::TextSize> = None;
880 let mut snap_end: Option<rowan::TextSize> = None;
881 let mut any_included = false;
882 for el in node.children_with_tokens() {
883 let (kind, child_range) = (el.kind(), el.text_range());
884 let is_formattable = match &el {
885 rowan::NodeOrToken::Node(n) => ast::Directive::cast(n.clone()).is_some(),
886 rowan::NodeOrToken::Token(_) => matches!(
887 kind,
888 crate::SyntaxKind::COMMENT
889 | crate::SyntaxKind::PERCENT_COMMENT
890 | crate::SyntaxKind::SHEBANG
891 | crate::SyntaxKind::EMACS_DIRECTIVE
892 ),
893 };
894 if !is_formattable {
895 continue;
896 }
897 if !range_intersects(child_range, range) {
898 continue;
899 }
900 any_included = true;
901 snap_start = Some(snap_start.map_or(child_range.start(), |s| s.min(child_range.start())));
902 snap_end = Some(snap_end.map_or(child_range.end(), |e| e.max(child_range.end())));
903 }
904 if !any_included {
905 return None;
906 }
907 let snap = rowan::TextRange::new(snap_start.unwrap(), snap_end.unwrap());
908
909 // ERROR_NODE intersection bail: if the snap range covers any
910 // top-level ERROR_NODE byte, refuse to format and return None.
911 // Range formatting must not silently delete content the parser
912 // could not classify — without this guard, a selection
913 // spanning two valid directives with an ERROR_NODE between
914 // them would emit a TextEdit that replaces all three with
915 // just the two formatted directives, deleting the user's
916 // in-progress source bytes.
917 //
918 // This is the deliberate divergence from `format_node`'s
919 // whole-file policy: the whole-file path runs on the
920 // assumption that the caller (CLI / FFI / `try_format_source`)
921 // has already decided to accept content loss; the per-handler
922 // LSP path has no such opt-in. The cost is occasional
923 // "format-selection did nothing" UX while a parse error sits
924 // inside the snap; the benefit is no data loss.
925 for el in node.children_with_tokens() {
926 if !matches!(el.kind(), crate::SyntaxKind::ERROR_NODE) {
927 continue;
928 }
929 let er = el.text_range();
930 // Strict-overlap check: an ERROR_NODE whose end touches
931 // snap.start (or start touches snap.end) is adjacent, not
932 // overlapping — those are safe to emit alongside.
933 if er.end() > snap.start() && er.start() < snap.end() {
934 return None;
935 }
936 }
937
938 // Second pass: emit only the children whose range falls
939 // inside `snap`. We re-walk rather than caching the first
940 // pass because the second pass needs to maintain the
941 // `prev_was_directive` blank-line state in source order, and
942 // the child set is small enough that the second walk is
943 // cheap. (Re-walking also keeps the data-flow obvious: snap
944 // computation and emission are two distinct concerns.)
945 let mut out = String::new();
946 let mut prev_was_directive = false;
947 for el in node.children_with_tokens() {
948 let child_range = el.text_range();
949 // Use the snap range (not the input `range`) so we emit
950 // every child WITHIN the snap, even those that the
951 // original selection didn't directly intersect but that
952 // sit between two intersecting children. Without this,
953 // ERROR_NODE-free trivia between two selected directives
954 // would be re-formatted into our output (the comment
955 // pass picks them up), which matches `format_node`.
956 if child_range.end() <= snap.start() || child_range.start() >= snap.end() {
957 continue;
958 }
959 match el {
960 rowan::NodeOrToken::Node(n) => {
961 // ERROR_NODEs never reach here: the range path bails out
962 // above (returns None) when the snap covers one, so it
963 // refuses to format rather than risk touching unparsable
964 // content. Only the whole-file path preserves them verbatim.
965 let Some(directive) = ast::Directive::cast(n) else {
966 continue;
967 };
968 // Preserve the author's inter-directive blank lines
969 // (#1325), identically to `format_node_with_alignment`,
970 // so range formatting and whole-file formatting agree.
971 //
972 // The FIRST directive emitted from the snap needs care:
973 // its predecessor may sit OUTSIDE the selection, but the
974 // blank lines between them are this directive's leading
975 // trivia (the Directive-Terminator Rule), so they fall
976 // INSIDE the snapped range. Dropping them would delete
977 // the blank line above the selection. Emit them whenever
978 // a directive precedes this one in the file — the same
979 // condition the whole-file path expresses as
980 // `prev_was_directive`. For the file's first directive
981 // (no predecessor) there is nothing to preserve.
982 let preceded_by_directive = prev_was_directive
983 || directive
984 .syntax()
985 .prev_sibling()
986 .and_then(ast::Directive::cast)
987 .is_some();
988 if preceded_by_directive {
989 for _ in 0..leading_blank_lines(directive.syntax()) {
990 out.push('\n');
991 }
992 }
993 emit_directive(&directive, alignment, &mut out);
994 prev_was_directive = true;
995 }
996 rowan::NodeOrToken::Token(t) => {
997 if matches!(
998 t.kind(),
999 crate::SyntaxKind::COMMENT
1000 | crate::SyntaxKind::PERCENT_COMMENT
1001 | crate::SyntaxKind::SHEBANG
1002 | crate::SyntaxKind::EMACS_DIRECTIVE
1003 ) {
1004 out.push_str(t.text().trim_end_matches(['\n', '\r']));
1005 out.push('\n');
1006 prev_was_directive = false;
1007 }
1008 }
1009 }
1010 }
1011 if !out.ends_with('\n') {
1012 out.push('\n');
1013 }
1014 Some((snap, out))
1015}
1016
1017/// Whether `child` (a CST node's text range) intersects the
1018/// caller's selection. Zero-width selections (a cursor with no
1019/// extent) are handled specially: the cursor counts as "inside"
1020/// a child if the cursor is strictly inside the child's range or
1021/// is exactly at the child's start. Boundary at the child's end
1022/// is NOT a match — it belongs to the next child, matching
1023/// editors' "end-of-line cursor = start of next line" convention.
1024fn range_intersects(child: rowan::TextRange, sel: rowan::TextRange) -> bool {
1025 if sel.is_empty() {
1026 child.contains(sel.start()) || sel.start() == child.start()
1027 } else {
1028 child.start() < sel.end() && sel.start() < child.end()
1029 }
1030}
1031
1032/// Compute the file-wide alignment columns for a parsed `SourceFile`.
1033///
1034/// Walks every Transaction's postings once, takes the max LHS
1035/// width (account + optional `flag `) and max number-text width,
1036/// and derives the column targets from them.
1037///
1038/// **`O(N_postings)`.** Public so consumers can pre-compute the
1039/// alignment once (typically at parse time) and pass the cached
1040/// `PostingAlignment` into [`format_node_with_alignment`] or
1041/// [`format_node_range_with_alignment`] — eliminates the per-call
1042/// walk in hot formatting paths (LSP format-on-type through a
1043/// parse error, repeat-format scripts, etc.).
1044///
1045/// **Tree-shape precondition.** `sf` must be a `SourceFile` whose
1046/// CST was produced by `parse_structured` (directly or transitively
1047/// via `parse_via_cst` / `parse`). Hand-built partial trees (e.g.,
1048/// a `GreenNodeBuilder` invocation for snippet formatting) silently
1049/// return `PostingAlignment::default()` because their wrapping
1050/// nodes fail the `ast::Directive::Transaction::cast` check.
1051/// Likewise, transactions wrapped in `ERROR_NODE` by mid-edit
1052/// error recovery are excluded — see
1053/// `parse_result_alignment_cache::mid_transaction_error_node` for
1054/// the pinned behavior. The function never panics on a partial
1055/// tree; it just returns the all-zero alignment for the no-postings
1056/// case.
1057///
1058/// **Pinning the contract.** `ParseResult::alignment` is populated
1059/// by calling this function during `parse_via_cst`; the equivalence
1060/// between the cached value and a fresh call is guaranteed by the
1061/// `parse_result_alignment_cache::*` regression tests (7 fixtures) in
1062/// this module.
1063#[must_use]
1064pub fn compute_alignment(sf: &SourceFile) -> PostingAlignment {
1065 let mut max_lhs: usize = 0;
1066 let mut max_num: usize = 0;
1067 // Tracks postings that actually render a number — the only ones that
1068 // participate in alignment. A file whose postings render no numbers
1069 // gets `PostingAlignment::default()`, matching the type docs.
1070 let mut any_aligned_posting = false;
1071 for directive in sf.directives() {
1072 let ast::Directive::Transaction(t) = directive else {
1073 continue;
1074 };
1075 for child in t.syntax().children() {
1076 let Some(p) = ast::Posting::cast(child) else {
1077 continue;
1078 };
1079 let mut lhs = 0usize;
1080 if let Some(flag) = p.flag() {
1081 lhs += flag.text().chars().count() + 1; // `! ` etc.
1082 }
1083 if let Some(account) = p.account() {
1084 lhs += account.text().chars().count();
1085 }
1086
1087 // Only postings that render a number drive the alignment
1088 // column. `bean-format` computes the number column from the
1089 // prefixes of number-bearing lines only, so two kinds of
1090 // posting must NOT push the column right:
1091 // - amount-less postings (the elided balancing leg, or a
1092 // long account with no amount), and
1093 // - currency-only amounts (`Assets:Cash USD`), which
1094 // `emit_posting` prints with no number at all.
1095 // Counting either is why `rledger format` and `bean-format`
1096 // disagreed and round-tripping never converged (issue #1290).
1097 // `amount_number_text` is the shared predicate that keeps
1098 // this pre-pass in lockstep with `emit_posting`.
1099 if let Some(amt) = p.amount()
1100 && let Some(text) = amount_number_text(&amt)
1101 {
1102 any_aligned_posting = true;
1103 max_lhs = max_lhs.max(lhs);
1104 max_num = max_num.max(text.chars().count());
1105 }
1106 }
1107 }
1108 if !any_aligned_posting {
1109 return PostingAlignment::default();
1110 }
1111 // 2 spaces between the longest account end and the number field,
1112 // matching the conventional Beancount layout.
1113 PostingAlignment {
1114 number_col: INDENT.len() + max_lhs + 2,
1115 number_width: max_num,
1116 }
1117}
1118
1119/// The rendered number / arithmetic-expression text of an amount *if it
1120/// renders a number*, or `None` when it renders nothing (a currency-only
1121/// amount like `USD`, whose value text is empty). EXCLUDES the trailing
1122/// currency; sign (if any) is included.
1123///
1124/// This is the single source of truth for "does this posting line have a
1125/// number?". Both the file-wide alignment pre-pass ([`compute_alignment`])
1126/// and the emitter ([`emit_posting`]) consult it, so they can never
1127/// disagree about which postings participate in alignment — the bug
1128/// class behind #1290 (amount-less postings) and its currency-only
1129/// sibling.
1130fn amount_number_text(amt: &ast::Amount) -> Option<String> {
1131 let text = amount_value_text(amt);
1132 (!text.is_empty()).then_some(text)
1133}
1134
1135/// Render an amount's value portion (number or arithmetic
1136/// expression) as a string, EXCLUDING the trailing currency.
1137/// Mirrors the value half of [`format_amount`].
1138fn amount_value_text(amt: &ast::Amount) -> String {
1139 let mut buf = String::new();
1140 if amt.is_arithmetic() {
1141 emit_amount_subnode_expression(amt.syntax(), &mut buf);
1142 return buf;
1143 }
1144 if let Some(sign) = amt.sign()
1145 && sign.is_minus()
1146 {
1147 buf.push('-');
1148 }
1149 if let Some(n) = amt.number() {
1150 buf.push_str(&canonical_number(n.text()));
1151 }
1152 buf
1153}
1154
1155fn emit_directive(d: &ast::Directive, align: PostingAlignment, out: &mut String) {
1156 // Leading inter-directive trivia: COMMENT tokens that sit
1157 // BEFORE the directive's first content token. Per phase-2.0
1158 // trivia attachment, these live inside the directive's syntax
1159 // node — emit them as their own lines BEFORE the canonical
1160 // content.
1161 emit_leading_comments(d.syntax(), out);
1162
1163 // Capture an optional same-line trailing comment so we can
1164 // splice it back in immediately before the directive's
1165 // terminating NEWLINE — see the comment-aware emit loop at
1166 // the bottom of this function.
1167 let trailing = collect_trailing_comment(d.syntax());
1168
1169 let len_before = out.len();
1170 match d {
1171 ast::Directive::Open(d) => emit_open(d, out),
1172 ast::Directive::Close(d) => emit_close(d, out),
1173 ast::Directive::Commodity(d) => emit_commodity(d, out),
1174 ast::Directive::Note(d) => emit_note(d, out),
1175 ast::Directive::Event(d) => emit_event(d, out),
1176 ast::Directive::Query(d) => emit_query(d, out),
1177 ast::Directive::Pad(d) => emit_pad(d, out),
1178 ast::Directive::Document(d) => emit_document(d, out),
1179 ast::Directive::Price(d) => emit_price(d, out),
1180 ast::Directive::Balance(d) => emit_balance(d, out),
1181 ast::Directive::Custom(d) => emit_custom(d, out),
1182 ast::Directive::Option(d) => emit_option(d, out),
1183 ast::Directive::Include(d) => emit_include(d, out),
1184 ast::Directive::Plugin(d) => emit_plugin(d, out),
1185 ast::Directive::Pushtag(d) => emit_pushtag(d, out),
1186 ast::Directive::Poptag(d) => emit_poptag(d, out),
1187 ast::Directive::Pushmeta(d) => emit_pushmeta(d, out),
1188 ast::Directive::Popmeta(d) => emit_popmeta(d, out),
1189 ast::Directive::Transaction(d) => emit_transaction(d, align, out),
1190 }
1191 // Splice the same-line trailing comment in: find the FIRST '\n'
1192 // after `len_before` (= end of the directive's header line in
1193 // the emitted bytes) and insert `" ; comment"` before it. For
1194 // single-line directives the first '\n' is also the only one
1195 // and this lands the comment on the directive line. For multi-
1196 // line transactions it lands the comment on the header line
1197 // (where the source had it), not after the body.
1198 if let Some(c) = trailing
1199 && let Some(newline_rel) = out[len_before..].find('\n')
1200 {
1201 let insert_at = len_before + newline_rel;
1202 let mut splice = String::with_capacity(c.len() + 1);
1203 splice.push(' ');
1204 splice.push_str(&c);
1205 out.insert_str(insert_at, &splice);
1206 }
1207}
1208
1209/// Emit an `ERROR_NODE`'s text verbatim, so `format` never deletes content it
1210/// could not parse (#1335) — chiefly org-mode `*` section headers and the
1211/// comments grouped with them. Only trailing whitespace per line is stripped
1212/// (the formatter's no-trailing-space policy) and the node's trailing newlines
1213/// are collapsed to one; everything else — including blank lines, comments and
1214/// the unparsable lines themselves — is preserved exactly as written.
1215fn emit_error_node(node: &crate::SyntaxNode, out: &mut String) {
1216 let text = node.text().to_string();
1217 // Trim leading AND trailing blank lines: the caller emits the leading
1218 // blank lines (via `leading_blank_lines`) so emitting them here too would
1219 // double-count them and break idempotence. Internal blank lines and the
1220 // content (org headers, grouped comments) are preserved.
1221 for line in text.trim_matches(['\n', '\r']).split('\n') {
1222 out.push_str(line.trim_end());
1223 out.push('\n');
1224 }
1225}
1226
1227/// Number of blank lines the author left immediately before this
1228/// directive's first visible line (its leading comment, if any, else
1229/// its content). Each NEWLINE in the leading trivia that precedes the
1230/// first comment / content token is exactly one blank line: the
1231/// previous directive owns its own terminator NEWLINE (the Directive-
1232/// Terminator Rule), so this node's leading NEWLINEs are purely the
1233/// blank gap, with no off-by-one. WHITESPACE-only "blank" lines count
1234/// too (the NEWLINE that ends them is included). Scanning stops at the
1235/// first comment or content token, so a blank line sitting *between* a
1236/// leading comment and the directive's content is not counted here
1237/// (that gap is collapsed by `emit_leading_comments`, as before).
1238fn leading_blank_lines(node: &crate::SyntaxNode) -> usize {
1239 let mut blanks = 0;
1240 for el in node.children_with_tokens() {
1241 let rowan::NodeOrToken::Token(t) = el else {
1242 break;
1243 };
1244 match t.kind() {
1245 crate::SyntaxKind::NEWLINE => blanks += 1,
1246 crate::SyntaxKind::WHITESPACE => {}
1247 // First comment or content token — past the leading gap.
1248 _ => break,
1249 }
1250 }
1251 blanks
1252}
1253
1254/// Walk the directive's direct-child tokens until the first
1255/// non-trivia token, emitting each `COMMENT` (and `PERCENT_COMMENT`)
1256/// on its own line. Whitespace and newlines in the leading region
1257/// are ignored — the canonical form controls inter-directive
1258/// blank-line spacing separately.
1259fn emit_leading_comments(node: &crate::SyntaxNode, out: &mut String) {
1260 for el in node.children_with_tokens() {
1261 let rowan::NodeOrToken::Token(t) = el else {
1262 break;
1263 };
1264 match t.kind() {
1265 crate::SyntaxKind::COMMENT | crate::SyntaxKind::PERCENT_COMMENT => {
1266 out.push_str(t.text().trim_end_matches(['\n', '\r']));
1267 out.push('\n');
1268 }
1269 crate::SyntaxKind::WHITESPACE | crate::SyntaxKind::NEWLINE => {}
1270 _ => break,
1271 }
1272 }
1273}
1274
1275/// Return the directive's same-line trailing comment (if any) —
1276/// the COMMENT token that appears between the LAST non-trivia
1277/// content token and the directive-terminating NEWLINE on the
1278/// header line. Returns the verbatim comment text (no trailing
1279/// newline).
1280fn collect_trailing_comment(node: &crate::SyntaxNode) -> Option<String> {
1281 // Find the directive-header terminating NEWLINE: the FIRST
1282 // direct-child NEWLINE that follows at least one non-trivia
1283 // content token. (For single-line directives there's only one
1284 // NEWLINE; for transactions the header line is the first
1285 // NEWLINE, after which postings/metadata follow.)
1286 let mut header_nl_idx: Option<usize> = None;
1287 let mut saw_content = false;
1288 let tokens: Vec<crate::SyntaxToken> = node
1289 .children_with_tokens()
1290 .filter_map(rowan::NodeOrToken::into_token)
1291 .collect();
1292 for (i, t) in tokens.iter().enumerate() {
1293 let k = t.kind();
1294 if k == crate::SyntaxKind::NEWLINE && saw_content {
1295 header_nl_idx = Some(i);
1296 break;
1297 }
1298 if !matches!(
1299 k,
1300 crate::SyntaxKind::WHITESPACE
1301 | crate::SyntaxKind::NEWLINE
1302 | crate::SyntaxKind::COMMENT
1303 | crate::SyntaxKind::PERCENT_COMMENT
1304 ) {
1305 saw_content = true;
1306 }
1307 }
1308 // EOF-without-newline fallback: if there is no header-
1309 // terminating NEWLINE, the directive runs to the end of the
1310 // file. Scan from the LAST token instead. A `?` early-return
1311 // here previously dropped same-line trailing comments at the
1312 // final line of a file that lacked a trailing newline, e.g.
1313 // `2024-01-15 open Assets:A ; trailing` (no `\n`). The
1314 // canonical formatter restores the trailing newline, but the
1315 // comment was already gone.
1316 let nl_idx = header_nl_idx.unwrap_or(tokens.len());
1317 // Scan backwards from the header NEWLINE (or EOF): the
1318 // trailing comment is the last COMMENT before the NEWLINE
1319 // separated only by WHITESPACE.
1320 for i in (0..nl_idx).rev() {
1321 let k = tokens[i].kind();
1322 if matches!(
1323 k,
1324 crate::SyntaxKind::COMMENT | crate::SyntaxKind::PERCENT_COMMENT
1325 ) {
1326 return Some(tokens[i].text().to_string());
1327 }
1328 if k != crate::SyntaxKind::WHITESPACE {
1329 return None;
1330 }
1331 }
1332 None
1333}
1334
1335// ---- Single-line directives ------------------------------------
1336
1337fn emit_open(d: &ast::OpenDirective, out: &mut String) {
1338 let date = d.date().map(|t| t.text().to_string()).unwrap_or_default();
1339 let account = d
1340 .account()
1341 .map(|t| t.text().to_string())
1342 .unwrap_or_default();
1343 out.push_str(&date);
1344 out.push_str(" open ");
1345 out.push_str(&account);
1346 // The currency constraint list is comma-separated (`USD,EUR`), not
1347 // space-separated — emitting spaces produces invalid beancount (#1405).
1348 for (i, currency) in d.currencies().enumerate() {
1349 out.push_str(if i == 0 { " " } else { "," });
1350 out.push_str(currency.text());
1351 }
1352 if let Some(booking) = d.booking_method() {
1353 // `booking.text()` includes the surrounding quotes.
1354 out.push(' ');
1355 out.push_str(booking.text());
1356 }
1357 out.push('\n');
1358 emit_meta_entries_of(d.syntax(), out);
1359}
1360
1361fn emit_close(d: &ast::CloseDirective, out: &mut String) {
1362 let date = d.date().map(|t| t.text().to_string()).unwrap_or_default();
1363 let account = d
1364 .account()
1365 .map(|t| t.text().to_string())
1366 .unwrap_or_default();
1367 out.push_str(&date);
1368 out.push_str(" close ");
1369 out.push_str(&account);
1370 out.push('\n');
1371 emit_meta_entries_of(d.syntax(), out);
1372}
1373
1374fn emit_commodity(d: &ast::CommodityDirective, out: &mut String) {
1375 let date = d.date().map(|t| t.text().to_string()).unwrap_or_default();
1376 let currency = d
1377 .currency()
1378 .map(|t| t.text().to_string())
1379 .unwrap_or_default();
1380 out.push_str(&date);
1381 out.push_str(" commodity ");
1382 out.push_str(¤cy);
1383 out.push('\n');
1384 emit_meta_entries_of(d.syntax(), out);
1385}
1386
1387fn emit_note(d: &ast::NoteDirective, out: &mut String) {
1388 let date = d.date().map(|t| t.text().to_string()).unwrap_or_default();
1389 let account = d
1390 .account()
1391 .map(|t| t.text().to_string())
1392 .unwrap_or_default();
1393 let text = d.text().map(|s| s.text().to_string()).unwrap_or_default();
1394 out.push_str(&date);
1395 out.push_str(" note ");
1396 out.push_str(&account);
1397 out.push(' ');
1398 out.push_str(&text);
1399 out.push('\n');
1400 emit_meta_entries_of(d.syntax(), out);
1401}
1402
1403fn emit_event(d: &ast::EventDirective, out: &mut String) {
1404 let date = d.date().map(|t| t.text().to_string()).unwrap_or_default();
1405 let event_type = d
1406 .event_type()
1407 .map(|s| s.text().to_string())
1408 .unwrap_or_default();
1409 let value = d.value().map(|s| s.text().to_string()).unwrap_or_default();
1410 out.push_str(&date);
1411 out.push_str(" event ");
1412 out.push_str(&event_type);
1413 out.push(' ');
1414 out.push_str(&value);
1415 out.push('\n');
1416 emit_meta_entries_of(d.syntax(), out);
1417}
1418
1419fn emit_query(d: &ast::QueryDirective, out: &mut String) {
1420 let date = d.date().map(|t| t.text().to_string()).unwrap_or_default();
1421 let name = d.name().map(|s| s.text().to_string()).unwrap_or_default();
1422 let query = d.query().map(|s| s.text().to_string()).unwrap_or_default();
1423 out.push_str(&date);
1424 out.push_str(" query ");
1425 out.push_str(&name);
1426 out.push(' ');
1427 out.push_str(&query);
1428 out.push('\n');
1429 emit_meta_entries_of(d.syntax(), out);
1430}
1431
1432fn emit_pad(d: &ast::PadDirective, out: &mut String) {
1433 let date = d.date().map(|t| t.text().to_string()).unwrap_or_default();
1434 let target = d
1435 .target_account()
1436 .map(|t| t.text().to_string())
1437 .unwrap_or_default();
1438 let source = d
1439 .source_account()
1440 .map(|t| t.text().to_string())
1441 .unwrap_or_default();
1442 out.push_str(&date);
1443 out.push_str(" pad ");
1444 out.push_str(&target);
1445 out.push(' ');
1446 out.push_str(&source);
1447 out.push('\n');
1448 emit_meta_entries_of(d.syntax(), out);
1449}
1450
1451fn emit_document(d: &ast::DocumentDirective, out: &mut String) {
1452 let date = d.date().map(|t| t.text().to_string()).unwrap_or_default();
1453 let account = d
1454 .account()
1455 .map(|t| t.text().to_string())
1456 .unwrap_or_default();
1457 let path = d.path().map(|s| s.text().to_string()).unwrap_or_default();
1458 out.push_str(&date);
1459 out.push_str(" document ");
1460 out.push_str(&account);
1461 out.push(' ');
1462 out.push_str(&path);
1463 // Trailing TAG / LINK tokens — typed AST has no accessor, so
1464 // walk direct-child tokens. Skip LEADING trivia (a blank line
1465 // before a non-first directive attaches its NEWLINE inside the
1466 // node) and stop at the first NEWLINE *after* the header content
1467 // begins; otherwise the tags/links are dropped when reformatting
1468 // any document past the first — the same bug as #1321 in the
1469 // transaction path.
1470 let mut seen_content = false;
1471 for el in d.syntax().children_with_tokens() {
1472 let rowan::NodeOrToken::Token(t) = el else {
1473 break;
1474 };
1475 match t.kind() {
1476 crate::SyntaxKind::TAG | crate::SyntaxKind::LINK => {
1477 out.push(' ');
1478 out.push_str(t.text());
1479 seen_content = true;
1480 }
1481 crate::SyntaxKind::NEWLINE if seen_content => break,
1482 // Leading trivia before the date: whitespace, blank-line
1483 // NEWLINEs, AND comment lines. A comment before a non-first
1484 // directive attaches inside this node (Directive-Terminator
1485 // Rule); skipping only WHITESPACE/NEWLINE would let it flip
1486 // `seen_content`, break at the comment's NEWLINE, and drop
1487 // the real header tags/links.
1488 k if k.is_trivia() => {}
1489 _ => seen_content = true,
1490 }
1491 }
1492 out.push('\n');
1493 emit_meta_entries_of(d.syntax(), out);
1494}
1495
1496fn emit_price(d: &ast::PriceDirective, out: &mut String) {
1497 let date = d.date().map(|t| t.text().to_string()).unwrap_or_default();
1498 let base = d
1499 .base_currency()
1500 .map(|t| t.text().to_string())
1501 .unwrap_or_default();
1502 let quote = d
1503 .quote_currency()
1504 .map(|t| t.text().to_string())
1505 .unwrap_or_default();
1506 out.push_str(&date);
1507 out.push_str(" price ");
1508 out.push_str(&base);
1509 out.push(' ');
1510 emit_amount_expression(d.syntax(), out);
1511 out.push(' ');
1512 out.push_str("e);
1513 out.push('\n');
1514 emit_meta_entries_of(d.syntax(), out);
1515}
1516
1517fn emit_balance(d: &ast::BalanceDirective, out: &mut String) {
1518 let date = d.date().map(|t| t.text().to_string()).unwrap_or_default();
1519 let account = d
1520 .account()
1521 .map(|t| t.text().to_string())
1522 .unwrap_or_default();
1523 let currency = d
1524 .currency()
1525 .map(|t| t.text().to_string())
1526 .unwrap_or_default();
1527 out.push_str(&date);
1528 out.push_str(" balance ");
1529 out.push_str(&account);
1530 out.push(' ');
1531 emit_amount_expression(d.syntax(), out);
1532 out.push(' ');
1533 out.push_str(¤cy);
1534 // Optional `~ tolerance [CCY]` — walk raw tokens.
1535 if let Some((tolerance, tol_currency)) = balance_tolerance(d.syntax()) {
1536 out.push_str(" ~ ");
1537 out.push_str(&tolerance);
1538 if let Some(c) = tol_currency {
1539 out.push(' ');
1540 out.push_str(&c);
1541 }
1542 }
1543 out.push('\n');
1544 emit_meta_entries_of(d.syntax(), out);
1545}
1546
1547fn emit_custom(d: &ast::CustomDirective, out: &mut String) {
1548 let date = d.date().map(|t| t.text().to_string()).unwrap_or_default();
1549 let custom_type = d
1550 .custom_type()
1551 .map(|s| s.text().to_string())
1552 .unwrap_or_default();
1553 out.push_str(&date);
1554 out.push_str(" custom ");
1555 out.push_str(&custom_type);
1556 // Walk raw tokens after the type STRING and emit each value
1557 // with single-space separation. NUMBER + CURRENCY adjacent
1558 // counts as an Amount; emitted together with one space.
1559 let tokens: Vec<crate::SyntaxToken> = d
1560 .syntax()
1561 .children_with_tokens()
1562 .filter_map(rowan::NodeOrToken::into_token)
1563 .filter(|t| !is_trivia_kind(t.kind()))
1564 .collect();
1565 // `seen_type` skips the leading DATE + CUSTOM_KW + type-STRING
1566 // tokens (already emitted above as the directive header); once
1567 // it flips true, every subsequent non-trivia token is a value
1568 // argument and gets emitted with single-space separation. An
1569 // adjacent NUMBER + CURRENCY pair is glued with a single space
1570 // (canonical Amount shape); the CURRENCY is NOT eaten as a
1571 // standalone arg next iteration.
1572 //
1573 // Beancount custom directives accept any mix of value kinds
1574 // including DATE — a `custom "type" 2024-06-15 100.00 USD`
1575 // shape has a DATE in value position. The previous version
1576 // skipped every DATE after seen_type, silently dropping such
1577 // user-provided date arguments.
1578 let mut seen_type = false;
1579 let mut i = 0;
1580 while i < tokens.len() {
1581 let t = &tokens[i];
1582 if !seen_type {
1583 if t.kind() == crate::SyntaxKind::STRING {
1584 seen_type = true;
1585 }
1586 i += 1;
1587 continue;
1588 }
1589 out.push(' ');
1590 if t.kind() == crate::SyntaxKind::NUMBER {
1591 out.push_str(&canonical_number(t.text()));
1592 if matches!(
1593 tokens.get(i + 1).map(rowan::SyntaxToken::kind),
1594 Some(crate::SyntaxKind::CURRENCY)
1595 ) {
1596 out.push(' ');
1597 out.push_str(tokens[i + 1].text());
1598 i += 2;
1599 continue;
1600 }
1601 } else {
1602 out.push_str(t.text());
1603 }
1604 i += 1;
1605 }
1606 out.push('\n');
1607 emit_meta_entries_of(d.syntax(), out);
1608}
1609
1610// ---- Top-level non-dated directives -----------------------------
1611
1612fn emit_option(d: &ast::OptionDirective, out: &mut String) {
1613 let key = d.key().map(|s| s.text().to_string()).unwrap_or_default();
1614 let value = d.value().map(|s| s.text().to_string()).unwrap_or_default();
1615 out.push_str("option ");
1616 out.push_str(&key);
1617 out.push(' ');
1618 out.push_str(&value);
1619 out.push('\n');
1620}
1621
1622fn emit_include(d: &ast::IncludeDirective, out: &mut String) {
1623 let path = d.path().map(|s| s.text().to_string()).unwrap_or_default();
1624 out.push_str("include ");
1625 out.push_str(&path);
1626 out.push('\n');
1627}
1628
1629fn emit_plugin(d: &ast::PluginDirective, out: &mut String) {
1630 let module = d.module().map(|s| s.text().to_string()).unwrap_or_default();
1631 out.push_str("plugin ");
1632 out.push_str(&module);
1633 if let Some(config) = d.config() {
1634 out.push(' ');
1635 out.push_str(config.text());
1636 }
1637 out.push('\n');
1638}
1639
1640// ---- State directives (no metadata) -----------------------------
1641
1642fn emit_pushtag(d: &ast::PushtagDirective, out: &mut String) {
1643 let tag = d.tag().map(|t| t.text().to_string()).unwrap_or_default();
1644 out.push_str("pushtag ");
1645 out.push_str(&tag);
1646 out.push('\n');
1647}
1648
1649fn emit_poptag(d: &ast::PoptagDirective, out: &mut String) {
1650 let tag = d.tag().map(|t| t.text().to_string()).unwrap_or_default();
1651 out.push_str("poptag ");
1652 out.push_str(&tag);
1653 out.push('\n');
1654}
1655
1656fn emit_pushmeta(d: &ast::PushmetaDirective, out: &mut String) {
1657 let key = d.key().map(|t| t.text().to_string()).unwrap_or_default();
1658 out.push_str("pushmeta ");
1659 out.push_str(&key);
1660 // Walk the value tokens after META_KEY, single-space separated.
1661 let mut past_key = false;
1662 for el in d.syntax().children_with_tokens() {
1663 let rowan::NodeOrToken::Token(t) = el else {
1664 continue;
1665 };
1666 if !past_key {
1667 if t.kind() == crate::SyntaxKind::META_KEY {
1668 past_key = true;
1669 }
1670 continue;
1671 }
1672 if is_trivia_kind(t.kind()) {
1673 continue;
1674 }
1675 out.push(' ');
1676 if t.kind() == crate::SyntaxKind::NUMBER {
1677 out.push_str(&canonical_number(t.text()));
1678 } else {
1679 out.push_str(t.text());
1680 }
1681 }
1682 out.push('\n');
1683}
1684
1685fn emit_popmeta(d: &ast::PopmetaDirective, out: &mut String) {
1686 let key = d.key().map(|t| t.text().to_string()).unwrap_or_default();
1687 out.push_str("popmeta ");
1688 out.push_str(&key);
1689 out.push('\n');
1690}
1691
1692// ---- Transaction + Posting --------------------------------------
1693
1694fn emit_transaction(d: &ast::Transaction, align: PostingAlignment, out: &mut String) {
1695 let date = d.date().map(|t| t.text().to_string()).unwrap_or_default();
1696 out.push_str(&date);
1697 out.push(' ');
1698 out.push_str(&transaction_flag_string(d));
1699 if let Some(payee) = d.payee() {
1700 out.push(' ');
1701 out.push_str(payee.text());
1702 }
1703 if let Some(narration) = d.narration() {
1704 out.push(' ');
1705 out.push_str(narration.text());
1706 }
1707 // Header-region tags/links — emitted in source order
1708 // (typed `.tags()` / `.links()` accessors return each kind
1709 // grouped, which loses interleaving like `#a ^l #b`). Walk
1710 // direct-child tokens, stopping at the header-terminating
1711 // NEWLINE.
1712 //
1713 // `seen_content` guards against LEADING trivia: for any directive
1714 // after the first, the preceding blank line's NEWLINE attaches
1715 // inside this node before the date (the Directive-Terminator Rule).
1716 // The header terminator is the first NEWLINE *after* the date, not
1717 // a leading one — otherwise this loop would break immediately and
1718 // emit no header tags (#1321).
1719 let mut seen_content = false;
1720 for el in d.syntax().children_with_tokens() {
1721 let rowan::NodeOrToken::Token(t) = el else {
1722 break;
1723 };
1724 match t.kind() {
1725 crate::SyntaxKind::TAG | crate::SyntaxKind::LINK => {
1726 out.push(' ');
1727 out.push_str(t.text());
1728 seen_content = true;
1729 }
1730 crate::SyntaxKind::NEWLINE if seen_content => break,
1731 // Leading trivia before the date: whitespace, blank-line
1732 // NEWLINEs, AND comment lines (a comment before a non-first
1733 // directive attaches inside this node per the Directive-
1734 // Terminator Rule). Skipping only WHITESPACE/NEWLINE would
1735 // let a leading comment flip `seen_content`, break at the
1736 // comment's NEWLINE, and drop the real header tags/links.
1737 k if k.is_trivia() => {}
1738 // DATE / flag / STRING etc. — header content has begun.
1739 _ => seen_content = true,
1740 }
1741 }
1742 out.push('\n');
1743 // Body: a single source-order walk over the transaction's children,
1744 // emitting — in the order they appear — POSTING / META_ENTRY nodes, any
1745 // body-internal COMMENT lines (#1332: the formatter must not delete the
1746 // author's comments), and trailing body-line TAG / LINK continuation
1747 // tokens (valid Beancount per the body-line exemption).
1748 //
1749 // `seen_content` / `past_header` skip the header region exactly as the
1750 // header loop above does, so the header-trailing comment (spliced onto
1751 // the header line by `emit_directive`) and the header tags/links (already
1752 // emitted inline above) are not duplicated here. A leading blank-line
1753 // NEWLINE for any directive past the first is trivia and must not flip
1754 // `past_header` early (#1321).
1755 let mut past_header = false;
1756 let mut seen_content = false;
1757 for el in d.syntax().children_with_tokens() {
1758 match el {
1759 rowan::NodeOrToken::Node(n) => {
1760 // A POSTING / META_ENTRY node is definitively past the header.
1761 past_header = true;
1762 if let Some(p) = ast::Posting::cast(n.clone()) {
1763 emit_posting(&p, align, out);
1764 } else if let Some(m) = ast::MetaEntry::cast(n) {
1765 emit_meta_entry(&m, INDENT, out);
1766 }
1767 }
1768 rowan::NodeOrToken::Token(t) => {
1769 if !past_header {
1770 match t.kind() {
1771 crate::SyntaxKind::NEWLINE if seen_content => past_header = true,
1772 k if k.is_trivia() => {}
1773 // DATE / flag / STRING / header TAG / LINK: still header.
1774 _ => seen_content = true,
1775 }
1776 continue;
1777 }
1778 // Body tokens: preserve comment-only lines and emit
1779 // continuation tags/links, each on its own indented line.
1780 match t.kind() {
1781 crate::SyntaxKind::COMMENT | crate::SyntaxKind::PERCENT_COMMENT => {
1782 out.push_str(INDENT);
1783 out.push_str(t.text().trim_end_matches(['\n', '\r']));
1784 out.push('\n');
1785 }
1786 crate::SyntaxKind::TAG | crate::SyntaxKind::LINK => {
1787 out.push_str(INDENT);
1788 out.push_str(t.text());
1789 out.push('\n');
1790 }
1791 _ => {}
1792 }
1793 }
1794 }
1795 }
1796}
1797
1798fn transaction_flag_string(d: &ast::Transaction) -> String {
1799 use crate::cst::ast::TransactionFlagKind;
1800 match d.flag() {
1801 None => "*".to_string(),
1802 Some(f) => match f.classify() {
1803 TransactionFlagKind::Star | TransactionFlagKind::Txn => "*".to_string(),
1804 TransactionFlagKind::Pending => "!".to_string(),
1805 TransactionFlagKind::Hash => "#".to_string(),
1806 TransactionFlagKind::Letter | TransactionFlagKind::CurrencyLetter => {
1807 f.text().to_string()
1808 }
1809 },
1810 }
1811}
1812
1813fn emit_posting(p: &ast::Posting, align: PostingAlignment, out: &mut String) {
1814 // Posting-trailing comment (same-line, before the posting-line
1815 // NEWLINE) — capture upfront so we can splice it back in just
1816 // before that NEWLINE, preserving the user's attachment intent.
1817 let trailing = collect_trailing_comment(p.syntax());
1818 let posting_start = out.len();
1819
1820 out.push_str(INDENT);
1821 let mut col = INDENT.len();
1822 if let Some(flag) = p.flag() {
1823 out.push_str(flag.text());
1824 out.push(' ');
1825 col += flag.text().chars().count() + 1;
1826 }
1827 let account_text = p
1828 .account()
1829 .map(|a| a.text().to_string())
1830 .unwrap_or_default();
1831 out.push_str(&account_text);
1832 col += account_text.chars().count();
1833
1834 if let Some(amt) = p.amount() {
1835 // `amount_number_text` is the shared "does this render a number?"
1836 // predicate (see `compute_alignment`); a currency-only amount
1837 // returns `None` and prints no number.
1838 if let Some(value) = amount_number_text(&amt) {
1839 // Two stages of padding:
1840 // 1) Account end → start of number field (`number_col`).
1841 // Fall back to 2 spaces when the LHS already exceeds
1842 // the file-wide max (over-long account name).
1843 // 2) Inside the number field, left-pad to right-justify
1844 // to `number_width`. Effect: the currency column
1845 // lands at a single uniform position file-wide even
1846 // when numbers have different widths or signs.
1847 let field_pad = align.number_col.saturating_sub(col).max(2);
1848 let justify_pad = align.number_width.saturating_sub(value.chars().count());
1849 for _ in 0..(field_pad + justify_pad) {
1850 out.push(' ');
1851 }
1852 out.push_str(&value);
1853 if let Some(c) = amt.currency() {
1854 out.push(' ');
1855 out.push_str(c.text());
1856 }
1857 if let Some(cs) = p.cost_spec() {
1858 out.push(' ');
1859 out.push_str(&format_cost_spec(&cs));
1860 }
1861 if let Some(pa) = p.price_annotation() {
1862 out.push(' ');
1863 out.push_str(&format_price_annotation(&pa));
1864 }
1865 }
1866 }
1867 out.push('\n');
1868 // Splice the trailing comment in BEFORE the posting-line
1869 // NEWLINE (the first '\n' in the emitted posting region).
1870 if let Some(c) = trailing
1871 && let Some(rel) = out[posting_start..].find('\n')
1872 {
1873 let mut splice = String::with_capacity(c.len() + 1);
1874 splice.push(' ');
1875 splice.push_str(&c);
1876 out.insert_str(posting_start + rel, &splice);
1877 }
1878 // Posting body: emit attached metadata AND posting-internal comment
1879 // lines in source order, indented 4 (deeper than the posting's 2).
1880 // Comment-only lines inside a posting attach as COMMENT tokens of the
1881 // POSTING node; walking children-with-tokens preserves them (#1337)
1882 // instead of dropping them. The posting's own header line is skipped via
1883 // the seen_content/past_header guard, so the same-line trailing comment
1884 // (spliced above) is not duplicated here.
1885 let mut past_header = false;
1886 let mut seen_content = false;
1887 for el in p.syntax().children_with_tokens() {
1888 match el {
1889 rowan::NodeOrToken::Node(n) => {
1890 // Header child nodes (AMOUNT / COST_SPEC / PRICE_ANNOTATION)
1891 // are emitted inline above and must NOT flip `past_header` —
1892 // only the posting-line NEWLINE does. Otherwise the same-line
1893 // trailing comment, which follows the AMOUNT node, would be
1894 // re-emitted here as a body comment. META_ENTRY nodes only
1895 // appear in the body, after `past_header` is already set.
1896 if let Some(m) = ast::MetaEntry::cast(n) {
1897 emit_meta_entry(&m, " ", out);
1898 }
1899 }
1900 rowan::NodeOrToken::Token(t) => {
1901 if !past_header {
1902 match t.kind() {
1903 crate::SyntaxKind::NEWLINE if seen_content => past_header = true,
1904 k if k.is_trivia() => {}
1905 _ => seen_content = true,
1906 }
1907 continue;
1908 }
1909 if matches!(
1910 t.kind(),
1911 crate::SyntaxKind::COMMENT | crate::SyntaxKind::PERCENT_COMMENT
1912 ) {
1913 out.push_str(" ");
1914 out.push_str(t.text().trim_end_matches(['\n', '\r']));
1915 out.push('\n');
1916 }
1917 }
1918 }
1919 }
1920}
1921
1922/// Format an `AMOUNT` (units + currency) in canonical form. For
1923/// arithmetic shapes, emits the expression with single-space
1924/// separators (parens tight); for plain shapes, emits
1925/// `NUMBER CURRENCY` with thousands separators stripped.
1926fn format_amount(amt: &ast::Amount) -> String {
1927 let mut out = String::new();
1928 if amt.is_arithmetic() {
1929 emit_amount_subnode_expression(amt.syntax(), &mut out);
1930 if let Some(c) = amt.currency() {
1931 if !out.is_empty() {
1932 out.push(' ');
1933 }
1934 out.push_str(c.text());
1935 }
1936 return out;
1937 }
1938 if let Some(sign) = amt.sign()
1939 && sign.is_minus()
1940 {
1941 out.push('-');
1942 }
1943 if let Some(n) = amt.number() {
1944 out.push_str(&canonical_number(n.text()));
1945 }
1946 if let Some(c) = amt.currency() {
1947 if !out.is_empty() && !out.ends_with('-') {
1948 out.push(' ');
1949 }
1950 out.push_str(c.text());
1951 }
1952 out
1953}
1954
1955/// Canonical form for cost specs: `{cost CCY}` (single-brace
1956/// per-unit), `{{cost CCY}}` (double-brace total), `{# cost CCY}`
1957/// (per-unit + total via opener), or the in-brace `{N # T CCY}`
1958/// shape preserved as-is with single-space normalization.
1959///
1960/// Commas separating cost components (`{N CCY, DATE, "label"}`)
1961/// stay tight against the preceding token; every other adjacent
1962/// token pair is joined with a single space.
1963fn format_cost_spec(cs: &ast::CostSpec) -> String {
1964 let (open, close) = if cs.is_total() {
1965 ("{{", "}}")
1966 } else if cs.is_per_unit_plus_total() {
1967 ("{#", "}")
1968 } else {
1969 ("{", "}")
1970 };
1971 // Collect inner content tokens (skip opener/closer/whitespace),
1972 // then route through write_canonical_token_sequence so the spacing rule
1973 // is identical to balance/price/AMOUNT-subnode arithmetic — most
1974 // importantly, unary `+`/`-` stays tight (`{-500 USD}`, not
1975 // `{- 500 USD}`) and COMMA stays tight.
1976 let inner_tokens: Vec<crate::SyntaxToken> = cs
1977 .syntax()
1978 .children_with_tokens()
1979 .filter_map(rowan::NodeOrToken::into_token)
1980 .filter(|t| {
1981 !matches!(
1982 t.kind(),
1983 crate::SyntaxKind::L_BRACE
1984 | crate::SyntaxKind::R_BRACE
1985 | crate::SyntaxKind::L_DOUBLE_BRACE
1986 | crate::SyntaxKind::R_DOUBLE_BRACE
1987 | crate::SyntaxKind::L_BRACE_HASH
1988 | crate::SyntaxKind::WHITESPACE
1989 | crate::SyntaxKind::NEWLINE
1990 )
1991 })
1992 .collect();
1993 let mut inner = String::new();
1994 write_canonical_token_sequence(&inner_tokens, &mut inner);
1995 // The `{#` opener is a two-character marker; canonical form
1996 // separates it from the first inner token with a single space
1997 // (matching the rendering in this function's rustdoc). `{` and
1998 // `{{` don't get inner padding per the canonical-form spec.
1999 if cs.is_per_unit_plus_total() && !inner.is_empty() {
2000 format!("{open} {inner}{close}")
2001 } else {
2002 format!("{open}{inner}{close}")
2003 }
2004}
2005
2006/// Canonical price annotation: `@ amount` (per-unit) or
2007/// `@@ amount` (total).
2008fn format_price_annotation(pa: &ast::PriceAnnotation) -> String {
2009 let op = if pa.is_total() { "@@" } else { "@" };
2010 match pa.amount() {
2011 Some(a) => format!("{op} {}", format_amount(&a)),
2012 None => op.to_string(),
2013 }
2014}
2015
2016// ---- Helpers ---------------------------------------------------
2017
2018/// True for tokens that don't contribute content to the canonical
2019/// form: whitespace, newlines, every comment kind, and the
2020/// leading-file `BOM` token.
2021const fn is_trivia_kind(kind: crate::SyntaxKind) -> bool {
2022 matches!(
2023 kind,
2024 crate::SyntaxKind::WHITESPACE
2025 | crate::SyntaxKind::NEWLINE
2026 | crate::SyntaxKind::COMMENT
2027 | crate::SyntaxKind::PERCENT_COMMENT
2028 | crate::SyntaxKind::SHEBANG
2029 | crate::SyntaxKind::EMACS_DIRECTIVE
2030 | crate::SyntaxKind::BOM
2031 )
2032}
2033
/// Canonical text for a `NUMBER` token: every `,` is removed and all other
/// characters — including the user's decimal places — are kept as written.
///
/// `1,000.00` becomes `1000.00`; `1.0` stays `1.0`.
fn canonical_number(text: &str) -> String {
    text.chars().filter(|&ch| ch != ',').collect()
}
2044
2045/// Emit the arithmetic expression of a `PRICE` / `BALANCE`
2046/// directive: tokens from the first expression-starting token
2047/// (`NUMBER`, unary `+`/`-`, or `(`) up to (but not including) the
2048/// first `CURRENCY` at paren-depth 0. Spacing rules per
2049/// [`write_canonical_token_sequence`].
2050///
2051/// **Why the predicate must allow `PLUS` / `MINUS` / `L_PAREN`,
2052/// not just `NUMBER`.** A previous version skipped tokens until
2053/// it hit a `NUMBER`, which silently dropped leading unary signs
2054/// and opening parens — flipping the sign on inputs like
2055/// `2024-01-15 price USD -1.00 EUR` (formatted to `1.00 EUR`) and
2056/// corrupting parenthesized expressions like
2057/// `2024-01-15 balance Assets:A (1 + 2) USD` (formatted to
2058/// `1 + 2) USD USD`). Sign drift in BALANCE / PRICE is silent data
2059/// corruption — a balance assertion that previously asserted a
2060/// debit would assert a credit after a round-trip.
2061fn emit_amount_expression(node: &crate::SyntaxNode, out: &mut String) {
2062 let raw: Vec<crate::SyntaxToken> = node
2063 .children_with_tokens()
2064 .filter_map(rowan::NodeOrToken::into_token)
2065 .filter(|t| !is_trivia_kind(t.kind()))
2066 .skip_while(|t| {
2067 !matches!(
2068 t.kind(),
2069 crate::SyntaxKind::NUMBER
2070 | crate::SyntaxKind::PLUS
2071 | crate::SyntaxKind::MINUS
2072 | crate::SyntaxKind::L_PAREN
2073 )
2074 })
2075 .collect();
2076 let mut depth: i32 = 0;
2077 let mut first_currency_idx: Option<usize> = None;
2078 for (i, t) in raw.iter().enumerate() {
2079 match t.kind() {
2080 crate::SyntaxKind::L_PAREN => depth += 1,
2081 crate::SyntaxKind::R_PAREN => depth -= 1,
2082 crate::SyntaxKind::CURRENCY if depth == 0 && first_currency_idx.is_none() => {
2083 first_currency_idx = Some(i);
2084 }
2085 _ => {}
2086 }
2087 }
2088 let end = first_currency_idx.unwrap_or(raw.len());
2089 write_canonical_token_sequence(&raw[..end], out);
2090}
2091
2092/// Emit an `AMOUNT` subnode's expression region: every non-trivia
2093/// token minus the trailing `CURRENCY` (caller re-emits the
2094/// currency itself). Used by [`format_amount`] for arithmetic
2095/// posting amounts like `-(1.00 + 2.00) USD`.
2096fn emit_amount_subnode_expression(node: &crate::SyntaxNode, out: &mut String) {
2097 let mut tokens: Vec<crate::SyntaxToken> = node
2098 .children_with_tokens()
2099 .filter_map(rowan::NodeOrToken::into_token)
2100 .filter(|t| !is_trivia_kind(t.kind()))
2101 .collect();
2102 if let Some(last) = tokens.last()
2103 && last.kind() == crate::SyntaxKind::CURRENCY
2104 {
2105 tokens.pop();
2106 }
2107 write_canonical_token_sequence(&tokens, out);
2108}
2109
2110/// Single dispatcher for the canonical spacing rules used by EVERY
2111/// token-sequence emit path: balance / price arithmetic, AMOUNT
2112/// subnodes, cost-spec interiors, and metadata values. There is no
2113/// separate path; each call site collects the relevant non-trivia
2114/// tokens and routes them through here so the rules cannot drift
2115/// between contexts.
2116///
2117/// Rules:
2118///
2119/// - single space between adjacent operands / binary operators
2120/// - no space after `(` or before `)` (parens stay tight)
2121/// - no space after a unary `+` / `-` (one that opens the run
2122/// or follows `(` or another operator)
2123/// - no space before `,` (commas in cost-spec component lists
2124/// stay tight against the preceding token)
2125///
2126/// **Adding a new `SyntaxKind` to the formatter implies thinking
2127/// about its effect on every call site of this function.** A new
2128/// operator-like kind added to `is_op` will silently change cost-
2129/// spec and metadata spacing too; a new bracket-like kind needs
2130/// its own rule. The corpus-level idempotence test
2131/// (`idempotence_corpus_sweep`) is the safety net that catches
2132/// drifts.
2133fn write_canonical_token_sequence(tokens: &[crate::SyntaxToken], out: &mut String) {
2134 let is_op = |k: crate::SyntaxKind| {
2135 matches!(
2136 k,
2137 crate::SyntaxKind::PLUS
2138 | crate::SyntaxKind::MINUS
2139 | crate::SyntaxKind::STAR
2140 | crate::SyntaxKind::SLASH
2141 )
2142 };
2143 let mut prev_kind: Option<crate::SyntaxKind> = None;
2144 let mut prev_was_unary = false;
2145 for t in tokens {
2146 let kind = t.kind();
2147 let is_unary = is_op(kind)
2148 && match prev_kind {
2149 None => true,
2150 Some(p) => p == crate::SyntaxKind::L_PAREN || is_op(p),
2151 };
2152 let need_space = match prev_kind {
2153 None => false,
2154 Some(prev) => {
2155 prev != crate::SyntaxKind::L_PAREN
2156 && kind != crate::SyntaxKind::R_PAREN
2157 && kind != crate::SyntaxKind::COMMA
2158 && !prev_was_unary
2159 }
2160 };
2161 if need_space {
2162 out.push(' ');
2163 }
2164 if kind == crate::SyntaxKind::NUMBER {
2165 out.push_str(&canonical_number(t.text()));
2166 } else {
2167 out.push_str(t.text());
2168 }
2169 prev_kind = Some(kind);
2170 prev_was_unary = is_unary;
2171 }
2172}
2173
2174/// Extract a balance directive's optional tolerance — the
2175/// `NUMBER` after the first `TILDE`, plus an optional trailing
2176/// `CURRENCY` at paren-depth 0.
2177fn balance_tolerance(node: &crate::SyntaxNode) -> Option<(String, Option<String>)> {
2178 let mut past_tilde = false;
2179 let mut number: Option<String> = None;
2180 let mut currency: Option<String> = None;
2181 for el in node.children_with_tokens() {
2182 let rowan::NodeOrToken::Token(t) = el else {
2183 continue;
2184 };
2185 if !past_tilde {
2186 if t.kind() == crate::SyntaxKind::TILDE {
2187 past_tilde = true;
2188 }
2189 continue;
2190 }
2191 match t.kind() {
2192 crate::SyntaxKind::NUMBER if number.is_none() => {
2193 number = Some(canonical_number(t.text()));
2194 }
2195 crate::SyntaxKind::CURRENCY if number.is_some() && currency.is_none() => {
2196 currency = Some(t.text().to_string());
2197 }
2198 _ => {}
2199 }
2200 }
2201 number.map(|n| (n, currency))
2202}
2203
2204// ---- Metadata --------------------------------------------------
2205
2206/// Walk a directive's direct-child `META_ENTRY` nodes and emit
2207/// each on its own indented line in canonical form (`indent + KEY:
2208/// value\n`). Most directive types don't have a `.meta_entries()`
2209/// accessor on their typed wrapper; we walk the syntax node
2210/// directly to stay uniform.
2211fn emit_meta_entries_of(node: &crate::SyntaxNode, out: &mut String) {
2212 // Source-order walk so body-internal COMMENT lines are preserved
2213 // alongside the metadata entries (#1332). The header region (up to and
2214 // including the header-terminating NEWLINE) is skipped so the
2215 // header-trailing comment — spliced onto the header line by
2216 // `emit_directive` — is not duplicated here.
2217 let mut past_header = false;
2218 let mut seen_content = false;
2219 for el in node.children_with_tokens() {
2220 match el {
2221 rowan::NodeOrToken::Node(n) => {
2222 past_header = true;
2223 if let Some(entry) = MetaEntry::cast(n) {
2224 emit_meta_entry(&entry, INDENT, out);
2225 }
2226 }
2227 rowan::NodeOrToken::Token(t) => {
2228 if !past_header {
2229 match t.kind() {
2230 crate::SyntaxKind::NEWLINE if seen_content => past_header = true,
2231 k if k.is_trivia() => {}
2232 _ => seen_content = true,
2233 }
2234 continue;
2235 }
2236 if matches!(
2237 t.kind(),
2238 crate::SyntaxKind::COMMENT | crate::SyntaxKind::PERCENT_COMMENT
2239 ) {
2240 out.push_str(INDENT);
2241 out.push_str(t.text().trim_end_matches(['\n', '\r']));
2242 out.push('\n');
2243 }
2244 }
2245 }
2246 }
2247}
2248
2249/// Canonical emit for a single `META_ENTRY`. Walks non-trivia
2250/// tokens, prints them with single-space separation, and
2251/// normalizes numbers via [`canonical_number`]. The `META_KEY`
2252/// token already includes the trailing colon (e.g. `note:`); the
2253/// value side gets the same NUMBER + CURRENCY gluing rule the
2254/// rest of the formatter uses elsewhere.
2255///
2256/// Two semantically-equivalent inputs (e.g. `foo: "bar"` and
2257/// `foo: "bar"`) produce byte-identical output — the
2258/// gofmt-style invariant the file rustdoc promises.
2259fn emit_meta_entry(m: &MetaEntry, indent: &str, out: &mut String) {
2260 out.push_str(indent);
2261 // Split the META_ENTRY's non-trivia tokens into [META_KEY,
2262 // value*]. The META_KEY token already includes the trailing
2263 // colon (e.g. `note:`); the value tokens go through
2264 // write_canonical_token_sequence so the spacing rules — unary +/-
2265 // tight, COMMA tight, paren-tight, NUMBER canonicalized — are
2266 // shared with the balance/price/cost-spec/posting-amount paths.
2267 let content: Vec<crate::SyntaxToken> = m
2268 .syntax()
2269 .children_with_tokens()
2270 .filter_map(rowan::NodeOrToken::into_token)
2271 .filter(|t| {
2272 !matches!(
2273 t.kind(),
2274 crate::SyntaxKind::WHITESPACE | crate::SyntaxKind::NEWLINE
2275 )
2276 })
2277 .collect();
2278 let mut iter = content.iter();
2279 if let Some(key) = iter.next() {
2280 out.push_str(key.text());
2281 }
2282 let value_tokens: Vec<crate::SyntaxToken> = iter.cloned().collect();
2283 if !value_tokens.is_empty() {
2284 out.push(' ');
2285 write_canonical_token_sequence(&value_tokens, out);
2286 }
2287 out.push('\n');
2288}
2289
2290#[cfg(test)]
2291mod tests {
2292 use super::*;
2293
2294 #[test]
2295 fn empty_input_yields_single_newline() {
2296 assert_eq!(format_source(""), "\n");
2297 }
2298
2299 #[test]
2300 fn open_directive_canonical() {
2301 let src = "2024-01-15 open Assets:Cash\n";
2302 assert_eq!(format_source(src), "2024-01-15 open Assets:Cash\n");
2303 }
2304
2305 #[test]
2306 fn open_with_currencies_and_booking_canonical() {
2307 // The currency constraint list is comma-separated; emitting spaces
2308 // produced invalid beancount (#1405).
2309 let src = "2024-01-15 open Assets:Brokerage USD,EUR \"STRICT\"\n";
2310 assert_eq!(
2311 format_source(src),
2312 "2024-01-15 open Assets:Brokerage USD,EUR \"STRICT\"\n"
2313 );
2314 }
2315
2316 /// Regression for #1405: `format` must keep the open currency list
2317 /// comma-separated, not rewrite it space-separated (invalid syntax), and
2318 /// the result must be idempotent.
2319 #[test]
2320 fn open_currency_list_stays_comma_separated() {
2321 let src = "2026-01-01 open Assets:Wallet USD,EUR\n";
2322 let once = format_source(src);
2323 assert_eq!(once, "2026-01-01 open Assets:Wallet USD,EUR\n");
2324 assert_eq!(format_source(&once), once, "format must be idempotent");
2325 }
2326
2327 #[test]
2328 fn close_directive_canonical() {
2329 let src = "2024-12-31 close Assets:Cash\n";
2330 assert_eq!(format_source(src), "2024-12-31 close Assets:Cash\n");
2331 }
2332
2333 #[test]
2334 fn commodity_directive_canonical() {
2335 let src = "2024-01-01 commodity HOOL\n";
2336 assert_eq!(format_source(src), "2024-01-01 commodity HOOL\n");
2337 }
2338
/// #1325: blank lines the author put between directives are kept as
/// written instead of being normalized to exactly one (matching Python
/// bean-format and the rest of the beancount lineage).
#[test]
fn blank_lines_between_directives_preserved() {
    let kept_verbatim = [
        // Grouped directives (no blank line) stay grouped — not double-spaced.
        "2024-01-01 open Assets:A\n2024-01-02 open Assets:B\n",
        // A single blank line stays a single blank line.
        "2024-01-01 open Assets:A\n\n2024-01-02 open Assets:B\n",
        // Two blank lines stay two; they are not collapsed.
        "2024-01-01 open Assets:A\n\n\n2024-01-02 open Assets:B\n",
    ];
    for fixture in kept_verbatim {
        assert_eq!(format_source(fixture), fixture);
    }

    // A line holding only whitespace still counts as one blank line: its
    // whitespace is stripped and an empty line remains.
    let whitespace_only_blank = "2024-01-01 open Assets:A\n \n2024-01-02 open Assets:B\n";
    let expected = "2024-01-01 open Assets:A\n\n2024-01-02 open Assets:B\n";
    assert_eq!(format_source(whitespace_only_blank), expected);
}
2365
/// Input without a final newline is formatted to end in exactly one
/// newline — never zero, never two.
#[test]
fn trailing_newline_always_present() {
    let formatted = format_source("2024-01-01 open Assets:A");
    let ends_with_newline = formatted.ends_with('\n');
    let ends_with_blank_line = formatted.ends_with("\n\n");
    assert!(ends_with_newline);
    assert!(!ends_with_blank_line);
}
2373
/// Formatting twice gives the same text as formatting once for a small
/// two-directive ledger.
#[test]
fn idempotent_on_canonical_input() {
    let ledger = "2024-01-01 open Assets:A\n\n2024-01-02 close Assets:A\n";
    let first_pass = format_source(ledger);
    let second_pass = format_source(&first_pass);
    assert_eq!(first_pass, second_pass);
}
2381
/// A `note` directive is emitted in the expected canonical form.
#[test]
fn note_canonical() {
    let expected = "2024-01-15 note Assets:Cash \"a note\"\n";
    let formatted = format_source("2024-01-15 note Assets:Cash \"a note\"\n");
    assert_eq!(formatted, expected);
}
2390
/// An `event` directive is emitted in the expected canonical form.
#[test]
fn event_canonical() {
    let expected = "2024-01-15 event \"location\" \"NYC\"\n";
    let formatted = format_source("2024-01-15 event \"location\" \"NYC\"\n");
    assert_eq!(formatted, expected);
}
2399
/// A `query` directive is emitted in the expected canonical form.
#[test]
fn query_canonical() {
    let expected = "2024-01-15 query \"q1\" \"SELECT account\"\n";
    let formatted = format_source("2024-01-15 query \"q1\" \"SELECT account\"\n");
    assert_eq!(formatted, expected);
}
2408
/// A `pad` directive is emitted in the expected canonical form.
#[test]
fn pad_canonical() {
    let expected = "2024-01-15 pad Assets:A Equity:Opening\n";
    let formatted = format_source("2024-01-15 pad Assets:A Equity:Opening\n");
    assert_eq!(formatted, expected);
}
2417
/// A `document` directive keeps its trailing tags and links, in source
/// order, on the directive line.
#[test]
fn document_with_tags_and_links_canonical() {
    let input = "2024-06-01 document Assets:Bank \"stmt.pdf\" #q1 ^scan42 #urgent\n";
    let expected = "2024-06-01 document Assets:Bank \"stmt.pdf\" #q1 ^scan42 #urgent\n";
    let formatted = format_source(input);
    assert_eq!(formatted, expected);
}
2426
/// #1321, `document` flavour: on a reformat the second and later
/// documents lost their trailing tags/links (found by the #1323 corpus
/// idempotence check). Pins the fixed-point property and checks that the
/// second document's tag and link are still present.
#[test]
fn issue_1321_document_tags_links_idempotent_across_directives() {
    let ledger = concat!(
        "2013-05-18 document Assets:Bank \"/a.pdf\" #tag1 ^link1\n",
        "2013-05-19 document Assets:Bank \"/b.pdf\" #tag2 ^link2\n",
    );
    let first_pass = format_source(ledger);
    let second_pass = format_source(&first_pass);
    assert_eq!(second_pass, first_pass, "format must be idempotent");

    let tag_kept = first_pass.contains("#tag2");
    let link_kept = first_pass.contains("^link2");
    assert!(
        tag_kept && link_kept,
        "the second document's tags/links must survive formatting; got:\n{first_pass}"
    );
}
2445
/// Regression test for #1321: header tags/links stay on the header line
/// of EVERY transaction. Before the fix, the second and later
/// transactions had theirs moved onto continuation lines.
#[test]
fn issue_1321_header_tags_links_idempotent_across_transactions() {
    // NOTE(review): whitespace runs in this fixture look collapsed in the
    // reviewed copy — confirm posting indent/alignment against the
    // canonical file before relying on the exact-match assertion.
    let ledger = concat!(
        "2024-01-15 * \"x\" #tag1 ^link1 #tag2 ^link2\n",
        " Assets:Cash -1.00 USD\n",
        " Expenses:Misc 1.00 USD\n",
        "\n",
        "2024-01-16 * \"x\" #tag1 ^link1 #tag2 ^link2\n",
        " Assets:Cash -1.00 USD\n",
        " Expenses:Misc 1.00 USD\n",
    );
    let formatted = format_source(ledger);
    assert_eq!(formatted, ledger, "format must be a no-op (idempotent)");
}
2467
/// #1321 follow-up (found by the Copilot review of the fix): a comment
/// line ahead of a transaction is leading trivia attached inside the
/// transaction node (Directive-Terminator Rule), just like a blank line.
/// When only WHITESPACE/NEWLINE were skipped, the comment flipped
/// `seen_content`, the walk broke at the comment's NEWLINE, and the real
/// header tags/links moved to continuation lines. They must stay on the
/// header line.
#[test]
fn issue_1321_comment_before_transaction_keeps_header_tags() {
    // NOTE(review): whitespace runs in this fixture look collapsed in the
    // reviewed copy — confirm posting indent/alignment against the
    // canonical file before relying on the exact-match assertion.
    let ledger = concat!(
        "2024-01-15 * \"first\" #h1 ^l1\n",
        " Assets:Cash -1.00 USD\n",
        " Expenses:Misc 1.00 USD\n",
        "\n",
        "; a comment before the second transaction\n",
        "2024-01-16 * \"second\" #tag1 ^link1\n",
        " Assets:Cash -2.00 USD\n",
        " Expenses:Misc 2.00 USD\n",
    );
    let formatted = format_source(ledger);
    assert_eq!(
        formatted, ledger,
        "a leading comment must not migrate header tags/links to continuation lines"
    );
}
2493
/// `document` counterpart of the comment-as-leading-trivia case: a
/// comment before the second document must not detach its tags/links
/// from the directive line.
#[test]
fn issue_1321_comment_before_document_keeps_tags() {
    let ledger = concat!(
        "2013-05-18 document Assets:Bank \"/a.pdf\" #tag1 ^link1\n",
        "; a comment before the second document\n",
        "2013-05-19 document Assets:Bank \"/b.pdf\" #tag2 ^link2\n",
    );
    let first_pass = format_source(ledger);
    let second_pass = format_source(&first_pass);
    assert_eq!(second_pass, first_pass, "format must be idempotent");

    let header_intact = first_pass.contains("\"/b.pdf\" #tag2 ^link2");
    assert!(
        header_intact,
        "the second document's tags/links must stay on its header line; got:\n{first_pass}"
    );
}
2509
/// #1332: comment-only lines inside a directive body must NOT be deleted.
/// Two commented-out `; price:` lines sit between the metadata entries of
/// a `commodity` body; they have to survive, interleaved in source order,
/// and the output has to be a fixed point.
#[test]
fn issue_1332_body_comments_in_metadata_preserved() {
    // NOTE(review): whitespace runs in this fixture look collapsed in the
    // reviewed copy — confirm the body indent against the canonical file
    // before relying on the exact-match assertion.
    let ledger = concat!(
        "2023-06-04 commodity EAM-VEUR ; cSpell: word VEUR\n",
        " name: \"Vanguard FTSE Developed Europe UCITS ETF EUR Dist\"\n",
        " ; price: \"EUR:alphavantage/price:VEUR.AS:EUR\"\n",
        " ; price: \"EUR:yahoo/VEUR.AS\"\n",
        " price: \"EUR:pricehist.beanprice.yahoo/VEUR.AS\"\n",
    );
    let first_pass = format_source(ledger);
    assert_eq!(
        first_pass, ledger,
        "body comments must be preserved verbatim"
    );
    assert_eq!(format_source(&first_pass), first_pass);
}
2530
/// #1332 inside a transaction body: a comment-only line between two
/// postings must survive, in source order and on its own indented line.
/// Checked through preservation + idempotence rather than an exact
/// match, because amount alignment is canonicalized as well.
#[test]
fn issue_1332_body_comments_between_postings_preserved() {
    let ledger = concat!(
        "2024-01-15 * \"Cafe\" \"Latte\"\n",
        " Expenses:Coffee 4.50 USD\n",
        " ; was 5.00 before the discount\n",
        " Assets:Checking\n",
    );
    let formatted = format_source(ledger);

    // NOTE(review): the indent inside this needle looks collapsed in the
    // reviewed copy — confirm it against the canonical file.
    let comment_on_own_line = formatted.contains("\n ; was 5.00 before the discount\n");
    assert!(
        comment_on_own_line,
        "the body comment must be preserved on its own indented line; got:\n{formatted}"
    );

    // Ordering: first posting, then the comment, then the second posting.
    let coffee_at = formatted.find("Expenses:Coffee").unwrap();
    let comment_at = formatted.find("; was 5.00").unwrap();
    let checking_at = formatted.find("Assets:Checking").unwrap();
    assert!(
        coffee_at < comment_at && comment_at < checking_at,
        "comment must stay between postings:\n{formatted}"
    );

    let reformatted = format_source(&formatted);
    assert_eq!(reformatted, formatted, "format must be idempotent");
}
2558
/// #1335: unparsable content must not be deleted. Org-mode `*` section
/// headers parse into ERROR_NODEs and the comments grouped with them are
/// swallowed into the same node; all of that used to be dropped. Every
/// line has to survive and the output has to be a fixed point.
#[test]
fn issue_1335_org_headers_and_grouped_comments_preserved() {
    let ledger = concat!(
        "* Section A\n",
        ";; comment between headers\n",
        ";; second line\n",
        "* Section B\n",
        "2013-01-01 open Assets:X\n",
    );
    let formatted = format_source(ledger);

    // The needles carry the full `;;` prefix on purpose: a single-`;`
    // needle would still match if one `;` were dropped, which would
    // weaken the regression.
    let must_survive = [
        "* Section A",
        ";; comment between headers",
        ";; second line",
        "* Section B",
        "2013-01-01 open Assets:X",
    ];
    for needle in must_survive {
        let present = formatted.contains(needle);
        assert!(present, "lost {needle:?} on format; got:\n{formatted}");
    }

    let reformatted = format_source(&formatted);
    assert_eq!(reformatted, formatted, "format must be idempotent");
}
2589
/// #1335: a lone org header directly ahead of a directive is an
/// ERROR_NODE and must be kept; the output must also be a fixed point.
#[test]
fn issue_1335_org_header_then_directive_keeps_header() {
    let formatted = format_source("* Accounts\n2013-01-01 open Assets:X\n");

    let header_kept = formatted.contains("* Accounts");
    assert!(header_kept, "org header dropped; got:\n{formatted}");

    let reformatted = format_source(&formatted);
    assert_eq!(reformatted, formatted);
}
2603
/// #1335: an ERROR_NODE is a top-level content block, so the author's
/// blank line between an org header and the next directive is preserved
/// (not flushed), and the output is a fixed point.
#[test]
fn issue_1335_blank_lines_around_org_header_preserved() {
    let ledger = "* Accounts\n\n2013-01-01 open Assets:X\n";
    let first_pass = format_source(ledger);
    assert_eq!(
        first_pass, ledger,
        "blank around org header must be kept"
    );
    assert_eq!(format_source(&first_pass), first_pass);
}
2617
/// #1337: a comment on its own line inside a posting attaches as a
/// COMMENT token of the POSTING node. It must be kept, not dropped, and
/// stay between its posting and the following one.
#[test]
fn issue_1337_posting_internal_comments_preserved() {
    let ledger = concat!(
        "2024-01-15 * \"x\"\n",
        " Assets:A 1.00 USD\n",
        " ; posting-internal note\n",
        " Assets:B\n",
    );
    let formatted = format_source(ledger);

    let note_kept = formatted.contains("; posting-internal note");
    assert!(
        note_kept,
        "posting-internal comment dropped; got:\n{formatted}"
    );

    let first_posting_at = formatted.find("Assets:A").unwrap();
    let note_at = formatted.find("; posting-internal note").unwrap();
    let second_posting_at = formatted.find("Assets:B").unwrap();
    assert!(
        first_posting_at < note_at && note_at < second_posting_at,
        "comment must stay between postings:\n{formatted}"
    );

    let reformatted = format_source(&formatted);
    assert_eq!(reformatted, formatted, "format must be idempotent");
}
2640
/// The thousands separator in a `price` amount is removed on output.
#[test]
fn price_canonical_strips_thousands_separators() {
    let expected = "2024-01-15 price USD 1234.56 EUR\n";
    let formatted = format_source("2024-01-15 price USD 1,234.56 EUR\n");
    assert_eq!(formatted, expected);
}
2646
/// A binary operator in a `price` expression gets one space on each side.
#[test]
fn price_arithmetic_canonicalizes_spacing() {
    let expected = "2024-01-15 price USD 1 / 2 EUR\n";
    let formatted = format_source("2024-01-15 price USD 1/2 EUR\n");
    assert_eq!(formatted, expected);
}
2652
/// A plain `balance` assertion is emitted in the expected canonical form.
#[test]
fn balance_canonical() {
    let expected = "2024-01-15 balance Assets:Cash 100.00 USD\n";
    let formatted = format_source("2024-01-15 balance Assets:Cash 100.00 USD\n");
    assert_eq!(formatted, expected);
}
2661
/// A `balance` assertion with a `~` tolerance clause is emitted in the
/// expected canonical form.
#[test]
fn balance_with_tolerance_canonical() {
    let input = "2024-01-15 balance Assets:Cash 100.00 USD ~ 0.01 USD\n";
    let expected = "2024-01-15 balance Assets:Cash 100.00 USD ~ 0.01 USD\n";
    let formatted = format_source(input);
    assert_eq!(formatted, expected);
}
2670
/// A `balance` assertion whose amount is an arithmetic expression is
/// emitted in the expected canonical form.
#[test]
fn balance_arithmetic_canonical() {
    let input = "2024-01-15 balance Assets:Cash 0.25 + 0.75 USD\n";
    let expected = "2024-01-15 balance Assets:Cash 0.25 + 0.75 USD\n";
    let formatted = format_source(input);
    assert_eq!(formatted, expected);
}
2679
/// A `custom` directive with an account and an amount argument is
/// emitted in the expected canonical form.
#[test]
fn custom_canonical() {
    let input = "2024-01-01 custom \"budget\" Expenses:Food 500.00 USD\n";
    let expected = "2024-01-01 custom \"budget\" Expenses:Food 500.00 USD\n";
    let formatted = format_source(input);
    assert_eq!(formatted, expected);
}
2688
/// An `option` line is emitted in the expected canonical form.
#[test]
fn option_canonical() {
    let expected = "option \"title\" \"My Ledger\"\n";
    let formatted = format_source("option \"title\" \"My Ledger\"\n");
    assert_eq!(formatted, expected);
}
2694
/// An `include` line is emitted in the expected canonical form.
#[test]
fn include_canonical() {
    let expected = "include \"other.beancount\"\n";
    let formatted = format_source("include \"other.beancount\"\n");
    assert_eq!(formatted, expected);
}
2700
/// A `plugin` line that carries a config string is emitted in the
/// expected canonical form.
#[test]
fn plugin_canonical_with_config() {
    let input = "plugin \"beancount.plugins.unrealized\" \"Unrealized\"\n";
    let expected = "plugin \"beancount.plugins.unrealized\" \"Unrealized\"\n";
    let formatted = format_source(input);
    assert_eq!(formatted, expected);
}
2709
/// A `plugin` line without a config string is emitted in the expected
/// canonical form.
#[test]
fn plugin_canonical_without_config() {
    let expected = "plugin \"my.plugin\"\n";
    let formatted = format_source("plugin \"my.plugin\"\n");
    assert_eq!(formatted, expected);
}
2715
/// `pushtag`/`poptag` lines are emitted in the expected canonical form.
/// The source has no blank line between them, so they stay grouped
/// (#1325).
#[test]
fn pushtag_poptag_canonical() {
    let expected = "pushtag #active\npoptag #active\n";
    let formatted = format_source("pushtag #active\npoptag #active\n");
    assert_eq!(formatted, expected);
}
2722
/// `pushmeta`/`popmeta` lines are emitted in the expected canonical form.
/// The source has no blank line between them, so they stay grouped
/// (#1325).
#[test]
fn pushmeta_popmeta_canonical() {
    let input = "pushmeta location: \"NYC\"\npopmeta location:\n";
    let expected = "pushmeta location: \"NYC\"\npopmeta location:\n";
    let formatted = format_source(input);
    assert_eq!(formatted, expected);
}
2732
2733 // ---- Transaction tests ------------------------------------
2734
/// A two-posting transaction: both numbers share one right-justified
/// number field, so the currency lands in the same column on each line.
#[test]
fn transaction_minimal_two_postings_aligns_amounts() {
    // Column walk-through carried over from the original author's notes:
    //   max LHS = 15 (Expenses:Coffee); number_col = 17.
    //   max number width = 6 (`-5.00`); number_width = 6.
    //   Posting 1: account ends at col 13, pad 4 → `-5.00` (width 6, no
    //   left-pad) → currency at col 24.
    //   Posting 2: account ends at col 17, pad 2 → ` 5.00` (width 5,
    //   left-padded by 1) → currency at col 24.
    // NOTE(review): whitespace runs in both fixtures look collapsed in
    // the reviewed copy, so the padding described above is not visible
    // here — confirm the literals against the canonical file.
    let input = concat!(
        "2024-01-15 * \"Coffee\"\n",
        " Assets:Cash -5.00 USD\n",
        " Expenses:Coffee 5.00 USD\n",
    );
    let expected = concat!(
        "2024-01-15 * \"Coffee\"\n",
        " Assets:Cash -5.00 USD\n",
        " Expenses:Coffee 5.00 USD\n",
    );
    let formatted = format_source(input);
    assert_eq!(formatted, expected);
}
2755
/// Regression test for #1290. An amount-less posting (the usual elided
/// balancing leg) must NOT widen the number column, even if its account
/// is longer than every amount-bearing account. `bean-format` derives
/// the column from number-bearing lines only; counting `Expenses:Food`
/// would make `rledger format` and `bean-format` disagree and never
/// converge on a round-trip.
#[test]
fn transaction_elided_posting_does_not_widen_amount_column() {
    // Only Assets:Cash (11) carries an amount; Expenses:Food (13) is
    // elided and ignored for alignment. number_col = 2+11+2 = 15.
    // NOTE(review): whitespace runs in both fixtures look collapsed in
    // the reviewed copy — confirm the literals against the canonical file.
    let input = concat!(
        "2024-01-15 * \"Coffee\"\n",
        " Assets:Cash -5.00 USD\n",
        " Expenses:Food\n",
    );
    let expected = concat!(
        "2024-01-15 * \"Coffee\"\n",
        " Assets:Cash -5.00 USD\n",
        " Expenses:Food\n",
    );
    let formatted = format_source(input);
    assert_eq!(formatted, expected);

    // Fixed point: formatting the expected text again is a no-op.
    let reformatted = format_source(expected);
    assert_eq!(reformatted, expected);
}
2780
/// Regression test for #1290 on the reporter's own fixture: a long elided
/// account (`Expenses:Thingamabobs`) next to a short amount-bearing one
/// (`Assets:Money`). Before the fix the number was pushed right to clear
/// the long account, whereas `bean-format` keeps it two spaces after
/// `Assets:Money`. The thousands separator is also expected to be
/// stripped.
#[test]
fn transaction_long_elided_account_matches_bean_format() {
    // NOTE(review): whitespace runs in both fixtures look collapsed in
    // the reviewed copy — confirm the literals against the canonical file.
    let input = concat!(
        "2024-07-20 * \"Commas should stay\"\n",
        " Assets:Money -1,024 USD\n",
        " Expenses:Thingamabobs\n",
    );
    let expected = concat!(
        "2024-07-20 * \"Commas should stay\"\n",
        " Assets:Money -1024 USD\n",
        " Expenses:Thingamabobs\n",
    );
    let formatted = format_source(input);
    assert_eq!(formatted, expected);

    let reformatted = format_source(expected);
    assert_eq!(reformatted, expected);
}
2802
/// Regression test for the currency-only gap (#1307, found in review). A
/// currency-only posting (`... USD`, no number) renders no number, so —
/// like an elided posting — it must not widen the alignment column even
/// when its account is the longest. `Assets:Bank` is the only posting
/// with a number, so the number stays two spaces after it. The check
/// looks at the numbered line only, regardless of how the currency-only
/// line is rendered.
#[test]
fn transaction_currency_only_posting_does_not_widen_amount_column() {
    let ledger =
        "2024-01-15 * \"x\"\n Assets:Bank -5.00 USD\n Assets:LongCashReserve USD\n";
    let formatted = format_source(ledger);

    // NOTE(review): whitespace runs in this needle look collapsed in the
    // reviewed copy — confirm it against the canonical file.
    let numbered_line_aligned = formatted.contains(" Assets:Bank -5.00 USD");
    assert!(
        numbered_line_aligned,
        "number column must align to the numbered posting, not the longer \
         currency-only one; got:\n{formatted}"
    );
}
2821
/// A header with both payee and narration strings keeps both, in order.
#[test]
fn transaction_payee_and_narration() {
    let ledger =
        "2024-01-15 * \"Starbucks\" \"Coffee\"\n Assets:Cash -5.00 USD\n Expenses:Coffee\n";
    let formatted = format_source(ledger);
    let header_present = formatted.contains("2024-01-15 * \"Starbucks\" \"Coffee\"\n");
    assert!(header_present, "got: {formatted}");
}
2832
/// The pending flag `!` on a transaction header is kept.
#[test]
fn transaction_pending_flag() {
    let ledger = "2024-01-15 ! \"Pending\"\n Assets:Cash -5.00 USD\n Expenses:Misc\n";
    let formatted = format_source(ledger);
    let header_first = formatted.starts_with("2024-01-15 ! \"Pending\"\n");
    assert!(header_first, "got: {formatted}");
}
2839
/// The `txn` keyword and the `*` flag are the same canonical form; the
/// formatter writes `*`.
#[test]
fn transaction_txn_keyword_normalized_to_star() {
    let ledger = "2024-01-15 txn \"x\"\n Assets:Cash -1.00 USD\n Expenses:Misc\n";
    let formatted = format_source(ledger);
    let header_first = formatted.starts_with("2024-01-15 * \"x\"\n");
    assert!(header_first, "got: {formatted}");
}
2847
/// Tags and links on a transaction header stay on the header line in
/// source order.
#[test]
fn transaction_header_tags_and_links() {
    let ledger =
        "2024-01-15 * \"x\" #tag1 ^link1 #tag2\n Assets:Cash -1.00 USD\n Expenses:Misc\n";
    let formatted = format_source(ledger);
    let header_first = formatted.starts_with("2024-01-15 * \"x\" #tag1 ^link1 #tag2\n");
    assert!(header_first, "got: {formatted}");
}
2858
/// An auto-balanced posting has no amount, so its line is nothing but
/// the indented account name.
#[test]
fn transaction_auto_balance_posting_no_amount() {
    let ledger = "2024-01-15 * \"x\"\n Assets:Cash -5.00 USD\n Expenses:Misc\n";
    let formatted = format_source(ledger);
    // NOTE(review): the indent inside this needle looks collapsed in the
    // reviewed copy — confirm it against the canonical file.
    let bare_account_line = formatted.contains("\n Expenses:Misc\n");
    assert!(bare_account_line, "got: {formatted}");
}
2867
/// A per-unit cost spec `{...}` on a posting is kept after the amount.
#[test]
fn transaction_posting_with_cost_spec() {
    let ledger = "2024-01-15 * \"buy\"\n Assets:Brokerage 10 HOOL {500.00 USD}\n Assets:Cash -5000.00 USD\n";
    let formatted = format_source(ledger);
    let cost_kept = formatted.contains("10 HOOL {500.00 USD}");
    assert!(cost_kept, "got: {formatted}");
}
2874
/// A total cost spec `{{...}}` on a posting keeps its double braces.
#[test]
fn transaction_posting_with_total_cost_spec() {
    let ledger = "2024-01-15 * \"buy\"\n Assets:Brokerage 10 HOOL {{5000.00 USD}}\n Assets:Cash -5000.00 USD\n";
    let formatted = format_source(ledger);
    let cost_kept = formatted.contains("10 HOOL {{5000.00 USD}}");
    assert!(cost_kept, "got: {formatted}");
}
2881
/// A per-unit price annotation `@` on a posting is kept.
#[test]
fn transaction_posting_with_per_unit_price() {
    let ledger = "2024-01-15 * \"buy\"\n Assets:Brokerage 10 HOOL @ 500.00 USD\n Assets:Cash -5000.00 USD\n";
    let formatted = format_source(ledger);
    let price_kept = formatted.contains("10 HOOL @ 500.00 USD");
    assert!(price_kept, "got: {formatted}");
}
2888
/// A total price annotation `@@` on a posting is kept.
#[test]
fn transaction_posting_with_total_price() {
    let ledger = "2024-01-15 * \"buy\"\n Assets:Brokerage 10 HOOL @@ 5000.00 USD\n Assets:Cash -5000.00 USD\n";
    let formatted = format_source(ledger);
    let price_kept = formatted.contains("10 HOOL @@ 5000.00 USD");
    assert!(price_kept, "got: {formatted}");
}
2895
/// A posting-level flag is emitted in front of the account name.
#[test]
fn transaction_posting_with_flag() {
    let ledger = "2024-01-15 * \"x\"\n ! Assets:Cash -5.00 USD\n Expenses:Misc 5.00 USD\n";
    let formatted = format_source(ledger);
    let flag_kept = formatted.contains("\n ! Assets:Cash");
    assert!(flag_kept, "got: {formatted}");
}
2902
/// Both the negative and the positive posting amount appear in the output.
#[test]
fn transaction_negative_amount() {
    let ledger = "2024-01-15 * \"x\"\n Assets:Cash -5.00 USD\n Expenses:Misc 5.00 USD\n";
    let formatted = format_source(ledger);
    let has_negative = formatted.contains("-5.00 USD");
    let has_positive = formatted.contains(" 5.00 USD");
    assert!(has_negative, "got: {formatted}");
    assert!(has_positive, "got: {formatted}");
}
2910
/// Thousands separators in posting amounts are removed on output.
#[test]
fn transaction_strips_thousands_separators_in_postings() {
    let ledger = "2024-01-15 * \"x\"\n Assets:Cash -1,000.00 USD\n Expenses:Misc 1,000.00 USD\n";
    let formatted = format_source(ledger);
    let stripped_present = formatted.contains("-1000.00 USD");
    let separator_left = formatted.contains("1,000");
    assert!(stripped_present, "got: {formatted}");
    assert!(!separator_left, "got: {formatted}");
}
2918
/// An arithmetic posting amount renders with single spaces around the
/// binary operator and no padding inside the parentheses.
#[test]
fn transaction_arithmetic_amount() {
    let src =
        "2024-01-15 * \"x\"\n Assets:Cash -(1.00 + 2.00) USD\n Expenses:Misc 3.00 USD\n";
    let out = format_source(src);
    // A single needle is sufficient. The former alternative needle
    // `-(1.00 + 2.00) USD` contains this one as a substring, so that
    // branch could never decide the outcome and has been removed.
    // NOTE(review): this check does not pin the leading unary minus;
    // consider asserting the signed form once its canonical rendering is
    // confirmed.
    assert!(out.contains("(1.00 + 2.00) USD"), "got: {out}");
}
2931
/// Formatting a simple transaction twice gives the same text as
/// formatting it once.
#[test]
fn transaction_idempotent() {
    let ledger = concat!(
        "2024-01-15 * \"Coffee\"\n",
        " Assets:Cash -5.00 USD\n",
        " Expenses:Coffee 5.00 USD\n",
    );
    let first_pass = format_source(ledger);
    let second_pass = format_source(&first_pass);
    assert_eq!(first_pass, second_pass);
}
2943
/// Cross-posting invariant: the currency (USD here) starts at the same
/// column on every posting line of the file, even when the numbers
/// differ in width or sign, because the number field is right-justified.
#[test]
fn transaction_file_wide_alignment_across_transactions() {
    let ledger = concat!(
        "2024-01-15 * \"x\"\n",
        " Assets:Cash -5.00 USD\n",
        " Expenses:Misc 5.00 USD\n",
        "\n",
        "2024-01-16 * \"y\"\n",
        " Liabilities:CreditCard:Visa -100.00 USD\n",
        " Expenses:Big 100.00 USD\n",
    );
    let formatted = format_source(ledger);

    // Offset of `USD` on each indented line that carries a USD amount.
    let usd_columns: Vec<usize> = formatted
        .lines()
        .filter_map(|line| {
            if line.starts_with(" ") && line.contains(" USD") {
                line.find("USD")
            } else {
                None
            }
        })
        .collect();
    assert!(
        usd_columns.len() >= 4,
        "expected ≥4 posting lines, got {usd_columns:?} in {formatted}"
    );

    let reference = usd_columns[0];
    let uniform = usd_columns.iter().all(|&column| column == reference);
    assert!(
        uniform,
        "expected USD column uniform at {reference}, got {usd_columns:?} in:\n{formatted}"
    );
}
2975
/// Metadata attached to a posting is emitted on its own indented line
/// below that posting.
#[test]
fn transaction_posting_metadata_indented_four() {
    let ledger =
        "2024-01-15 * \"x\"\n Assets:Cash -5.00 USD\n foo: \"bar\"\n Expenses:Misc\n";
    let formatted = format_source(ledger);
    // NOTE(review): the test name says four spaces, but the indent in
    // this needle looks collapsed in the reviewed copy — confirm it
    // against the canonical file.
    let meta_line_present = formatted.contains("\n foo: \"bar\"\n");
    assert!(meta_line_present, "got: {formatted}");
}
2983
2984 // ---- Code-review regression tests -----------------------------
2985 //
2986 // Each test pins a bug surfaced by the high-effort code review of
2987 // PR #1284 and verified at runtime against the unfixed formatter.
2988
/// Bug pinned: `format_cost_spec` branched only on `is_total()` and wrote
/// `{` for the `{#` opener as well. That dropped the `#` marker and
/// changed the meaning from per-unit-plus-total to plain per-unit cost.
#[test]
fn cost_spec_per_unit_plus_total_opener_preserved() {
    let ledger = "2024-01-01 * \"buy\"\n Assets:Brokerage 10 HOOL {# 500.00 USD}\n Assets:Cash -5000.00 USD\n";
    let formatted = format_source(ledger);

    let opener_kept = formatted.contains("{# 500.00 USD}");
    assert!(
        opener_kept,
        "expected `{{#` opener preserved; got:\n{formatted}"
    );

    let degraded_to_per_unit = formatted.contains("{500.00 USD}");
    assert!(!degraded_to_per_unit, "got:\n{formatted}");
}
3003
/// Bug pinned: the catch-all arm of `format_cost_spec` put a space before
/// every non-trivia token, COMMA included, yielding
/// `{500.00 USD , 2024-01-15}` instead of the canonical
/// `{500.00 USD, 2024-01-15}`.
#[test]
fn cost_spec_comma_stays_tight_to_prev_token() {
    let ledger = "2024-01-01 * \"buy\"\n Assets:Brokerage 10 HOOL {500.00 USD, 2024-01-15}\n Assets:Cash -5000.00 USD\n";
    let formatted = format_source(ledger);

    let canonical_present = formatted.contains("{500.00 USD, 2024-01-15}");
    assert!(
        canonical_present,
        "comma must stay tight to USD; got:\n{formatted}"
    );

    let space_before_comma = formatted.contains("USD ,");
    assert!(
        !space_before_comma,
        "no space allowed before comma; got:\n{formatted}"
    );
}
3021
/// Bug pinned: after the type string was seen, `emit_custom` skipped
/// every DATE token and so silently dropped legitimate date-typed value
/// arguments. The leading directive date is already skipped during the
/// `seen_type=false` phase.
#[test]
fn custom_directive_preserves_date_value_arguments() {
    let ledger = "2024-01-01 custom \"budget\" \"name\" 2024-06-15 100.00 USD\n";
    let formatted = format_source(ledger);
    let date_value_kept = formatted.contains("2024-06-15");
    assert!(
        date_value_kept,
        "value-position DATE must survive; got: {formatted}"
    );
}
3035
/// Bug pinned: the top-level walk in `format_node` wrote a blank `\n`
/// separator before every emitted item, comments included. That broke
/// section-header blocks such as `; ====\n; HEADER\n; ====` by putting a
/// blank line between each pair of adjacent comment lines.
#[test]
fn file_level_adjacent_comments_stay_tight() {
    let input = "; ====\n; HEADER\n; ====\n2024-01-01 open Assets:A\n";
    let expected = "; ====\n; HEADER\n; ====\n2024-01-01 open Assets:A\n";
    let formatted = format_source(input);
    assert_eq!(formatted, expected);
}
3046
/// Bug pinned: `emit_meta_entries_of` passed META_ENTRY source text
/// through verbatim, so two metadata lines that differ only in the
/// whitespace between key and value — identical typed ASTs — produced
/// different formatter output, violating the gofmt-style invariant the
/// rustdoc declares.
#[test]
fn metadata_internal_whitespace_normalized() {
    // `a` has one space after the key, `b` has several. The two fixtures
    // were byte-identical before, which made the comparison below
    // trivially true; the guard keeps them from drifting back together.
    let a = "2024-01-01 open Assets:Bank\n starting: \"foo\"\n";
    let b = "2024-01-01 open Assets:Bank\n starting:    \"foo\"\n";
    assert_ne!(a, b, "fixtures must differ in raw whitespace");
    assert_eq!(format_source(a), format_source(b));
}
3058
/// Numbers inside metadata values follow the same thousands-separator
/// policy as posting numbers; otherwise one file would carry two
/// different numeric forms (postings vs. metadata).
#[test]
fn metadata_number_thousands_separator_stripped() {
    let ledger = "2024-01-01 open Assets:Bank\n starting_balance: 1,000.00 USD\n";
    let formatted = format_source(ledger);

    let stripped_present = formatted.contains("1000.00 USD");
    assert!(
        stripped_present,
        "thousands-sep should strip in metadata too; got: {formatted}"
    );

    let separator_left = formatted.contains("1,000");
    assert!(!separator_left, "got: {formatted}");
}
3073
/// Bug pinned: the lexer does not treat a bare CR as a line terminator,
/// so a classic-Mac-authored `directive\r…\rdirective\r` parsed as one
/// broken directive and the rest was silently dropped. `format_source`
/// normalizes line endings BEFORE parsing, so bare CR (and CRLF) are
/// treated as LF.
#[test]
fn bare_cr_line_endings_normalized_to_lf_before_parse() {
    let ledger = "2024-01-01 open Assets:A\r2024-01-02 open Assets:B\r";
    let formatted = format_source(ledger);

    let first_kept = formatted.contains("2024-01-01 open Assets:A");
    assert!(first_kept, "first directive lost: {formatted:?}");

    let second_kept = formatted.contains("2024-01-02 open Assets:B");
    assert!(
        second_kept,
        "second directive lost on bare-CR input: {formatted:?}"
    );
}
3092
/// CRLF input comes out LF-only (the canonical form has no CR), with
/// both directives intact and each terminated by `\n`.
#[test]
fn crlf_input_canonicalizes_to_lf() {
    let ledger = "2024-01-01 open Assets:A\r\n2024-01-02 open Assets:B\r\n";
    let formatted = format_source(ledger);

    let has_carriage_return = formatted.contains('\r');
    assert!(
        !has_carriage_return,
        "canonical output must be LF-only: {formatted:?}"
    );

    let first_kept = formatted.contains("2024-01-01 open Assets:A\n");
    assert!(first_kept, "got: {formatted:?}");

    let second_kept = formatted.contains("2024-01-02 open Assets:B\n");
    assert!(second_kept, "got: {formatted:?}");
}
3106
/// Bug pinned: the tokenized walk in `emit_meta_entry` put a space after
/// a unary `+`/`-`, turning `key: -5.00 USD` into `key: - 5.00 USD`.
/// The value is now routed through `write_canonical_token_sequence`, so
/// unary detection matches the balance/price/posting paths.
#[test]
fn metadata_value_with_unary_minus_stays_tight() {
    let ledger = "2024-01-01 open Assets:Bank\n threshold: -5.00 USD\n";
    let formatted = format_source(ledger);

    let tight_present = formatted.contains("threshold: -5.00 USD");
    assert!(
        tight_present,
        "unary minus must stay tight in metadata; got: {formatted}"
    );

    let detached_minus = formatted.contains("- 5.00");
    assert!(
        !detached_minus,
        "no space after unary minus; got: {formatted}"
    );
}
3124
/// A unary `+` in a metadata value stays attached to its number.
#[test]
fn metadata_value_with_unary_plus_stays_tight() {
    let ledger = "2024-01-01 open Assets:Bank\n min: +1.00 USD\n";
    let formatted = format_source(ledger);

    let tight_present = formatted.contains("min: +1.00 USD");
    assert!(tight_present, "got: {formatted}");

    let detached_plus = formatted.contains("+ 1.00");
    assert!(!detached_plus, "got: {formatted}");
}
3132
/// Bug pinned: the catch-all in `format_cost_spec` had no handling for
/// unary operators, so `{-500 USD}` came out as `{- 500 USD}`. The cost
/// spec is now routed through `write_canonical_token_sequence`.
#[test]
fn cost_spec_negative_cost_stays_tight() {
    let ledger = "2024-01-01 * \"x\"\n Assets:Brokerage 10 HOOL {-500 USD}\n Assets:Cash -5000.00 USD\n";
    let formatted = format_source(ledger);

    let tight_present = formatted.contains("{-500 USD}");
    assert!(
        tight_present,
        "negative cost spec must stay tight; got:\n{formatted}"
    );

    let detached_minus = formatted.contains("{- ");
    assert!(!detached_minus, "got:\n{formatted}");
}
3146
/// `{500 * -2 USD}` used to come out as `{500 * - 2 USD}` because the
/// cost-spec catch-all did not understand unary `+`/`-`.
#[test]
fn cost_spec_arithmetic_with_unary_stays_tight() {
    let ledger = "2024-01-01 * \"x\"\n Assets:Brokerage 10 HOOL {500 * -2 USD}\n Assets:Cash -1000.00 USD\n";
    let formatted = format_source(ledger);
    let tight_present = formatted.contains("{500 * -2 USD}");
    assert!(
        tight_present,
        "cost-spec arithmetic unary must stay tight; got:\n{formatted}"
    );
}
3158
3159 // ---- Property tests -------------------------------------------
3160 //
3161 // Two invariants the rustdoc's gofmt-style promise depends on,
3162 // pinned over a hand-curated input matrix:
3163 //
3164 // - **Idempotence:** `format_source(format_source(x)) == format_source(x)`.
3165 // - **Round-trip stability for canonicalize_directives:** the
3166 // synthesize-then-canonicalize shim produces text that, when
3167 // parsed back, yields the same Directive count and zero parse
3168 // errors.
3169 //
3170 // The matrix covers every directive kind plus the high-risk
3171 // edge cases the prior reviews surfaced (unary +/- in metadata,
3172 // cost-spec arithmetic, CRLF, bare CR, multi-line strings,
3173 // comments containing quotes, non-Latin accounts). When the
3174 // upstream compatibility corpus is fetched into
3175 // `tests/compatibility/files/` the per-file sweep at the bottom
3176 // also runs; otherwise the file-based test is skipped.
3177
3178 const IDEMPOTENCE_MATRIX: &[(&str, &str)] = &[
3179 ("empty", ""),
3180 ("only_comment", "; header comment\n"),
3181 ("only_directive", "2024-01-01 open Assets:Cash\n"),
3182 (
3183 "two_open_directives",
3184 "2024-01-01 open Assets:A\n2024-01-02 open Assets:B\n",
3185 ),
3186 (
3187 "transaction_with_cost_and_price",
3188 "2024-01-15 * \"buy\"\n Assets:Brokerage 10 HOOL {500.00 USD} @ 510.00 USD\n Assets:Cash -5000.00 USD\n",
3189 ),
3190 (
3191 "transaction_with_per_unit_plus_total_cost",
3192 "2024-01-15 * \"x\"\n Assets:Brokerage 10 HOOL {# 500.00 USD}\n Assets:Cash -5000.00 USD\n",
3193 ),
3194 (
3195 "transaction_with_arithmetic_amount",
3196 "2024-01-15 * \"x\"\n Assets:Cash -(1.00 + 2.00) USD\n Expenses:Misc 3.00 USD\n",
3197 ),
3198 (
3199 "balance_with_arithmetic_and_tolerance",
3200 "2024-01-15 balance Assets:Cash 0.25 + 0.75 USD ~ 0.01 USD\n",
3201 ),
3202 // Regression for Copilot #2: a previous emit_amount_expression
3203 // skipped tokens until the first NUMBER, which dropped a
3204 // leading unary `-` and silently flipped the sign — a
3205 // balance assertion that asserted a debit would assert a
3206 // credit after a round-trip. These fixtures pin the
3207 // sign / paren preservation explicitly.
3208 (
3209 "balance_leading_unary_minus",
3210 "2024-01-15 balance Assets:A -1.00 USD\n",
3211 ),
3212 (
3213 "balance_leading_parenthesized_expression",
3214 "2024-01-15 balance Assets:A (1 + 2) USD\n",
3215 ),
3216 (
3217 "price_leading_unary_minus",
3218 "2024-01-15 price USD -1.00 EUR\n",
3219 ),
3220 (
3221 "price_with_thousands_separator",
3222 "2024-01-15 price USD 1,234.56 EUR\n",
3223 ),
3224 (
3225 "metadata_unary_minus",
3226 "2024-01-01 open Assets:Bank\n threshold: -5.00 USD\n",
3227 ),
3228 (
3229 "metadata_arithmetic",
3230 "2024-01-01 open Assets:Bank\n total: 1000 + 500 USD\n",
3231 ),
3232 (
3233 "cost_spec_with_comma_and_date",
3234 "2024-01-15 * \"x\"\n Assets:Brokerage 10 HOOL {500.00 USD, 2024-01-15}\n Assets:Cash -5000.00 USD\n",
3235 ),
3236 (
3237 "cost_spec_with_negative",
3238 "2024-01-15 * \"x\"\n Assets:Brokerage 10 HOOL {-500 USD}\n Assets:Cash 5000.00 USD\n",
3239 ),
3240 (
3241 "transaction_with_tags_and_links",
3242 "2024-01-15 * \"x\" #tag1 ^link1 #tag2\n Assets:Cash -1.00 USD\n Expenses:Misc 1.00 USD\n",
3243 ),
3244 (
3245 "custom_with_date_value",
3246 "2024-01-01 custom \"budget\" \"name\" 2024-06-15 100.00 USD\n",
3247 ),
3248 (
3249 "non_latin_account_name",
3250 "2024-01-15 * \"x\"\n Активы:Банк -5.00 USD\n Expenses:Misc 5.00 USD\n",
3251 ),
3252 (
3253 "section_header_comments",
3254 "; ====\n; HEADER\n; ====\n2024-01-01 open Assets:A\n",
3255 ),
3256 (
3257 "multiline_note_string",
3258 "2024-01-15 note Assets:Bank \"line 1\nline 2\"\n",
3259 ),
3260 (
3261 "comment_containing_quote",
3262 "; comment with \"a quote\n2024-01-01 open Assets:A\n",
3263 ),
3264 (
3265 "crlf_input",
3266 "2024-01-01 open Assets:A\r\n2024-01-02 open Assets:B\r\n",
3267 ),
3268 (
3269 "bare_cr_input",
3270 "2024-01-01 open Assets:A\r2024-01-02 open Assets:B\r",
3271 ),
3272 (
3273 "file_with_trailing_newlines",
3274 "2024-01-01 open Assets:A\n\n\n",
3275 ),
3276 ("file_without_trailing_newline", "2024-01-01 open Assets:A"),
3277 // Regression for Copilot #1: collect_trailing_comment
3278 // previously returned None for a directive with no
3279 // header-terminating NEWLINE token, which silently dropped
3280 // a same-line trailing comment at EOF when the file lacked
3281 // a trailing newline. The canonical formatter restores the
3282 // trailing newline, but the dropped comment was already
3283 // gone.
3284 (
3285 "trailing_comment_no_final_newline",
3286 "2024-01-15 open Assets:A ; trailing",
3287 ),
3288 (
3289 "posting_with_trailing_comment",
3290 "2024-01-15 * \"x\"\n Assets:Cash -5.00 USD ; pocket\n Expenses:Misc 5.00 USD\n",
3291 ),
3292 (
3293 "balance_assertion_with_meta",
3294 "2024-01-15 balance Assets:Cash 100.00 USD\n source: \"bank\"\n",
3295 ),
3296 (
3297 "options_and_includes",
3298 "option \"title\" \"My Ledger\"\ninclude \"sub.beancount\"\nplugin \"my.plugin\" \"cfg\"\n",
3299 ),
3300 // ---- per-variant coverage ---------------------------------
3301 ("close_directive", "2024-12-31 close Assets:Cash\n"),
3302 ("commodity_directive", "2024-01-01 commodity HOOL\n"),
3303 ("note_directive", "2024-01-15 note Assets:Cash \"a note\"\n"),
3304 ("event_directive", "2024-01-15 event \"location\" \"NYC\"\n"),
3305 (
3306 "query_directive",
3307 "2024-01-15 query \"q1\" \"SELECT account\"\n",
3308 ),
3309 ("pad_directive", "2024-01-15 pad Assets:A Equity:Opening\n"),
3310 (
3311 "document_directive",
3312 "2024-06-01 document Assets:Bank \"stmt.pdf\" #q1\n",
3313 ),
3314 // Note: `#!` and `#+` anywhere on a line, not just at
3315 // line start, open the lexer's SHEBANG / EMACS_DIRECTIVE
3316 // tokens. The fixture places `#+` mid-line and tails it
3317 // with an unbalanced `"`: an incorrect state machine that
3318 // gated the opener on `at_line_start` would stay in Code
3319 // when it hit the `#+`, then flip to InString on the next
3320 // `"` and trap there for the remainder of the file. The
3321 // lexer-agreement property test catches that divergence,
3322 // and the round-trip body runs too because the parser
3323 // treats the mid-line EMACS_DIRECTIVE as same-line
3324 // trailing trivia under the directive-terminator rule.
3325 (
3326 "emacs_directive_mid_line_with_quote",
3327 "2024-01-15 open Assets:A #+stray \"q\n",
3328 ),
3329 ("pushtag_directive", "pushtag #active\n"),
3330 ("poptag_directive", "poptag #active\n"),
3331 ("pushmeta_directive", "pushmeta location: \"NYC\"\n"),
3332 ("popmeta_directive", "popmeta location:\n"),
3333 ];
3334
/// Number of fixtures in [`IDEMPOTENCE_MATRIX`] that legitimately
/// produce zero typed directives — comment-only / empty /
/// pragma-only inputs. The round-trip property test skips these
/// (they have nothing to emit), but every OTHER fixture MUST
/// exercise the body. Bumping this constant when adding such a
/// fixture is the only manual maintenance the coverage floor
/// needs; otherwise the floor (`IDEMPOTENCE_MATRIX.len() -
/// ROUNDTRIP_KNOWN_ZERO_DIRECTIVE_FIXTURES`) tracks the matrix
/// automatically.
///
/// Today's zero-directive fixtures (skipped by the round-trip
/// body), verified by an exhaustive probe against the live
/// parser:
///
/// - `empty`, `only_comment` — no directives at all.
/// - `bare_cr_input` — the parser does not recognize bare CR
///   (without a following LF) as a directive terminator, so
///   the file's two would-be directives never surface as
///   structured tokens. The fixture's purpose is the
///   line-ending state-machine pass, not the round-trip body.
/// - `pushtag_directive`, `poptag_directive`,
///   `pushmeta_directive`, `popmeta_directive` — pragma
///   directives don't surface as `Directive` variants on the
///   typed-AST side (the parser also rejects them today, so
///   they produce parse errors and the skip-on-errors guard
///   triggers).
/// - `options_and_includes` — option / include / plugin lines
///   live on separate `ParseResult` collections, not on
///   `.directives`.
///
/// Tally behind the value below: 2 (`empty`, `only_comment`)
/// + 1 (`bare_cr_input`) + 4 (pragma directives)
/// + 1 (`options_and_includes`) = 8. Keep the list and the
/// constant in sync when either changes.
///
/// Note: `comment_containing_quote` and
/// `emacs_directive_mid_line_with_quote` BOTH exercise the
/// body — each is paired with a parseable directive on the
/// same line or an adjacent line, and the trivia token
/// (comment / `EMACS_DIRECTIVE`) attaches as same-line or
/// inter-directive trivia under the directive-terminator
/// rule. Their purpose is the state-machine / lexer agreement
/// property on a comment with an unbalanced `"`, not the
/// zero-directive case.
const ROUNDTRIP_KNOWN_ZERO_DIRECTIVE_FIXTURES: usize = 8;
3375
#[test]
fn lf_to_crlf_outside_strings_preserves_string_interior() {
    // Regression guard: the LF that sits INSIDE the multi-line
    // string literal is the user's data and must survive as a bare
    // LF, while the LF terminating the directive is converted.
    let input = "2024-01-15 note Assets:Bank \"line 1\nline 2\"\n";
    let converted = lf_to_crlf_outside_strings(input);

    let interior_untouched = converted.contains("line 1\nline 2");
    assert!(interior_untouched, "got: {converted:?}");

    let terminator_converted = converted.ends_with("\r\n");
    assert!(terminator_converted, "got: {converted:?}");
}
3385
#[test]
fn lf_to_crlf_outside_strings_handles_comment_with_quote() {
    // Regression guard: a lone `"` inside a `;` comment used to be
    // taken for a string opener, so every later newline in the file
    // stayed LF. Both newlines here must come out as CRLF.
    let input = "; comment with \"a quote\n2024-01-01 open Assets:A\n";
    let expected = "; comment with \"a quote\r\n2024-01-01 open Assets:A\r\n";
    let converted = lf_to_crlf_outside_strings(input);
    assert_eq!(converted, expected);
}
3398
#[test]
fn lf_to_crlf_outside_strings_handles_percent_comment_with_quote() {
    // Same shape as the `;` comment case, but with a `%` comment:
    // the unbalanced `"` must not suppress CRLF conversion.
    let input = "% percent \"quote\n2024-01-01 open Assets:A\n";
    let expected = "% percent \"quote\r\n2024-01-01 open Assets:A\r\n";
    let converted = lf_to_crlf_outside_strings(input);
    assert_eq!(converted, expected);
}
3405
#[test]
fn crlf_to_lf_preserves_crlf_inside_strings() {
    // Mirror of the LF→CRLF regression: the pre-parse normalizer
    // must fold only the CRLF that terminates the directive. The
    // CRLF inside the multi-line string is user data and stays.
    let input = "2024-01-15 note Assets:Bank \"line1\r\nline2\"\r\n";
    let folded = crlf_to_lf_outside_strings(input);
    let text: &str = &folded;

    // Inside the string literal: CRLF is preserved byte for byte.
    let interior_kept = text.contains("\"line1\r\nline2\"");
    assert!(interior_kept, "got: {:?}", text);

    // Outside the string literal: the final terminator is a bare LF.
    assert!(!text.ends_with("\r\n") && text.ends_with('\n'));
}
3422
#[test]
fn idempotence_matrix() {
    // Formatting already-formatted text must be a no-op:
    // f(f(x)) == f(x) for every fixture in the matrix. Each
    // fixture covers one axis of the canonical-form spec.
    for (fixture, input) in IDEMPOTENCE_MATRIX.iter() {
        let first_pass = format_source(input);
        let second_pass = format_source(&first_pass);
        assert_eq!(
            first_pass, second_pass,
            "idempotence broken on fixture `{fixture}`\n--- once ---\n{first_pass}\n--- twice ---\n{second_pass}",
        );
    }
}
3439
#[test]
fn canonicalize_directives_roundtrips_every_synthesized_directive() {
    // End-to-end check of the two-pass shim: for each fixture,
    // parse it, hand the typed directives to
    // canonicalize_directives, re-parse the canonical text, and
    // require zero parse errors plus an unchanged directive count.
    //
    // Fixtures that fail to parse cleanly or yield no typed
    // directives are skipped. To stop that skip from hiding a
    // regression, the loop counts how many fixtures reached the
    // body and the final assertion enforces a floor derived from
    // the matrix size and the known zero-directive fixtures.
    use rustledger_core::format::FormatConfig;
    let cfg = FormatConfig::default();
    let mut covered = 0usize;

    for (fixture, input) in IDEMPOTENCE_MATRIX.iter() {
        let original = crate::parse(input);
        // Nothing to round-trip for broken or directive-free input.
        if !original.errors.is_empty() || original.directives.is_empty() {
            continue;
        }

        let typed: Vec<&rustledger_core::Directive> = original
            .directives
            .iter()
            .map(|spanned| &spanned.value)
            .collect();
        let formatted = match super::canonicalize_directives(typed.iter().copied(), &cfg) {
            Ok(text) => text,
            Err(e) => panic!("canonicalize_directives error on fixture `{fixture}`: {e}"),
        };

        let reparsed = crate::parse(&formatted);
        assert!(
            reparsed.errors.is_empty(),
            "round-trip parse errors on fixture `{fixture}`:\n--- formatted ---\n{formatted}\n--- errors ---\n{:?}",
            reparsed.errors,
        );
        assert_eq!(
            original.directives.len(),
            reparsed.directives.len(),
            "directive count drifted on fixture `{fixture}`\n--- formatted ---\n{formatted}",
        );
        covered += 1;
    }

    // Coverage floor: every fixture except the known
    // zero-directive ones must have reached the body above.
    let floor = IDEMPOTENCE_MATRIX
        .len()
        .saturating_sub(ROUNDTRIP_KNOWN_ZERO_DIRECTIVE_FIXTURES);
    assert!(
        covered >= floor,
        "only {covered} fixtures exercised the round-trip body, \
         expected at least {floor} (= IDEMPOTENCE_MATRIX.len() - \
         {ROUNDTRIP_KNOWN_ZERO_DIRECTIVE_FIXTURES}). A parser \
         regression or a broken fixture is silently dropping coverage."
    );
}
3495
/// `EMACS_DIRECTIVE` lexemes (`#+…`) count as comments for the
/// LF→CRLF state machine, just like `;` / `%` comments: a stray
/// `"` inside one must not be taken for a string opener, or
/// every later newline in the file would stay LF. This test
/// covers a `#+` lexeme that begins its line.
#[test]
fn lf_to_crlf_outside_strings_handles_emacs_directive_with_quote() {
    let input = "#+title: \"My Book\n2024-01-01 open Assets:A\n";
    let expected = "#+title: \"My Book\r\n2024-01-01 open Assets:A\r\n";
    let converted = lf_to_crlf_outside_strings(input);
    assert_eq!(converted, expected);
}
3507
#[test]
fn lf_to_crlf_outside_strings_handles_shebang_with_quote() {
    // A `#!…` line is comment-like: the unbalanced `"` inside it
    // must not stop the following newlines from becoming CRLF.
    let input = "#!shebang \"quote\n2024-01-01 open Assets:A\n";
    let expected = "#!shebang \"quote\r\n2024-01-01 open Assets:A\r\n";
    let converted = lf_to_crlf_outside_strings(input);
    assert_eq!(converted, expected);
}
3514
/// A `#` that starts a tag (here `#tag1`, i.e. not a `#!` / `#+`
/// opener) is ordinary code for the state machine: it must NOT
/// open a comment, and the newline ending its line is converted
/// like any other newline outside a string.
#[test]
fn lf_to_crlf_outside_strings_hash_mid_line_is_not_comment() {
    let input = "2024-01-15 * \"x\" #tag1\n Assets:A 1 USD\n";
    let converted = lf_to_crlf_outside_strings(input);

    // The LF ending the tag-bearing line became CRLF …
    let tag_line_converted = converted.contains("#tag1\r\n");
    assert!(tag_line_converted, "got: {converted:?}");

    // … and so did the final one.
    let last_line_converted = converted.ends_with("\r\n");
    assert!(last_line_converted, "got: {converted:?}");
}
3526
/// Regression for Copilot #2 inline review on PR #1284: an
/// earlier `emit_amount_expression` dropped leading unary signs
/// and parentheses, so `balance Assets:A -1.00 USD` came out as
/// `1.00 USD` — silent data corruption (a debit asserted as a
/// credit). Every shape below is already canonical, so the
/// formatter must return it byte for byte.
#[test]
fn balance_price_preserve_leading_unary_and_parens() {
    let already_canonical = [
        // Bare leading minus on balance.
        "2024-01-15 balance Assets:A -1.00 USD\n",
        // Bare leading minus on price (a sign flip would change
        // every quote on the user's commodity).
        "2024-01-15 price USD -1.00 EUR\n",
        // Leading parenthesized expression. The old code dropped
        // the `(`, leaving the trailing `)` unbalanced AND making
        // the first-CURRENCY scan land on the wrong token.
        "2024-01-15 balance Assets:A (1 + 2) USD\n",
        // Leading minus on a parenthesized arithmetic expression.
        "2024-01-15 balance Assets:A -(1 + 2) USD\n",
    ];
    for src in already_canonical {
        assert_eq!(format_source(src), src);
    }
}
3563
/// Regression for Copilot #1 inline review on PR #1284:
/// `collect_trailing_comment` used `?` on the header-terminating
/// NEWLINE, so a same-line trailing comment on the last line of
/// a file WITHOUT a final newline was silently dropped. The
/// formatter must keep the comment and append the missing
/// newline — a real-world case for editors that don't insert a
/// trailing newline on save.
#[test]
fn trailing_comment_preserved_at_eof_without_newline() {
    let input = "2024-01-15 open Assets:A ; trailing";
    let expected = "2024-01-15 open Assets:A ; trailing\n";
    assert_eq!(format_source(input), expected);
}
3576
#[test]
fn try_format_source_returns_ok_on_clean_input() {
    // On input that parses cleanly the fallible entry point must
    // succeed and agree with the infallible one.
    let input = "2024-01-15 open Assets:Cash\n";
    let expected = super::format_source(input);
    let formatted = super::try_format_source(input).expect("clean input should format");
    assert_eq!(formatted, expected);
}
3583
#[test]
fn try_format_source_returns_err_on_parse_error() {
    // Text that is not a directive makes the parser report
    // errors; the helper must hand them back rather than emit
    // canonical text around a broken file.
    let garbage = "this is not a directive at all\n";
    let errors = super::try_format_source(garbage).expect_err("garbage should error");
    let has_errors = !errors.is_empty();
    assert!(has_errors, "errors must not be empty");
}
3593
#[test]
fn cr_outside_strings_present_distinguishes_in_string_cr() {
    // Each row: (what the case covers, input, expected answer).
    let cases = [
        // CR inside a multi-line string literal must NOT count —
        // the formatter wouldn't fold it.
        (
            "CR only inside a string literal",
            "2024-01-15 note Assets:Bank \"line1\r\nline2\"\n",
            false,
        ),
        // CR outside any string literal (a CRLF line terminator)
        // counts — that's what crlf_to_lf_outside_strings folds.
        ("CRLF line terminator", "2024-01-01 open Assets:A\r\n", true),
        // No `\r` at all — fast path.
        ("no CR at all", "2024-01-01 open Assets:A\n", false),
        // CR inside a `;` comment is outside any string and
        // counts: the predicate answers "the formatter would fold
        // this byte", and the `"` in the comment opens nothing.
        (
            "CR inside a `;` comment",
            "; comment with \"quote\rstuff\n",
            true,
        ),
    ];
    for (label, input, expected) in cases {
        assert_eq!(
            super::cr_outside_strings_present(input),
            expected,
            "case: {label}"
        );
    }
}
3618
#[test]
fn canonicalize_directives_directive_count_mismatch_is_reported() {
    // Every Directive variant currently round-trips with a
    // matching count, so the mismatch error cannot be provoked
    // through the public path. This test therefore builds the
    // variant by hand and pins its Display rendering (the
    // user-facing message). The count-matches path is covered by
    // `canonicalize_directives_positive_count_check` below.
    let mismatch = super::CanonicalizeError::DirectiveCountMismatch {
        input: 3,
        reparsed: 2,
    };
    let msg = mismatch.to_string();
    for needle in ["3 directive(s)", "2 survived", "rledger bug"] {
        assert!(msg.contains(needle), "got: {msg}");
    }
}
3636
/// Variant → fixture table shared by the compile-time
/// exhaustiveness check
/// ([`_directive_variant_fixture_coverage`]) and the runtime
/// semantic check
/// ([`directive_variant_fixture_names_resolve_in_matrix`]).
///
/// Each row is `(VariantName, fixture_name)`. The runtime check
/// uses `VariantName` to confirm that the named fixture really
/// parses to that variant; the compile-time match resolves the
/// same `VariantName` to `fixture_name` through
/// [`fixture_for_variant`]. A future `Directive::Hedge` variant
/// therefore only ships with canonical-form coverage if BOTH a
/// new arm is added to the compile-time match AND a row here
/// names a fixture that actually produces a `Hedge` on parse.
const DIRECTIVE_VARIANT_FIXTURE_MAP: &[(&str, &str)] = &[
    ("Transaction", "transaction_with_cost_and_price"),
    ("Balance", "balance_with_arithmetic_and_tolerance"),
    ("Open", "only_directive"),
    ("Close", "close_directive"),
    ("Commodity", "commodity_directive"),
    ("Pad", "pad_directive"),
    ("Event", "event_directive"),
    ("Query", "query_directive"),
    ("Note", "note_directive"),
    ("Document", "document_directive"),
    ("Price", "price_with_thousands_separator"),
    ("Custom", "custom_with_date_value"),
];

/// Resolve a variant tag string to the fixture name recorded for
/// it in [`DIRECTIVE_VARIANT_FIXTURE_MAP`].
///
/// # Panics
///
/// Panics when no row carries `tag`. That is an
/// internal-consistency bug in the table, not a user-facing case.
const fn fixture_for_variant(tag: &str) -> &'static str {
    /// Byte-wise slice equality written with plain loops so it
    /// can run inside a `const fn`.
    const fn same_bytes(lhs: &[u8], rhs: &[u8]) -> bool {
        if lhs.len() != rhs.len() {
            return false;
        }
        let mut at = 0;
        while at < lhs.len() {
            if lhs[at] != rhs[at] {
                return false;
            }
            at += 1;
        }
        true
    }

    let wanted = tag.as_bytes();
    let mut row = 0;
    while row < DIRECTIVE_VARIANT_FIXTURE_MAP.len() {
        let (variant, fixture) = DIRECTIVE_VARIANT_FIXTURE_MAP[row];
        if same_bytes(variant.as_bytes(), wanted) {
            return fixture;
        }
        row += 1;
    }
    panic!("DIRECTIVE_VARIANT_FIXTURE_MAP missing entry for variant tag");
}
3695
3696 /// Compile-time check that every `rustledger_core::Directive`
3697 /// variant has at least one source-text fixture in
3698 /// [`IDEMPOTENCE_MATRIX`] exercising its emit path. The
3699 /// function NEVER runs — its body is an exhaustive `match` over
3700 /// the `Directive` enum. Adding a new variant breaks
3701 /// compilation unless the author adds a match arm referencing
3702 /// `fixture_for_variant("NewVariantName")`, AND adds a row to
3703 /// [`DIRECTIVE_VARIANT_FIXTURE_MAP`] naming the fixture. The
3704 /// runtime test then confirms the fixture parses to a directive
3705 /// of that variant.
3706 ///
3707 /// The non-`Directive` pragma-style directives (Pushtag,
3708 /// Poptag, Pushmeta, Popmeta, options, includes, plugins)
3709 /// don't appear in the typed `Directive` enum; they're covered
3710 /// by separate fixtures whose names map directly into
3711 /// `IDEMPOTENCE_MATRIX`.
3712 #[allow(dead_code)]
3713 fn _directive_variant_fixture_coverage(d: &rustledger_core::Directive) -> &'static str {
3714 match d {
3715 rustledger_core::Directive::Transaction(_) => fixture_for_variant("Transaction"),
3716 rustledger_core::Directive::Balance(_) => fixture_for_variant("Balance"),
3717 rustledger_core::Directive::Open(_) => fixture_for_variant("Open"),
3718 rustledger_core::Directive::Close(_) => fixture_for_variant("Close"),
3719 rustledger_core::Directive::Commodity(_) => fixture_for_variant("Commodity"),
3720 rustledger_core::Directive::Pad(_) => fixture_for_variant("Pad"),
3721 rustledger_core::Directive::Event(_) => fixture_for_variant("Event"),
3722 rustledger_core::Directive::Query(_) => fixture_for_variant("Query"),
3723 rustledger_core::Directive::Note(_) => fixture_for_variant("Note"),
3724 rustledger_core::Directive::Document(_) => fixture_for_variant("Document"),
3725 rustledger_core::Directive::Price(_) => fixture_for_variant("Price"),
3726 rustledger_core::Directive::Custom(_) => fixture_for_variant("Custom"),
3727 }
3728 }
3729
#[test]
fn directive_variant_fixture_names_resolve_in_matrix() {
    // Runtime counterpart of the compile-time match. For every
    // row of DIRECTIVE_VARIANT_FIXTURE_MAP:
    //
    //   (1) the fixture name must exist in IDEMPOTENCE_MATRIX;
    //   (2) parsing that fixture must yield AT LEAST one
    //       directive of the variant the row names.
    //
    // Check (2) is what gives the table teeth: without it a row
    // such as `("Hedge", "only_comment")` would compile and
    // resolve, yet leave Hedge with zero canonical-form coverage.
    use rustledger_core::Directive;

    /// Name of the variant `d` holds, spelled as in the map.
    fn variant_tag(d: &Directive) -> &'static str {
        match d {
            Directive::Transaction(_) => "Transaction",
            Directive::Balance(_) => "Balance",
            Directive::Open(_) => "Open",
            Directive::Close(_) => "Close",
            Directive::Commodity(_) => "Commodity",
            Directive::Pad(_) => "Pad",
            Directive::Event(_) => "Event",
            Directive::Query(_) => "Query",
            Directive::Note(_) => "Note",
            Directive::Document(_) => "Document",
            Directive::Price(_) => "Price",
            Directive::Custom(_) => "Custom",
        }
    }

    for (variant, name) in DIRECTIVE_VARIANT_FIXTURE_MAP.iter() {
        // (1) the fixture must be present in the matrix.
        let src = match IDEMPOTENCE_MATRIX.iter().find(|entry| entry.0 == *name) {
            Some(entry) => entry.1,
            None => panic!(
                "fixture `{name}` is named by \
                 DIRECTIVE_VARIANT_FIXTURE_MAP but missing from \
                 IDEMPOTENCE_MATRIX"
            ),
        };

        // (2) it must parse to the variant the row claims.
        let parsed = crate::parse(src);
        let found = parsed
            .directives
            .iter()
            .any(|spanned| variant_tag(&spanned.value) == *variant);
        assert!(
            found,
            "fixture `{name}` is mapped to `Directive::{variant}` by \
             DIRECTIVE_VARIANT_FIXTURE_MAP, but parsing it produced \
             no directive of that variant (got {:?}). This silently \
             leaves the variant without canonical-form coverage.",
            parsed
                .directives
                .iter()
                .map(|spanned| std::mem::discriminant(&spanned.value))
                .collect::<Vec<_>>()
        );
    }
}
3793
/// Coverage-mirror check: every `matrix_name` listed in the
/// `MIRROR_PAIRS` table of the file-pair integration test
/// (`crates/rustledger-parser/tests/format_compat.rs`) must be
/// the name of an entry in [`IDEMPOTENCE_MATRIX`]. The
/// integration test asserts the other half (every
/// `file_pair_name` exists as a directory under `cases/`).
/// Together the two checks mean a bug-class fixture cannot be
/// retired from EITHER side without editing `MIRROR_PAIRS`,
/// which surfaces in review instead of letting one side drop
/// silently.
///
/// The list below is a hand-maintained copy of the matrix half
/// of that table: editing `MIRROR_PAIRS` in the integration test
/// requires editing this list too, otherwise this test fires.
#[test]
fn idempotence_matrix_mirrors_format_compat_pairs() {
    const MIRROR_PAIRS_MATRIX_HALF: &[&str] = &[
        "balance_leading_unary_minus",
        "balance_leading_parenthesized_expression",
        "price_leading_unary_minus",
        "cost_spec_with_negative",
        "cost_spec_with_comma_and_date",
        "transaction_with_per_unit_plus_total_cost",
        "metadata_unary_minus",
        "metadata_arithmetic",
        "non_latin_account_name",
        "posting_with_trailing_comment",
        "multiline_note_string",
        "comment_containing_quote",
        "transaction_with_tags_and_links",
        "custom_with_date_value",
        "options_and_includes",
        "balance_assertion_with_meta",
        "crlf_input",
    ];

    // Names actually present in the matrix.
    let known: std::collections::BTreeSet<&str> =
        IDEMPOTENCE_MATRIX.iter().map(|entry| entry.0).collect();

    // Mirror names with no matching matrix entry, in table order.
    let mut missing: Vec<&&str> = Vec::new();
    for wanted in MIRROR_PAIRS_MATRIX_HALF {
        if !known.contains(wanted) {
            missing.push(wanted);
        }
    }

    assert!(
        missing.is_empty(),
        "IDEMPOTENCE_MATRIX is missing the matrix-half of MIRROR_PAIRS: {missing:?}. \
         Either re-add the entry to IDEMPOTENCE_MATRIX, or edit MIRROR_PAIRS in \
         tests/format_compat.rs to retire the pair from BOTH sides.",
    );
}
3843
/// Property test: the `SourceState` classification used by the
/// line-ending helpers must match the lexer's view of every
/// fixture in [`IDEMPOTENCE_MATRIX`], byte by byte.
///
/// The bytes the state machine tags `InString` must coincide
/// with the lexer's STRING token spans, and the bytes it tags
/// `InComment` with the union of the COMMENT / SHEBANG /
/// `EMACS_DIRECTIVE` spans. A divergence — e.g. the lexer gains a
/// new comment lexeme that the state machine still treats as
/// code — fails here instead of silently mutating user bytes
/// inside that lexeme on a line-ending round-trip.
#[test]
fn source_state_classification_agrees_with_lexer() {
    use crate::logos_lexer::{Token, tokenize_lossless};

    for (name, src) in IDEMPOTENCE_MATRIX.iter() {
        // The lexer's verdict, painted onto a per-byte map.
        // Bytes not covered by a string / comment-like token stay
        // `Code`.
        let tokens = tokenize_lossless(src);
        let mut expected = vec![SourceState::Code; src.len()];
        for (token, span) in &tokens {
            let state = match token {
                Token::String(_) => SourceState::InString,
                Token::Comment(_) | Token::Shebang(_) | Token::EmacsDirective(_) => {
                    SourceState::InComment
                }
                _ => continue,
            };
            expected[span.start..span.end].fill(state);
        }

        // The state machine's verdict, plus the exact byte
        // indices where it fired a transition. Only those indices
        // are exempt from comparison: the lexer places such a
        // byte inside the resulting token while the state machine
        // tags it with the PRE-transition state. Exempting exact
        // indices (rather than every `"` / `;` / `%` / newline
        // byte) keeps a bug at a non-transition byte — say inside
        // a comment or string — visible as a failure.
        let (actual, transitions) = classify_source_bytes_with_transitions(src);

        let compared = expected
            .iter()
            .zip(actual.iter())
            .enumerate()
            .filter(|(i, _)| !transitions.contains(i));
        for (i, (want, got)) in compared {
            assert_eq!(
                want,
                got,
                "state-machine / lexer disagreement on fixture `{name}` \
                 at byte {i} ({:?}): lexer said {want:?}, state machine said {got:?}",
                src.as_bytes()[i] as char
            );
        }
    }
}
3908
3909 /// Walk `s` through the same state-machine logic the
3910 /// line-ending helpers use, returning a per-byte classification
3911 /// AND the set of byte indices where a state transition
3912 /// fired. The transition indices are the ONLY bytes where the
3913 /// state machine and the lexer can legitimately disagree (the
3914 /// off-by-one at opener / closer / terminator); callers
3915 /// comparing against the lexer should skip exactly those
3916 /// indices and assert agreement everywhere else.
3917 fn classify_source_bytes_with_transitions(
3918 s: &str,
3919 ) -> (Vec<SourceState>, std::collections::HashSet<usize>) {
3920 let (body, bom_len) = match s.strip_prefix('\u{FEFF}') {
3921 Some(rest) => (rest, '\u{FEFF}'.len_utf8()),
3922 None => (s, 0),
3923 };
3924 let mut out: Vec<SourceState> = vec![SourceState::Code; s.len()];
3925 let mut transitions = std::collections::HashSet::new();
3926 let mut chars = body.char_indices().peekable();
3927 let mut state = SourceState::Code;
3928 let mut prev_was_backslash = false;
3929 while let Some((rel_i, ch)) = chars.next() {
3930 let i = bom_len + rel_i;
3931 let peek = chars.peek().map(|&(_, c)| c);
3932 // Classify THIS byte under the state BEFORE advancing.
3933 for byte in &mut out[i..i + ch.len_utf8()] {
3934 *byte = state;
3935 }
3936 let prev_state = state;
3937 let next_state = advance_source_state(ch, peek, state, &mut prev_was_backslash);
3938 // Record only OPENING transitions and the comment-
3939 // closing newline, where the state machine and lexer
3940 // legitimately disagree on this single byte:
3941 // - Code → InString : opening `"` is Code-side but
3942 // the lexer puts it inside the STRING token.
3943 // - Code → InComment: opening `;` / `%` / `#!` /
3944 // `#+` is Code-side but the lexer puts it inside
3945 // the COMMENT / SHEBANG / EMACS_DIRECTIVE token.
3946 // - InComment → Code: the `\n` ending the comment is
3947 // classified InComment by the state machine but
3948 // sits OUTSIDE the comment token (the lexer's
3949 // `[^\n\r]*` excludes it).
3950 // The InString → Code transition (closing `"`) is NOT
3951 // a disagreement: the state machine still tags that
3952 // byte as InString (pre-transition), and the lexer
3953 // includes the closing `"` inside the STRING token.
3954 // Skipping it would silently mask a real bug.
3955 if next_state != state {
3956 let opening = matches!(prev_state, SourceState::Code)
3957 && matches!(next_state, SourceState::InString | SourceState::InComment);
3958 let comment_close = matches!(prev_state, SourceState::InComment)
3959 && matches!(next_state, SourceState::Code);
3960 if opening || comment_close {
3961 transitions.insert(i);
3962 // For a `#!` or `#+` opener the lexer's token
3963 // span begins at the `#`, so the second byte
3964 // (`!` / `+`) is also a "before the lexer's
3965 // token start" byte the state machine tags as
3966 // Code. Record it too.
3967 if matches!(ch, '#') && matches!(peek, Some('!' | '+')) {
3968 transitions.insert(i + 1);
3969 }
3970 }
3971 }
3972 state = next_state;
3973 }
3974 (out, transitions)
3975 }
3976
#[test]
fn canonicalize_directives_positive_count_check() {
    // Success path of the count guard: feed a real
    // three-directive input through canonicalize_directives and
    // require that it succeeds and that the output re-parses to
    // the SAME number of directives. Without this, a regression
    // that made the guard reject everything (e.g. `==` instead of
    // `!=` on the count comparison) would only show up in
    // production. Paired with the Display test above, both arms
    // of the guard are covered.
    use rustledger_core::format::FormatConfig;
    const EXPECTED_DIRECTIVES: usize = 3;

    let cfg = FormatConfig::default();
    let input = "2024-01-01 open Assets:Cash\n2024-01-02 open Assets:Bank\n2024-01-03 close Assets:Cash\n";

    let original = crate::parse(input);
    assert_eq!(
        original.directives.len(),
        EXPECTED_DIRECTIVES,
        "fixture must parse to 3 directives"
    );

    let typed: Vec<&rustledger_core::Directive> = original
        .directives
        .iter()
        .map(|spanned| &spanned.value)
        .collect();
    let formatted = super::canonicalize_directives(typed.iter().copied(), &cfg)
        .expect("canonicalize_directives should succeed on this input");

    let surviving = crate::parse(&formatted).directives.len();
    assert_eq!(
        surviving,
        EXPECTED_DIRECTIVES,
        "count check accepted but round-trip dropped directives: {formatted}"
    );
}
4007
4008 // ---- format_node_range -----------------------------------------
4009
4010 /// Parse `source` via the same pipeline `format_source` uses
4011 /// so the resulting `SyntaxNode`'s `TextRange`s are in the
4012 /// same byte frame `format_node_range`'s `range` argument
4013 /// is expected to use (post-BOM-strip, post-CRLF-to-LF).
4014 /// Returns the syntax node + the normalized source text so
4015 /// tests can compute byte offsets by `.find()`.
4016 fn parse_for_range(source: &str) -> (crate::SyntaxNode, String) {
4017 let (stripped, _bom) = crate::bom::strip_leading(source);
4018 let normalized = crlf_to_lf_outside_strings(stripped).to_string();
4019 let sf = SourceFile::parse(&normalized);
4020 (sf.syntax().clone(), normalized)
4021 }
4022
4023 fn ts(n: usize) -> rowan::TextSize {
4024 rowan::TextSize::try_from(n).expect("offset fits TextSize")
4025 }
4026
/// Round-trip invariant the range design rests on: when the
/// selection spans the entire file, the replacement text is
/// exactly `format_node(node)`. Range formatting is the
/// whole-file formatter restricted to a range, not a second,
/// parallel canonical form.
#[test]
fn format_node_range_full_range_matches_format_node() {
    let source = concat!(
        "2024-01-01 open Assets:Bank USD\n",
        "2024-01-15 * \"Coffee\"\n",
        " Assets:Bank -5.00 USD\n",
        " Expenses:Food\n",
        "2024-01-31 close Assets:Bank\n",
    );
    let (node, src) = parse_for_range(source);
    let whole_file = rowan::TextRange::new(ts(0), ts(src.len()));

    let (snap, formatted) =
        format_node_range(&node, whole_file).expect("full range must include all directives");

    assert_eq!(
        snap, whole_file,
        "snap range should be the whole file's textual span"
    );
    let expected = format_node(&node);
    assert_eq!(formatted, expected);
}
4052
/// When the selection touches nothing but whitespace (no
/// directive and no top-level comment intersected) the result is
/// `None`; the caller turns that into an empty `Vec<TextEdit>`.
#[test]
fn format_node_range_trivia_only_returns_none() {
    // Under the phase-2.0 Directive-Terminator Rule every blank
    // line between directives is leading trivia of the NEXT
    // directive, so any byte between two directives already lies
    // inside that directive's text_range. A genuinely trivia-only
    // selection therefore requires a source with no directives at
    // all, i.e. a pure-whitespace file. That is the case pinned
    // here: the LSP handler maps `None` to an empty
    // `Vec<TextEdit>`, the correct "nothing to format" answer for
    // a whitespace-only buffer.
    let (blank_root, _) = parse_for_range("\n\n\n");
    let everything = rowan::TextRange::new(ts(0), ts(3));
    let outcome = format_node_range(&blank_root, everything);
    assert!(outcome.is_none());
}
4073
/// A selection that lies entirely within the first directive
/// (the `open`) snaps to that directive alone; the transaction
/// that follows appears in neither the snap nor the output.
#[test]
fn format_node_range_single_directive() {
    let source = concat!(
        "2024-01-01 open Assets:Bank USD\n",
        "2024-01-15 * \"Coffee\"\n",
        " Assets:Bank -5.00 USD\n",
        " Expenses:Food\n",
    );
    let (node, src) = parse_for_range(source);

    // Select the keyword `open` itself. Locating it by search
    // keeps the test independent of the fixture's exact spacing.
    let keyword = "open";
    let keyword_at = src.find(keyword).expect("fixture contains 'open'");
    let sel = rowan::TextRange::new(ts(keyword_at), ts(keyword_at + keyword.len()));
    let (snap, formatted) = format_node_range(&node, sel).expect("intersects 1 directive");

    // The open directive's text_range begins at the start of the
    // file and runs through its terminating newline, so that is
    // what the snap must cover.
    let first_newline = src.find('\n').expect("first directive has terminator");
    let open_end = first_newline + 1;
    assert_eq!(snap.start(), ts(0));
    assert_eq!(snap.end(), ts(open_end));

    // The replacement is the canonical open line plus its `\n`
    // terminator, with nothing from the second directive.
    assert_eq!(formatted, "2024-01-01 open Assets:Bank USD\n");
}
4103
/// Selections spanning several directives keep the author's
/// inter-directive spacing: a blank line stays a blank line and
/// grouped directives stay grouped, exactly as whole-file
/// formatting behaves (#1325).
#[test]
fn format_node_range_multi_directive_preserves_blank_lines() {
    // Case 1 (#1325): a blank line separates the two directives
    // in the source, and range formatting must keep it, just as
    // whole-file formatting does.
    let spaced = concat!(
        "2024-01-01 open Assets:Bank USD\n",
        "\n",
        "2024-01-31 close Assets:Bank\n",
    );
    let (spaced_node, spaced_src) = parse_for_range(spaced);
    let spaced_sel = rowan::TextRange::new(ts(0), ts(spaced_src.len()));
    let (snap, formatted) =
        format_node_range(&spaced_node, spaced_sel).expect("intersects 2 directives");
    assert_eq!(snap, rowan::TextRange::new(ts(0), ts(spaced_src.len())));
    assert_eq!(formatted, spaced, "the blank separator must be preserved");

    // Case 2: the directives are adjacent (no blank). The output
    // must stay grouped and not gain a separator.
    let grouped = concat!(
        "2024-01-01 open Assets:Bank USD\n",
        "2024-01-31 close Assets:Bank\n",
    );
    let (grouped_node, grouped_src) = parse_for_range(grouped);
    let grouped_sel = rowan::TextRange::new(ts(0), ts(grouped_src.len()));
    let (_, grouped_out) =
        format_node_range(&grouped_node, grouped_sel).expect("intersects 2 directives");
    assert_eq!(grouped_out, grouped, "grouped directives must stay grouped");
}
4134
#[test]
fn format_node_range_first_directive_in_snap_keeps_leading_blank() {
    // Regression (Copilot review of #1325). With the selection on
    // the SECOND directive only, the first directive is outside
    // the snap, yet the blank line between them is leading trivia
    // of the second directive and so falls INSIDE the snapped
    // range. The replacement text has to re-emit that blank line;
    // otherwise applying the edit would silently delete the blank
    // above the selection.
    let source = "2024-01-01 open Assets:Bank USD\n\n2024-01-31 close Assets:Bank\n";
    let (node, src) = parse_for_range(source);

    // Zero-width cursor placed within the close directive.
    let close_at = src.find("close").expect("fixture has 'close'");
    let cursor = rowan::TextRange::new(ts(close_at), ts(close_at));
    let (snap, formatted) = format_node_range(&node, cursor).expect("intersects close");

    // The replacement text carries the leading blank line...
    assert_eq!(formatted, "\n2024-01-31 close Assets:Bank\n");

    // ...so splicing it over the snapped bytes reproduces the
    // original source, blank line included.
    let replaced_bytes = usize::from(snap.start())..usize::from(snap.end());
    let mut patched = src;
    patched.replace_range(replaced_bytes, &formatted);
    assert_eq!(
        patched, source,
        "range-formatting the second directive must not delete the blank above it"
    );
}
4162
/// A zero-width (cursor-only) selection inside a directive snaps
/// to that directive. Cursor convention: strictly inside, or on
/// the directive's first byte, counts as inside; a cursor on the
/// directive's end boundary belongs to the following child.
#[test]
fn format_node_range_cursor_inside_directive() {
    let source = concat!(
        "2024-01-01 open Assets:Bank USD\n",
        "2024-01-31 close Assets:Bank\n",
    );
    let (node, src) = parse_for_range(source);

    // Put the cursor on the `c` of `close` (second fixture line).
    let close_at = src.find("close").expect("fixture has 'close'");
    let cursor = rowan::TextRange::new(ts(close_at), ts(close_at));
    let (snap, formatted) = format_node_range(&node, cursor).expect("intersects close");

    // By the Directive-Terminator Rule the second directive OWNS
    // the trivia that precedes it, so its text_range (and hence
    // the snap) begins right after the first directive's
    // terminating newline.
    let newline_before_close = src
        .find("\n2024-01-31")
        .expect("close directive starts on its own line");
    let close_dir_start = newline_before_close + 1;

    assert_eq!(snap.start(), ts(close_dir_start));
    assert_eq!(snap.end(), ts(src.len()));
    assert_eq!(formatted, "2024-01-31 close Assets:Bank\n");
}
4192
/// Start-boundary inclusion rule: a cursor sitting exactly on a
/// directive's first byte snaps to that directive.
#[test]
fn format_node_range_cursor_at_directive_start_includes_directive() {
    let source = concat!(
        "2024-01-01 open Assets:Bank USD\n",
        "2024-01-31 close Assets:Bank\n",
    );
    let (node, _src) = parse_for_range(source);

    // Byte 0 is where the first directive begins.
    let at_file_start = rowan::TextRange::new(ts(0), ts(0));
    let (_snap, formatted) =
        format_node_range(&node, at_file_start).expect("intersects open");

    // The output holds the OPEN directive and none of the close.
    let begins_with_open = formatted.starts_with("2024-01-01 open");
    let mentions_close = formatted.contains("close");
    assert!(begins_with_open);
    assert!(!mentions_close);
}
4209
/// A top-level standalone comment (file-leading or
/// between-directive, attached to `SOURCE_FILE` by the trivia
/// attachment policy) that falls inside the selection is part of
/// both the snap and the emitted text.
#[test]
fn format_node_range_includes_top_level_comments() {
    let source = concat!("; header\n", "2024-01-01 open Assets:Bank USD\n");
    let (node, src) = parse_for_range(source);
    let whole_file = rowan::TextRange::new(ts(0), ts(src.len()));

    let (snap, formatted) = format_node_range(&node, whole_file).expect("intersects both");

    assert_eq!(snap, rowan::TextRange::new(ts(0), ts(src.len())));
    // The header comment is followed directly by the directive on
    // the next line: no canonical blank is inserted between a
    // file-level comment group and a directive, matching
    // format_node's policy.
    assert_eq!(formatted, "; header\n2024-01-01 open Assets:Bank USD\n");
}
4229
/// With the selection wholly inside an `ERROR_NODE` and no
/// Directive intersected, the result is `None`. This mirrors
/// `format_node`, which skips top-level `ERROR_NODE` children.
#[test]
fn format_node_range_error_node_only_returns_none() {
    // A bare `}}}` at top level is not a directive, so the parser
    // wraps it in an ERROR_NODE.
    let (node, src) = parse_for_range("}}}\n");
    let whole_file = rowan::TextRange::new(ts(0), ts(src.len()));
    let outcome = format_node_range(&node, whole_file);
    assert!(outcome.is_none());
}
4243
/// Selections reaching past EOF are handled: the snap clamps to
/// the last child that intersects within the file.
/// `format_node_range` does not validate `range` against the file
/// length; bytes beyond EOF can never intersect a child, so the
/// rule is degenerate but well-defined.
#[test]
fn format_node_range_past_eof_clamps() {
    let source = "2024-01-01 open Assets:Bank USD\n";
    let (node, src) = parse_for_range(source);
    let eof = src.len();
    let beyond = eof + 1000;

    // A range that begins at EOF lies entirely outside the file,
    // so no child intersects it.
    let past_eof = rowan::TextRange::new(ts(eof), ts(beyond));
    assert!(format_node_range(&node, past_eof).is_none());

    // A range that STRADDLES EOF still snaps to the last
    // directive it intersects.
    let straddle = rowan::TextRange::new(ts(0), ts(beyond));
    let (snap, formatted) = format_node_range(&node, straddle).expect("intersects open");
    assert_eq!(snap, rowan::TextRange::new(ts(0), ts(eof)));
    assert_eq!(formatted, "2024-01-01 open Assets:Bank USD\n");
}
4264
/// A cursor at a sub-directive position (inside a posting) snaps
/// upward to the enclosing transaction. The design pins "round
/// to top-level directive boundaries, no finer."
#[test]
fn format_node_range_cursor_in_posting_snaps_to_transaction() {
    let source = concat!(
        "2024-01-15 * \"Coffee\"\n",
        " Assets:Bank -5.00 USD\n",
        " Expenses:Food\n",
    );
    let (node, src) = parse_for_range(source);

    // Cursor on the `B` of `Bank` in the first posting.
    let bank_at = src.find("Bank").expect("fixture has Bank");
    let cursor = rowan::TextRange::new(ts(bank_at), ts(bank_at));
    let (snap, _formatted) =
        format_node_range(&node, cursor).expect("intersects transaction");

    // The snap spans the WHOLE transaction: from the start of the
    // file through the newline ending the last posting.
    let bounds = (snap.start(), snap.end());
    assert_eq!(bounds.0, ts(0));
    assert_eq!(bounds.1, ts(src.len()));
}
4286
/// When a selection straddles an `ERROR_NODE` sitting between two
/// valid directives, the snap range would be their union and so
/// would include the `ERROR_NODE` bytes. `format_node_range`
/// returns `None` in that case rather than silently deleting the
/// error content.
///
/// This deliberately diverges from `format_node`'s whole-file
/// policy. `format_source(broken_source)` does drop `ERROR_NODE`
/// content, but the callers of that path (`rledger format` CLI,
/// FFI `format.entry`) opt into content loss by invoking the
/// canonical-form pipeline. The per-handler LSP
/// `textDocument/rangeFormatting` path has no such opt-in, so it
/// refuses to delete user content the parser could not classify.
/// The function's rustdoc gives the per-handler asymmetry
/// rationale.
#[test]
fn format_node_range_bails_when_snap_covers_error_node() {
    let source = concat!(
        "2024-01-01 open Assets:Bank USD\n",
        "}}}garbage{{{\n",
        "2024-01-31 close Assets:Bank\n",
    );
    let (node, src) = parse_for_range(source);
    let whole_file = rowan::TextRange::new(ts(0), ts(src.len()));
    let outcome = format_node_range(&node, whole_file);
    assert!(
        outcome.is_none(),
        "selection covering both directives + ERROR_NODE between them must bail \
         to avoid silently deleting the garbage line — got Some output",
    );
}
4316
/// In a broken file, a selection intersecting only the FIRST
/// valid directive (so that no `ERROR_NODE` byte lies in the snap
/// range) is still formatted. This pins the `ERROR_NODE` bail as
/// scoped precisely to the snap range, not to "the file has any
/// `ERROR_NODE` at all".
#[test]
fn format_node_range_formats_directive_when_snap_does_not_cover_error_node() {
    let source = concat!(
        "2024-01-01 open Assets:Bank USD\n",
        "}}}garbage{{{\n",
        "2024-01-31 close Assets:Bank\n",
    );
    let (node, src) = parse_for_range(source);

    // The selection spans ONLY the open directive: its line plus
    // the terminating newline. The ERROR_NODE (fixture line 1)
    // begins at byte offset `open_end`, i.e. the length of the
    // first line including its `\n`, and is therefore OUTSIDE the
    // snap range.
    let first_newline = src.find('\n').expect("first directive has newline");
    let open_end = first_newline + 1;
    let sel = rowan::TextRange::new(ts(0), ts(open_end));

    let (snap, formatted) =
        format_node_range(&node, sel).expect("selection covers only the open");

    assert_eq!(snap.start(), ts(0));
    assert_eq!(snap.end(), ts(open_end));
    assert_eq!(formatted, "2024-01-01 open Assets:Bank USD\n");
}
4342
/// Cache contract: `format_node_with_alignment(node,
/// compute_alignment(sf))` must be byte-identical to
/// `format_node(node)`. Supplying the correct alignment is a pure
/// optimization, NOT a behavior change.
#[test]
fn format_node_equals_format_node_with_alignment() {
    // (label, source) pairs; the label only feeds the failure
    // message.
    let fixtures: &[(&str, &str)] = &[
        ("empty", ""),
        ("open only", "2024-01-01 open Assets:Bank USD\n"),
        (
            "single txn",
            concat!(
                "2024-01-15 * \"Coffee\"\n",
                " Assets:Bank -5.00 USD\n",
                " Expenses:Food\n",
            ),
        ),
        (
            "multi txn varying widths",
            concat!(
                "2024-01-15 * \"A\"\n",
                " Assets:Bank -5.00 USD\n",
                " Expenses:Food\n",
                "2024-02-15 * \"B\"\n",
                " Assets:Investment:Long:Path -123456.78 USD\n",
                " Expenses:Tax 100.00 USD\n",
            ),
        ),
    ];

    for &(label, source) in fixtures {
        let (node, _normalized) = parse_for_range(source);
        let source_file = SourceFile::cast(node.clone()).unwrap();
        let alignment = compute_alignment(&source_file);

        let plain = format_node(&node);
        let with_alignment = format_node_with_alignment(&node, alignment);
        assert_eq!(
            plain, with_alignment,
            "format_node_with_alignment must match format_node for {label}",
        );
    }
}
4383
/// `format_node_range_with_alignment(node, range, compute_alignment(sf))`
/// matches `format_node_range(node, range)` byte-identically.
/// Same shape as the `format_node` / `format_node_with_alignment`
/// equivalence test, applied to the range path.
///
/// Each probed range is also required to produce `Some` output.
/// Without that check the equivalence would hold trivially if
/// both paths returned `None`, and the alignment-dependent
/// formatting would never actually be compared.
#[test]
fn format_node_range_matches_format_node_range_with_alignment() {
    let source = concat!(
        "2024-01-15 * \"A\"\n",
        " Assets:Bank -5.00 USD\n",
        " Expenses:Food\n",
        "2024-02-15 * \"B\"\n",
        " Assets:Investment:Long:Path -123456.78 USD\n",
        " Expenses:Tax 100.00 USD\n",
    );
    let (node, src) = parse_for_range(source);
    let source_file = SourceFile::cast(node.clone()).unwrap();
    let alignment = compute_alignment(&source_file);

    // Pin the equivalence on three ranges: the whole file, the
    // first 10 bytes (inside the first transaction), and the last
    // 10 bytes (inside the second transaction).
    let sels = [
        rowan::TextRange::new(ts(0), ts(src.len())),
        rowan::TextRange::new(ts(0), ts(10)),
        rowan::TextRange::new(ts(src.len() - 10), ts(src.len())),
    ];
    for sel in sels {
        let uncached = format_node_range(&node, sel);
        // Guard against a vacuous pass: every range above
        // intersects a transaction, so there must be output to
        // compare.
        assert!(
            uncached.is_some(),
            "range {sel:?} must intersect a directive so the equivalence check is not vacuous",
        );
        let cached = format_node_range_with_alignment(&node, sel, alignment);
        assert_eq!(
            uncached, cached,
            "format_node_range_with_alignment must match \
             format_node_range for range {sel:?}",
        );
    }
}
4418
/// End-to-end regression for the cached `ParseResult::alignment`:
/// it must equal what `format_node` would compute on the parsed
/// tree, so an LSP caller that hands `parse_result.alignment` to
/// `format_node_with_alignment` gets the same output as the bare
/// (uncached) `format_node`.
#[test]
fn parse_result_alignment_drives_identical_format_output() {
    let source = concat!(
        "2024-01-15 * \"Coffee\"\n",
        " Assets:Bank -5.00 USD\n",
        " Expenses:Food\n",
    );
    let parse_result = crate::parse(source);
    let node = parse_result.syntax_node();

    let uncached = format_node(&node);
    let from_cache = format_node_with_alignment(&node, parse_result.alignment);
    assert_eq!(
        uncached, from_cache,
        "ParseResult::alignment must drive identical format output to format_node",
    );
}
4439
/// `format_source_with_parsed(parse(s), s)` must equal
/// `format_source(s)` byte-for-byte over a representative fixture
/// set that includes CRLF and BOM-prefixed sources.
///
/// This equivalence is load-bearing for the LSP
/// `format_document` / FFI `format.source` / WASM
/// `ParsedLedger::format` migrations, which replace
/// `format_source(source)` with
/// `format_source_with_parsed(parse_result, source)` on the
/// assumption that both produce the same output. Without this
/// test, a later converter or formatter change that silently
/// diverged the two paths would break canonical-form expectations
/// in production.
#[test]
fn format_source_with_parsed_matches_format_source() {
    let fixtures: &[(&str, &str)] = &[
        ("empty", ""),
        ("comment only", "; hello\n"),
        (
            "single transaction LF",
            concat!(
                "2024-01-15 * \"Coffee\"\n",
                " Assets:Bank -5.00 USD\n",
                " Expenses:Food\n",
            ),
        ),
        (
            "multi transaction varying widths LF",
            concat!(
                "2024-01-15 * \"A\"\n",
                " Assets:Bank -5.00 USD\n",
                " Expenses:Food\n",
                "2024-02-15 * \"B\"\n",
                " Assets:Investment:Long:Path -123456.78 USD\n",
                " Expenses:Tax 100.00 USD\n",
            ),
        ),
        (
            "arithmetic amounts LF",
            concat!(
                "2024-01-15 * \"Split\"\n",
                " Assets:Bank -10.00 + 5.00 USD\n",
                " Expenses:Misc\n",
            ),
        ),
        (
            "CRLF source",
            "2024-01-15 * \"Coffee\"\r\n Assets:Bank -5.00 USD\r\n Expenses:Food\r\n",
        ),
        ("BOM-prefixed", "\u{FEFF}2024-01-01 open Assets:Bank USD\n"),
        // BOM + CRLF: a Windows-authored ledger with a BOM prefix.
        // `format_source` strips the BOM and normalizes CRLF→LF
        // before parsing, whereas the cache path consumes a CST
        // that is BOM-stripped but NOT CRLF-normalized.
        // Byte-identity still holds because the formatter rebuilds
        // canonical output from typed values (no trivia
        // passthrough).
        (
            "BOM + CRLF combination",
            "\u{FEFF}2024-01-15 * \"Coffee\"\r\n Assets:Bank -5.00 USD\r\n Expenses:Food\r\n",
        ),
        // File with a parse error, which exercises the fallback.
        // Absent the `errors.is_empty()` guard the cache path
        // would emit text for ERROR_NODE-wrapped content while
        // `format_source` would drop those bytes, so identity
        // would fail. The fallback delegates to
        // `format_source(source)`, which keeps identity intact.
        (
            "parse errors (exercises fallback)",
            "2024-01-15 * \"x\"\n Assets:Bank -5.00 USD\n}}}garbage\n",
        ),
        // Bare-`\r` (classic Mac) line terminators. The
        // `format_source` path turns bare CR into LF through
        // `crlf_to_lf_outside_strings` and then parses cleanly.
        // `parse_via_cst` does NOT normalize bare CR, so the CST
        // sees broken syntax, `parse_result.errors` is non-empty,
        // and the fallback fires. Byte-identity holds through the
        // same `format_source` delegation.
        (
            "bare CR line terminators (exercises fallback)",
            "2024-01-01 open Assets:Bank USD\r2024-01-02 open Assets:Cash USD\r",
        ),
    ];

    for &(label, source) in fixtures {
        let parse_result = crate::parse(source);
        let baseline = format_source(source);
        let cached = format_source_with_parsed(&parse_result, source);
        assert_eq!(
            cached, baseline,
            "format_source_with_parsed must match format_source for {label}: \
             baseline {baseline:?}, cached {cached:?}",
        );
    }
}
4532
/// Mismatched-pair safety net. In debug builds, handing
/// `format_source_with_parsed` a `(parse_result, source)` pair
/// whose lengths disagree panics through the `debug_assert_eq!`.
/// Release builds silently emit text for the wrong buffer;
/// keeping the pair consistent is the caller's responsibility
/// (the producer-only invariant documented on
/// `ParseResult::alignment`).
#[cfg(debug_assertions)]
#[test]
#[should_panic(expected = "source` whose length doesn't match")]
fn format_source_with_parsed_panics_on_length_mismatch() {
    let parsed_from = "2024-01-01 open Assets:Bank USD\n";
    let parse_result = crate::parse(parsed_from);
    // The source handed over has a different length than the one
    // that was parsed, so the debug_assert fires.
    let unrelated_source = "different";
    let _ = format_source_with_parsed(&parse_result, unrelated_source);
}
4547}