Skip to main content

rustledger_parser/cst/
convert.rs

1//! CST -> `ParseResult` converter.
2//!
3//! [`parse_via_cst`] is the implementation behind the public
4//! [`crate::parse`] entry point. It walks the structured CST from
5//! [`crate::parse_structured`] via the typed-AST surface in
6//! [`crate::cst::ast`] and produces the legacy AST-shaped
7//! [`ParseResult`] that downstream consumers (loader, booking,
8//! validate, query, LSP) consume.
9//!
10//! ## Conversion scope
11//!
12//! Per-directive converters: Open, Close, Commodity, Note,
13//! Document, Event, Query, Price, Balance, Pad, Custom, and
14//! Transaction (with its full posting / cost-spec / price-
15//! annotation / metadata / trailing-comments machinery).
16//!
17//! State-only directives (Pushtag / Poptag / Pushmeta / Popmeta)
18//! mutate `tag_stack` / `meta_stack` inherited by subsequent
19//! directives; mismatched-pop and unclosed-at-EOF emit specific
20//! `ParseErrorKind` variants. Arithmetic AMOUNT expressions
21//! (`120 / 3 USD` ≡ `40 USD`) are evaluated; the same logic
22//! powers numeric values in BALANCE and PRICE directives.
23//!
24//! Field-level extractors populate `ParseResult.options`,
25//! `.includes`, `.plugins`, `.comments`, `.currency_occurrences`,
26//! `.account_occurrences`.
27//!
28//! ## Error surfacing
29//!
30//! A single [`walk_descendants_once`] pass collects standalone
31//! comments, currency occurrences, account occurrences, and inline
32//! `ERROR_TOKEN` / mid-file-BOM errors. Specialized extractors run alongside for
33//! `ERROR_NODE` classification, transaction body errors, unclosed
34//! cost braces, indented top-level directives, and bare-currency
35//! values in custom directives.
36
37use rust_decimal::Decimal;
38use rustledger_core::cost::{CostNumber, CostSpec};
39use rustledger_core::directive::{PriceAnnotation, PriceKind};
40use rustledger_core::{
41    Account, Amount, Currency, Directive, IncompleteAmount, InternedStr, Link, MetaValue, Metadata,
42    NaiveDate, Posting, Span, Spanned, Tag, naive_date,
43};
44
45use crate::ParseResult;
46use crate::cst::ast::{
47    self, AstNode, AstToken, BalanceDirective, CloseDirective, CommodityDirective, CustomDirective,
48    DocumentDirective, EventDirective, IncludeDirective, MetaEntry, NoteDirective, OpenDirective,
49    OptionDirective, PadDirective, PluginDirective, PostingFlagKind, PriceDirective,
50    QueryDirective, SourceFile, Transaction as AstTransaction, TransactionFlagKind,
51};
52
53/// Parse Beancount source via the CST and produce the AST-shaped
54/// [`ParseResult`]. This is the implementation behind
55/// [`crate::parse`]; the public entry delegates here unconditionally.
56///
57/// See the module-level rustdoc for the conversion scope.
58#[must_use]
59pub fn parse_via_cst(source: &str) -> ParseResult {
60    // BOM detection mirrors the legacy parser's behavior: strip a
61    // leading 3-byte BOM from the source before tokenizing and
62    // record its presence in the result. Spans index the original
63    // source frame INCLUDING the BOM offset.
64    let (stripped, has_leading_bom) = crate::bom::strip_leading(source);
65    let bom_offset: u32 = if has_leading_bom { 3 } else { 0 };
66
67    let source_file = SourceFile::parse(stripped);
68
69    let mut directives: Vec<Spanned<Directive>> = Vec::new();
70    let mut directive_nodes: Vec<crate::SyntaxNode> = Vec::new();
71    let mut options: Vec<(String, String, Span)> = Vec::new();
72    let mut includes: Vec<(String, Span)> = Vec::new();
73    let mut plugins: Vec<(String, Option<String>, Span)> = Vec::new();
74    // Single-pass descendants walk that yields inline errors,
75    // top-level comments, and currency occurrences (replaces three
76    // separate `descendants_with_tokens` walks at 3·O(N) → 1·O(N)).
77    let DescendantsWalkResult {
78        inline_errors,
79        top_level_comments,
80        currency_occurrences,
81        account_occurrences,
82    } = walk_descendants_once(&source_file, bom_offset);
83
84    // Fused single pass over the top-level children replaces the
85    // five former per-child traversals (error-node, transaction-body,
86    // indented-directive, custom-value diagnostics + section-marker
87    // comments). See `walk_top_level_once`.
88    let TopLevelWalkResult {
89        errors: top_level_errors,
90        section_marker_comments,
91    } = walk_top_level_once(&source_file, stripped, bom_offset);
92
93    let mut comments: Vec<Spanned<String>> = top_level_comments;
94    comments.extend(section_marker_comments);
95    // Merge in source order; the two helpers' classifiers are
96    // disjoint today (STAR-first vs COMMENT-kind-first) but
97    // dedup-by-start keeps the invariant local.
98    comments.sort_by_key(|s| s.span.start);
99    comments.dedup_by_key(|s| s.span.start);
100    let mut errors = top_level_errors;
101    errors.extend(extract_unclosed_cost_brace_errors(&source_file, bom_offset));
102    errors.extend(inline_errors);
103    let warnings = Vec::new();
104
105    // pushtag/poptag/pushmeta/popmeta state. The legacy parser
106    // maintains a stack across directives; each Transaction
107    // inherits the active pushed-tag set, and EVERY directive
108    // inherits the active pushed-meta set. We pair each entry
109    // with the originating directive's span so unclosed-at-EOF
110    // diagnostics can point at the offending push.
111    let mut tag_stack: Vec<(Tag, Span)> = Vec::new();
112    // Vec-of-tuples (NOT a `Metadata` map) so legacy semantics
113    // are preserved: `pushmeta x: 1` then `pushmeta x: 2` should
114    // shadow (peek returns 2) and `popmeta x` should pop the
115    // most recent, leaving x=1 active. A HashMap would have lost
116    // the shadowed entry on the second push.
117    let mut meta_stack: Vec<(String, MetaValue, Span)> = Vec::new();
118
119    for directive in source_file.directives() {
120        // Helper to push a successfully-converted directive
121        // alongside its CST node so the post-pass span fixup
122        // can index them in parallel.
123        let cst_node = directive.syntax().clone();
124        // `is_directive_producing` tracks whether THIS arm is
125        // expected to emit a `Spanned<Directive>` (the 12
126        // directive types). The catch-all below uses it to
127        // surface a `SyntaxError` when a producing converter
128        // returned `None` without emitting a more specific
129        // diagnostic - the silent-drop class of bug the integ
130        // tests caught for `2024-01-01 open` (no account),
131        // `balance Assets:X` (no amount), etc.
132        let is_directive_producing = matches!(
133            directive,
134            ast::Directive::Open(_)
135                | ast::Directive::Close(_)
136                | ast::Directive::Commodity(_)
137                | ast::Directive::Note(_)
138                | ast::Directive::Document(_)
139                | ast::Directive::Event(_)
140                | ast::Directive::Query(_)
141                | ast::Directive::Price(_)
142                | ast::Directive::Balance(_)
143                | ast::Directive::Pad(_)
144                | ast::Directive::Custom(_)
145                | ast::Directive::Transaction(_)
146        );
147        let errors_before = errors.len();
148        let pushed_directive = match directive {
149            ast::Directive::Open(node) => convert_open(&node, bom_offset, &mut errors),
150            ast::Directive::Close(node) => convert_close(&node, bom_offset, &mut errors),
151            ast::Directive::Commodity(node) => convert_commodity(&node, bom_offset, &mut errors),
152            ast::Directive::Note(node) => convert_note(&node, bom_offset, &mut errors),
153            ast::Directive::Document(node) => convert_document(&node, bom_offset, &mut errors),
154            ast::Directive::Event(node) => convert_event(&node, bom_offset, &mut errors),
155            ast::Directive::Query(node) => convert_query(&node, bom_offset, &mut errors),
156            ast::Directive::Price(node) => convert_price(&node, bom_offset, &mut errors),
157            ast::Directive::Balance(node) => convert_balance(&node, bom_offset, &mut errors),
158            ast::Directive::Pad(node) => convert_pad(&node, bom_offset, &mut errors),
159            ast::Directive::Custom(node) => convert_custom(&node, bom_offset, &mut errors),
160            ast::Directive::Transaction(node) => {
161                convert_transaction(&node, bom_offset, &mut errors)
162            }
163            ast::Directive::Option(node) => {
164                if let Some(triple) = convert_option(&node, bom_offset) {
165                    options.push(triple);
166                }
167                None
168            }
169            ast::Directive::Include(node) => {
170                if let Some(pair) = convert_include(&node, bom_offset) {
171                    includes.push(pair);
172                }
173                None
174            }
175            ast::Directive::Plugin(node) => {
176                if let Some(triple) = convert_plugin(&node, bom_offset) {
177                    plugins.push(triple);
178                }
179                None
180            }
181            // State-only side effects: mutate the inherited
182            // tag/meta sets that apply to subsequent directives.
183            ast::Directive::Pushtag(node) => {
184                if let Some(tag_token) = node.tag() {
185                    let span = node_span(node.syntax(), bom_offset);
186                    tag_stack.push((Tag::new(tag_token.text().trim_start_matches('#')), span));
187                }
188                None
189            }
190            ast::Directive::Poptag(node) => {
191                if let Some(tag_token) = node.tag() {
192                    let name = tag_token.text().trim_start_matches('#');
193                    if let Some(pos) = tag_stack.iter().rposition(|(t, _)| t.as_str() == name) {
194                        tag_stack.remove(pos);
195                    } else {
196                        errors.push(crate::ParseError::new(
197                            crate::ParseErrorKind::InvalidPoptag(name.to_string()),
198                            node_span(node.syntax(), bom_offset),
199                        ));
200                    }
201                }
202                None
203            }
204            ast::Directive::Pushmeta(node) => {
205                if let Some(key_token) = node.key() {
206                    let key = key_token.text_without_colon().to_string();
207                    let value = pushmeta_value(node.syntax());
208                    let span = node_span(node.syntax(), bom_offset);
209                    meta_stack.push((key, value, span));
210                }
211                None
212            }
213            ast::Directive::Popmeta(node) => {
214                if let Some(key_token) = node.key() {
215                    let key = key_token.text_without_colon().to_string();
216                    if let Some(pos) = meta_stack.iter().rposition(|(k, _, _)| k == &key) {
217                        meta_stack.remove(pos);
218                    } else {
219                        errors.push(crate::ParseError::new(
220                            crate::ParseErrorKind::InvalidPopmeta(key),
221                            node_span(node.syntax(), bom_offset),
222                        ));
223                    }
224                }
225                None
226            }
227        };
228        if let Some(mut spanned) = pushed_directive {
229            apply_inherited_state(&mut spanned.value, &tag_stack, &meta_stack);
230            directives.push(spanned);
231            directive_nodes.push(cst_node);
232        } else if is_directive_producing && errors.len() == errors_before {
233            // Producing converter silently dropped the directive
234            // (typically: a required field like an account on
235            // `open`, an amount on `balance`, or a source account
236            // on `pad` was missing). Mirror the legacy parser's
237            // top-level error-recovery path which emits a
238            // `SyntaxError("unexpected input")` for the failed
239            // span so downstream tooling sees the same shape.
240            errors.push(crate::ParseError::new(
241                crate::ParseErrorKind::SyntaxError("unexpected input".to_string()),
242                node_span(&cst_node, bom_offset),
243            ));
244        }
245    }
246
247    // Unclosed pushtag/pushmeta at EOF - legacy emits one error
248    // per leftover stack entry, pointing at the originating push
249    // directive's span.
250    for (tag, span) in &tag_stack {
251        errors.push(crate::ParseError::new(
252            crate::ParseErrorKind::UnclosedPushtag(tag.as_str().to_string()),
253            *span,
254        ));
255    }
256    for (key, _, span) in &meta_stack {
257        errors.push(crate::ParseError::new(
258            crate::ParseErrorKind::UnclosedPushmeta(key.clone()),
259            *span,
260        ));
261    }
262    errors.sort_by_key(|e| e.span.start);
263
264    // Post-pass: align directive spans with the legacy parser's
265    // convention (skip leading trivia, extend through inter-
266    // directive trivia to the next directive's start).
267    fixup_directive_spans(&source_file, bom_offset, &directive_nodes, &mut directives);
268
269    // Pre-compute the file-wide formatter alignment from the
270    // same `source_file` we just walked, so the formatter (and
271    // every LSP handler that calls it) can skip the O(N_postings)
272    // re-walk on every format request. See
273    // `ParseResult::alignment` rustdoc for the cache contract;
274    // the equivalence with a fresh `compute_alignment` call is
275    // pinned by `parse_result_alignment_cache::*` (lib.rs tests).
276    let alignment = crate::cst::format::compute_alignment(&source_file);
277
278    // Capture the green root before we drop `source_file`. The
279    // `.green()` call returns a Cow so we promote to owned with
280    // `into_owned()`; the resulting `GreenNode` is reference-
281    // counted internally, cheap to clone, and `Send + Sync` -
282    // safe to stash in `Arc<ParseResult>` that the LSP shares
283    // across threads.
284    let syntax_root = source_file.syntax().green().into_owned();
285
286    ParseResult {
287        directives,
288        options,
289        includes,
290        plugins,
291        comments,
292        errors,
293        warnings,
294        currency_occurrences,
295        account_occurrences,
296        has_leading_bom,
297        syntax_root,
298        alignment,
299    }
300}
301
302// ---- Directive converters --------------------------------------
303
/// Whitelist of booking-method names accepted on an `open`
/// directive (beancount v3). It must stay in sync with the list
/// the legacy `parser::parse_open_directive` enforces.
///
/// `convert_open` compares an explicit booking string against
/// these entries verbatim (exact, case-sensitive match). A string
/// that is not listed causes the directive to be dropped and an
/// `InvalidBookingMethod` error to be recorded.
const VALID_BOOKING_METHODS: &[&str] = &[
    "FIFO",
    "STRICT",
    "STRICT_WITH_SIZE",
    "LIFO",
    "HIFO",
    "NONE",
    "AVERAGE",
];
318
319fn convert_open(
320    node: &OpenDirective,
321    bom_offset: u32,
322    errors: &mut Vec<crate::ParseError>,
323) -> Option<Spanned<Directive>> {
324    let date = parse_directive_date(&node.date()?, errors, bom_offset)?;
325    let account = Account::new(node.account()?.text());
326    let currencies: Vec<Currency> = node.currencies().map(|c| Currency::new(c.text())).collect();
327    let booking = node
328        .booking_method()
329        .and_then(|s| s.text_unquoted().map(String::from));
330    let span = node_span(node.syntax(), bom_offset);
331    if let Some(b) = &booking
332        && !VALID_BOOKING_METHODS.contains(&b.as_str())
333    {
334        errors.push(crate::ParseError::new(
335            crate::ParseErrorKind::InvalidBookingMethod(b.clone()),
336            span,
337        ));
338        return None;
339    }
340    let meta = convert_meta_entries(node.syntax());
341
342    let open = rustledger_core::directive::Open {
343        date,
344        account,
345        currencies,
346        booking,
347        meta,
348    };
349    Some(Spanned::new(Directive::Open(open), span))
350}
351
352fn convert_close(
353    node: &CloseDirective,
354    bom_offset: u32,
355    errors: &mut Vec<crate::ParseError>,
356) -> Option<Spanned<Directive>> {
357    let date = parse_directive_date(&node.date()?, errors, bom_offset)?;
358    let account = Account::new(node.account()?.text());
359    let meta = convert_meta_entries(node.syntax());
360
361    let close = rustledger_core::directive::Close {
362        date,
363        account,
364        meta,
365    };
366    let span = node_span(node.syntax(), bom_offset);
367    Some(Spanned::new(Directive::Close(close), span))
368}
369
370fn convert_commodity(
371    node: &CommodityDirective,
372    bom_offset: u32,
373    errors: &mut Vec<crate::ParseError>,
374) -> Option<Spanned<Directive>> {
375    let date = parse_directive_date(&node.date()?, errors, bom_offset)?;
376    let currency = Currency::new(node.currency()?.text());
377    let meta = convert_meta_entries(node.syntax());
378
379    let commodity = rustledger_core::directive::Commodity {
380        date,
381        currency,
382        meta,
383    };
384    let span = node_span(node.syntax(), bom_offset);
385    Some(Spanned::new(Directive::Commodity(commodity), span))
386}
387
388fn convert_note(
389    node: &NoteDirective,
390    bom_offset: u32,
391    errors: &mut Vec<crate::ParseError>,
392) -> Option<Spanned<Directive>> {
393    let date = parse_directive_date(&node.date()?, errors, bom_offset)?;
394    let account = Account::new(node.account()?.text());
395    let comment = node.text()?.text_unquoted()?.to_string();
396    let meta = convert_meta_entries(node.syntax());
397
398    let note = rustledger_core::directive::Note {
399        date,
400        account,
401        comment,
402        meta,
403    };
404    let span = node_span(node.syntax(), bom_offset);
405    Some(Spanned::new(Directive::Note(note), span))
406}
407
408fn convert_document(
409    node: &DocumentDirective,
410    bom_offset: u32,
411    errors: &mut Vec<crate::ParseError>,
412) -> Option<Spanned<Directive>> {
413    let date = parse_directive_date(&node.date()?, errors, bom_offset)?;
414    let account = Account::new(node.account()?.text());
415    let path = node.path()?.text_unquoted()?.to_string();
416    // Trailing tags/links on the document header (legacy
417    // `parse_document_directive` collects them in a loop after
418    // the path STRING). TAG / LINK tokens only appear in the
419    // header (not in META_ENTRY children, which are walked
420    // separately below), so a direct-child token walk that
421    // stops at the first NEWLINE captures them in source order.
422    let mut tags: Vec<Tag> = Vec::new();
423    let mut links: Vec<Link> = Vec::new();
424    for el in node.syntax().children_with_tokens() {
425        let rowan::NodeOrToken::Token(t) = el else {
426            continue;
427        };
428        match t.kind() {
429            crate::SyntaxKind::NEWLINE => break,
430            crate::SyntaxKind::TAG => {
431                tags.push(Tag::new(t.text().trim_start_matches('#')));
432            }
433            crate::SyntaxKind::LINK => {
434                links.push(Link::new(t.text().trim_start_matches('^')));
435            }
436            _ => {}
437        }
438    }
439    let meta = convert_meta_entries(node.syntax());
440
441    let document = rustledger_core::directive::Document {
442        date,
443        account,
444        path,
445        tags,
446        links,
447        meta,
448    };
449    let span = node_span(node.syntax(), bom_offset);
450    Some(Spanned::new(Directive::Document(document), span))
451}
452
453fn convert_event(
454    node: &EventDirective,
455    bom_offset: u32,
456    errors: &mut Vec<crate::ParseError>,
457) -> Option<Spanned<Directive>> {
458    let date = parse_directive_date(&node.date()?, errors, bom_offset)?;
459    let event_type = node.event_type()?.text_unquoted()?.to_string();
460    let value = node.value()?.text_unquoted()?.to_string();
461    let meta = convert_meta_entries(node.syntax());
462
463    let event = rustledger_core::directive::Event {
464        date,
465        event_type,
466        value,
467        meta,
468    };
469    let span = node_span(node.syntax(), bom_offset);
470    Some(Spanned::new(Directive::Event(event), span))
471}
472
473fn convert_query(
474    node: &QueryDirective,
475    bom_offset: u32,
476    errors: &mut Vec<crate::ParseError>,
477) -> Option<Spanned<Directive>> {
478    let date = parse_directive_date(&node.date()?, errors, bom_offset)?;
479    let name = node.name()?.text_unquoted()?.to_string();
480    let query = node.query()?.text_unquoted()?.to_string();
481    let meta = convert_meta_entries(node.syntax());
482
483    let q = rustledger_core::directive::Query {
484        date,
485        name,
486        query,
487        meta,
488    };
489    let span = node_span(node.syntax(), bom_offset);
490    Some(Spanned::new(Directive::Query(q), span))
491}
492
493fn convert_price(
494    node: &PriceDirective,
495    bom_offset: u32,
496    errors: &mut Vec<crate::ParseError>,
497) -> Option<Spanned<Directive>> {
498    let date = parse_directive_date(&node.date()?, errors, bom_offset)?;
499    let base_currency = Currency::new(node.base_currency()?.text());
500    // Same arithmetic support as `convert_balance`: a price
501    // directive's value can use `+`, `-`, `*`, `/`, and parens.
502    let number = directive_arithmetic_value(node.syntax()).or_else(|| {
503        let mut n = parse_decimal_token(node.number()?.text())?;
504        if node_has_minus_before_number(node.syntax()) {
505            n = -n;
506        }
507        Some(n)
508    })?;
509    let quote_currency = Currency::new(node.quote_currency()?.text());
510    let amount = Amount::new(number, quote_currency);
511    let meta = convert_meta_entries(node.syntax());
512
513    let price = rustledger_core::directive::Price {
514        date,
515        currency: base_currency,
516        amount,
517        meta,
518    };
519    let span = node_span(node.syntax(), bom_offset);
520    Some(Spanned::new(Directive::Price(price), span))
521}
522
523fn convert_balance(
524    node: &BalanceDirective,
525    bom_offset: u32,
526    errors: &mut Vec<crate::ParseError>,
527) -> Option<Spanned<Directive>> {
528    let date = parse_directive_date(&node.date()?, errors, bom_offset)?;
529    let account = Account::new(node.account()?.text());
530    // Beancount accepts arithmetic in the balance assertion's
531    // value (`balance Assets:X 0.25 + 0.75 GBP` ≡ 1.00 GBP).
532    // Falls back to the first NUMBER token if the expression
533    // can't be evaluated, with the legacy sign-flip behavior.
534    let number = directive_arithmetic_value(node.syntax()).or_else(|| {
535        let mut n = parse_decimal_token(node.number()?.text())?;
536        if node_has_minus_before_number(node.syntax()) {
537            n = -n;
538        }
539        Some(n)
540    })?;
541    let currency = Currency::new(node.currency()?.text());
542    let amount = Amount::new(number, currency);
543    let tolerance = extract_balance_tolerance(node.syntax());
544    let meta = convert_meta_entries(node.syntax());
545
546    let balance = rustledger_core::directive::Balance {
547        date,
548        account,
549        amount,
550        tolerance,
551        meta,
552    };
553    let span = node_span(node.syntax(), bom_offset);
554    Some(Spanned::new(Directive::Balance(balance), span))
555}
556
557/// Balance directives may include an explicit tolerance via a
558/// `~` (TILDE) token followed by a NUMBER. The typed-AST surface
559/// surfaces NUMBER via `number()` (which returns the FIRST one,
560/// the asserted balance); the tolerance NUMBER comes second.
561/// Walk raw tokens until TILDE, then collect the next NUMBER.
562fn extract_balance_tolerance(node: &crate::SyntaxNode) -> Option<Decimal> {
563    let mut past_tilde = false;
564    for el in node.children_with_tokens() {
565        let rowan::NodeOrToken::Token(t) = el else {
566            continue;
567        };
568        if past_tilde && t.kind() == crate::SyntaxKind::NUMBER {
569            return parse_decimal_token(t.text());
570        }
571        if t.kind() == crate::SyntaxKind::TILDE {
572            past_tilde = true;
573        }
574    }
575    None
576}
577
578fn convert_pad(
579    node: &PadDirective,
580    bom_offset: u32,
581    errors: &mut Vec<crate::ParseError>,
582) -> Option<Spanned<Directive>> {
583    let date = parse_directive_date(&node.date()?, errors, bom_offset)?;
584    let account = Account::new(node.target_account()?.text());
585    let source_account = Account::new(node.source_account()?.text());
586    let meta = convert_meta_entries(node.syntax());
587
588    let pad = rustledger_core::directive::Pad {
589        date,
590        account,
591        source_account,
592        meta,
593    };
594    let span = node_span(node.syntax(), bom_offset);
595    Some(Spanned::new(Directive::Pad(pad), span))
596}
597
598fn convert_custom(
599    node: &CustomDirective,
600    bom_offset: u32,
601    errors: &mut Vec<crate::ParseError>,
602) -> Option<Spanned<Directive>> {
603    let date = parse_directive_date(&node.date()?, errors, bom_offset)?;
604    let custom_type = node.custom_type()?.text_unquoted()?.to_string();
605    let values = extract_custom_values(node.syntax());
606    let meta = convert_meta_entries(node.syntax());
607
608    let custom = rustledger_core::directive::Custom {
609        date,
610        custom_type,
611        values,
612        meta,
613    };
614    let span = node_span(node.syntax(), bom_offset);
615    Some(Spanned::new(Directive::Custom(custom), span))
616}
617
618/// Walk the heterogeneous value tokens after the `custom "type"`
619/// header. The legacy parser tries each value type in this order:
620/// string > account > bool > amount (NUMBER+CURRENCY) > number >
621/// date > currency. We replicate that priority on the flat token
622/// stream, with one structural pass that pairs an immediately-
623/// adjacent NUMBER+CURRENCY into an [`Amount`].
624fn extract_custom_values(node: &crate::SyntaxNode) -> Vec<MetaValue> {
625    let mut values = Vec::new();
626    let mut seen_type_string = false;
627    // Collect tokens by kind, skipping trivia. We do a two-pass:
628    // first form Amount pairs (NUMBER + CURRENCY adjacent, ignoring
629    // whitespace), then emit remaining tokens individually.
630    let raw: Vec<rowan::SyntaxToken<crate::BeancountLanguage>> = node
631        .children_with_tokens()
632        .filter_map(rowan::NodeOrToken::into_token)
633        .filter(|t| {
634            !matches!(
635                t.kind(),
636                crate::SyntaxKind::WHITESPACE
637                    | crate::SyntaxKind::NEWLINE
638                    | crate::SyntaxKind::COMMENT
639            )
640        })
641        .collect();
642
643    let mut i = 0;
644    while i < raw.len() {
645        let t = &raw[i];
646        // Skip the directive's header tokens (DATE, CUSTOM_KW, and
647        // the first STRING which is the custom-type name).
648        if !seen_type_string {
649            if t.kind() == crate::SyntaxKind::STRING {
650                seen_type_string = true;
651            }
652            i += 1;
653            continue;
654        }
655        match t.kind() {
656            crate::SyntaxKind::STRING => {
657                if let Some(s) = strip_string_quotes(t.text()) {
658                    values.push(MetaValue::String(s.to_string()));
659                }
660            }
661            crate::SyntaxKind::ACCOUNT => {
662                values.push(MetaValue::Account(Account::new(t.text())));
663            }
664            crate::SyntaxKind::BOOL_TRUE => values.push(MetaValue::Bool(true)),
665            crate::SyntaxKind::BOOL_FALSE => values.push(MetaValue::Bool(false)),
666            crate::SyntaxKind::NUMBER => {
667                // Look ahead for an adjacent CURRENCY -> Amount.
668                if let Some(next) = raw.get(i + 1)
669                    && next.kind() == crate::SyntaxKind::CURRENCY
670                    && let Some(num) = parse_decimal_token(t.text())
671                {
672                    let curr = Currency::new(next.text());
673                    values.push(MetaValue::Amount(Amount::new(num, curr)));
674                    i += 2;
675                    continue;
676                }
677                if let Some(num) = parse_decimal_token(t.text()) {
678                    values.push(MetaValue::Number(num));
679                }
680            }
681            crate::SyntaxKind::DATE => {
682                if let Some(date) = parse_date_token(t.text()) {
683                    values.push(MetaValue::Date(date));
684                }
685            }
686            crate::SyntaxKind::CURRENCY => {
687                values.push(MetaValue::Currency(Currency::new(t.text())));
688            }
689            _ => {}
690        }
691        i += 1;
692    }
693    values
694}
695
/// Return the text between the surrounding double quotes of a
/// raw string token.
///
/// Yields `None` unless `raw` both starts and ends with `"` and
/// the two quotes are distinct characters (so a lone `"` is
/// rejected). Quotes in the interior are left untouched and no
/// escape processing is performed.
fn strip_string_quotes(raw: &str) -> Option<&str> {
    // Removing the prefix first means a single `"` leaves an
    // empty remainder, which then has no closing quote to strip.
    raw.strip_prefix('"')?.strip_suffix('"')
}
703
704fn convert_option(node: &OptionDirective, bom_offset: u32) -> Option<(String, String, Span)> {
705    let key = node.key()?.text_unquoted()?.to_string();
706    let value = node.value()?.text_unquoted()?.to_string();
707    Some((
708        key,
709        value,
710        single_line_directive_span(node.syntax(), bom_offset),
711    ))
712}
713
714fn convert_include(node: &IncludeDirective, bom_offset: u32) -> Option<(String, Span)> {
715    let path = node.path()?.text_unquoted()?.to_string();
716    Some((path, single_line_directive_span(node.syntax(), bom_offset)))
717}
718
719fn convert_plugin(
720    node: &PluginDirective,
721    bom_offset: u32,
722) -> Option<(String, Option<String>, Span)> {
723    let module = node.module()?.text_unquoted()?.to_string();
724    let config = node
725        .config()
726        .and_then(|c| c.text_unquoted().map(String::from));
727    Some((
728        module,
729        config,
730        single_line_directive_span(node.syntax(), bom_offset),
731    ))
732}
733
734// ---- Transaction + Posting + sub-nodes -------------------------
735
736fn convert_transaction(
737    node: &AstTransaction,
738    bom_offset: u32,
739    errors: &mut Vec<crate::ParseError>,
740) -> Option<Spanned<Directive>> {
741    let date = parse_directive_date(&node.date()?, errors, bom_offset)?;
742
743    // Flag: explicit (TransactionFlag) or implied (leading STRING
744    // with no flag token; defaults to '*').
745    let flag = node.flag().map_or('*', |f| flag_char_from_transaction(&f));
746
747    // Header strings: with 2 -> payee + narration; with 1 ->
748    // narration-only; with 3+ -> ambiguous (typed-AST surface
749    // returns None for both, matching the round-2 review fix).
750    let strings: Vec<String> = node
751        .strings()
752        .filter_map(|s| s.text_unquoted().map(String::from))
753        .collect();
754    let (payee_str, narration_str) = match strings.len() {
755        0 => (None, String::new()),
756        1 => (None, strings.into_iter().next().unwrap()),
757        2 => {
758            let mut it = strings.into_iter();
759            let p = it.next().unwrap();
760            let n = it.next().unwrap();
761            (Some(p), n)
762        }
763        // 3+ strings: surface only the last as narration; the
764        // middle ones are unreachable through this typed shape
765        // (matches the round-2 docstring).
766        _ => (None, strings.last().cloned().unwrap_or_default()),
767    };
768
769    let payee = payee_str.map(InternedStr::from);
770    let narration = InternedStr::from(narration_str);
771
772    // Tags / links from the TRANSACTION node: the typed AST
773    // accessor `tags()`/`links()` is scoped to the header region.
774    // Trailing TAG / LINK tokens appearing on body lines (after
775    // the header NEWLINE, OUTSIDE any POSTING / META_ENTRY child
776    // node) are also part of the transaction's tag/link set per
777    // Beancount semantics - `extract_transaction_body_errors`
778    // already exempts them from the malformed-body diagnostic for
779    // this reason. Aggregate them here so they don't silently
780    // disappear.
781    let mut tags: Vec<Tag> = node
782        .tags()
783        .map(|t| Tag::new(t.text().trim_start_matches('#')))
784        .collect();
785    let mut links: Vec<Link> = node
786        .links()
787        .map(|l| Link::new(l.text().trim_start_matches('^')))
788        .collect();
789    for el in node.syntax().children_with_tokens() {
790        let rowan::NodeOrToken::Token(t) = el else {
791            // Nodes (POSTING / META_ENTRY) own their own internal
792            // tokens; we don't recurse into them.
793            continue;
794        };
795        match t.kind() {
796            crate::SyntaxKind::TAG => {
797                let stripped = t.text().trim_start_matches('#');
798                let new_tag = Tag::new(stripped);
799                if !tags.contains(&new_tag) {
800                    tags.push(new_tag);
801                }
802            }
803            crate::SyntaxKind::LINK => {
804                let stripped = t.text().trim_start_matches('^');
805                let new_link = Link::new(stripped);
806                if !links.contains(&new_link) {
807                    links.push(new_link);
808                }
809            }
810            _ => {}
811        }
812    }
813
814    // Transaction-level metadata (META_ENTRY children directly on
815    // the TRANSACTION node, NOT on POSTING children).
816    let meta = convert_meta_entries(node.syntax());
817
818    // Postings + pre-posting comments. The CST puts inter-
819    // posting trivia (including `; comment` lines) as flat
820    // tokens DIRECT under TRANSACTION between two POSTING
821    // nodes. Walk in source order: COMMENT tokens accumulate
822    // into `pending`, then attach to the next POSTING node's
823    // `comments` field when we reach it. Tokens before the
824    // header NEWLINE are skipped (they're transaction-header
825    // content). Comments that remain in `pending` after the
826    // final posting belong to the transaction itself
827    // (legacy: `txn.trailing_comments = pending_comments`).
828    let (postings, trailing_comments) = collect_postings_with_comments(node, bom_offset, errors);
829
830    // Deprecated `|` separator between payee and narration: a
831    // PIPE token in the header region. Legacy treats this as a
832    // recoverable warning-shaped error (`DeprecatedPipeSymbol`)
833    // and keeps the directive, so we do the same here.
834    if header_has_pipe(node) {
835        errors.push(crate::ParseError::new(
836            crate::ParseErrorKind::DeprecatedPipeSymbol,
837            node_span(node.syntax(), bom_offset),
838        ));
839    }
840
841    let txn = rustledger_core::directive::Transaction {
842        date,
843        flag,
844        payee,
845        narration,
846        tags,
847        links,
848        meta,
849        postings,
850        trailing_comments,
851    };
852    let span = node_span(node.syntax(), bom_offset);
853    Some(Spanned::new(Directive::Transaction(txn), span))
854}
855
856/// Returns true if the TRANSACTION header (direct-child tokens
857/// up to the first NEWLINE) contains a `PIPE` token. The legacy
858/// parser surfaces a `DeprecatedPipeSymbol` diagnostic for this
859/// shape; the CST lexer classifies `|` as `PIPE`, so we just
860/// scan the header directly.
861fn header_has_pipe(node: &AstTransaction) -> bool {
862    for el in node.syntax().children_with_tokens() {
863        let rowan::NodeOrToken::Token(t) = el else {
864            continue;
865        };
866        if t.kind() == crate::SyntaxKind::NEWLINE {
867            return false;
868        }
869        if t.kind() == crate::SyntaxKind::PIPE {
870            return true;
871        }
872    }
873    false
874}
875
876/// Walk a `TRANSACTION`'s children in source order, attaching any
877/// inter-posting `; comment` lines that appear as flat tokens
878/// between `POSTING` nodes to the NEXT posting's `comments`
879/// field. Matches the legacy parser, which collects
880/// `pending_comments` while reading the body and applies them to
881/// the next posting it parses.
882///
883/// Tokens before the header-terminator NEWLINE belong to the
884/// transaction header (date/flag/strings/tags/links) and are
885/// skipped.
886///
887/// Returns `(postings, trailing_comments)`: the second element is
888/// any pending comments left over AFTER the final posting, which
889/// legacy assigns to `Transaction::trailing_comments`.
890fn collect_postings_with_comments(
891    node: &AstTransaction,
892    bom_offset: u32,
893    errors: &mut Vec<crate::ParseError>,
894) -> (Vec<Spanned<Posting>>, Vec<String>) {
895    let mut out = Vec::new();
896    let mut pending: Vec<String> = Vec::new();
897    let mut past_header = false;
898    for el in node.syntax().children_with_tokens() {
899        match el {
900            rowan::NodeOrToken::Token(t) => {
901                if !past_header {
902                    if t.kind() == crate::SyntaxKind::NEWLINE {
903                        past_header = true;
904                    }
905                    continue;
906                }
907                if is_comment_kind(t.kind()) {
908                    pending.push(t.text().to_string());
909                } else if !is_trivia_kind(t.kind())
910                    && !matches!(t.kind(), crate::SyntaxKind::TAG | crate::SyntaxKind::LINK)
911                {
912                    // Non-trivia, non-comment token in the
913                    // transaction body that's NOT inside a
914                    // POSTING / META_ENTRY child node = malformed
915                    // body line (caught separately by
916                    // `extract_transaction_body_errors`). Treat
917                    // the same as a failed POSTING: clear pending
918                    // so the malformed line's preceding comments
919                    // don't migrate onto the next valid posting.
920                    //
921                    // EXEMPT TAG / LINK: trailing tags/links on
922                    // transaction body lines (after the header)
923                    // are valid Beancount - they extend the
924                    // transaction's tag/link set without being
925                    // a new posting. Treating them as malformed
926                    // would drop legitimate preceding comments
927                    // that belong to the NEXT posting. The same
928                    // exemption appears in
929                    // `extract_transaction_body_errors`, which
930                    // does the parallel "is this a malformed
931                    // body line?" classification.
932                    pending.clear();
933                }
934            }
935            rowan::NodeOrToken::Node(n) => {
936                if !past_header {
937                    // META_ENTRY or POSTING before the header
938                    // NEWLINE shouldn't happen in well-formed
939                    // input; treat any child node as "past the
940                    // header" if we somehow encounter one.
941                    past_header = true;
942                }
943                if let Some(p) = ast::Posting::cast(n) {
944                    if let Some(mut spanned) = convert_posting(&p, bom_offset, errors) {
945                        if !pending.is_empty() {
946                            spanned.value.comments = std::mem::take(&mut pending);
947                        }
948                        out.push(spanned);
949                    } else {
950                        // Failed posting consumes any pending
951                        // inter-posting comments - they belonged
952                        // to it. Without this clear, a malformed
953                        // posting's preceding comments would
954                        // migrate forward and attach to the NEXT
955                        // successful posting, misattributing them
956                        // visibly to the wrong account line.
957                        pending.clear();
958                    }
959                }
960                // META_ENTRY child nodes: comments collected so
961                // far don't apply to them (they're transaction
962                // metadata). Drop them.
963            }
964        }
965    }
966    (out, pending)
967}
968
969fn flag_char_from_transaction(flag: &ast::TransactionFlag) -> char {
970    match flag.classify() {
971        TransactionFlagKind::Star | TransactionFlagKind::Txn => '*',
972        TransactionFlagKind::Pending => '!',
973        TransactionFlagKind::Hash => '#',
974        TransactionFlagKind::Letter | TransactionFlagKind::CurrencyLetter => {
975            flag.text().chars().next().unwrap_or('*')
976        }
977    }
978}
979
980fn convert_posting(
981    node: &ast::Posting,
982    bom_offset: u32,
983    errors: &mut Vec<crate::ParseError>,
984) -> Option<Spanned<Posting>> {
985    let account = Account::new(node.account()?.text());
986
987    let flag = node.flag().map(|f| flag_char_from_posting(&f));
988
989    // A well-formed posting has AT MOST one `AMOUNT` child node
990    // (the units). The CST builder will accept input like
991    // `Expenses:Food  5 USD + 3 USD` and produce TWO sibling
992    // `AMOUNT` nodes joined by a flat PLUS token, because the
993    // grammar doesn't enforce that PLUS between two complete
994    // amounts is invalid. `Posting::amount()` returns only the
995    // first via `first_child`, so without this guard the second
996    // amount (and the joining `+`) would be silently dropped and
997    // the user's transaction would balance against the wrong
998    // number. Emit a `SyntaxError` pointing at the trailing
999    // siblings and keep the first amount.
1000    let mut amount_children = node
1001        .syntax()
1002        .children()
1003        .filter(|n| ast::Amount::can_cast(n.kind()));
1004    let first_amount = amount_children.next();
1005    let first_amount_end: Option<u32> = first_amount.as_ref().map(|n| n.text_range().end().into());
1006    let mut sibling_start: Option<u32> = None;
1007    let mut sibling_end: u32 = 0;
1008    for extra in amount_children {
1009        let range = extra.text_range();
1010        let start_u32: u32 = range.start().into();
1011        let end_u32: u32 = range.end().into();
1012        if sibling_start.is_none() {
1013            sibling_start = Some(start_u32);
1014        }
1015        sibling_end = end_u32;
1016    }
1017    if let Some(start_u32) = sibling_start {
1018        // Extend the span back to the end of the FIRST AMOUNT so
1019        // the diagnostic underline covers any joining operator
1020        // (`+`, `*`, whitespace) between the kept amount and the
1021        // orphans. Without this, a user sees only `3 USD` in
1022        // `5 USD + 3 USD` highlighted - and may not realize the
1023        // `+ 3 USD` together is what needs to be removed.
1024        let underline_start = first_amount_end.unwrap_or(start_u32);
1025        let span = Span::new(
1026            (underline_start + bom_offset) as usize,
1027            (sibling_end + bom_offset) as usize,
1028        );
1029        errors.push(crate::ParseError::new(
1030            crate::ParseErrorKind::SyntaxError(
1031                "unexpected trailing tokens after posting amount".to_string(),
1032            ),
1033            span,
1034        ));
1035    }
1036    let units = first_amount
1037        .and_then(ast::Amount::cast)
1038        .and_then(|amt| convert_amount_to_incomplete(&amt, errors, bom_offset));
1039    let cost = node.cost_spec().map(|cs| convert_cost_spec(&cs));
1040    let price = node
1041        .price_annotation()
1042        .map(|pa| convert_price_annotation(&pa, errors, bom_offset));
1043    let meta = convert_meta_entries(node.syntax());
1044
1045    // Trailing comments on the posting line: COMMENT direct-
1046    // child tokens BEFORE the terminator NEWLINE. The legacy
1047    // parser collects same-line `;` content into
1048    // `posting.trailing_comments`.
1049    let trailing_comments: Vec<String> = node
1050        .syntax()
1051        .children_with_tokens()
1052        .filter_map(rowan::NodeOrToken::into_token)
1053        .take_while(|t| t.kind() != crate::SyntaxKind::NEWLINE)
1054        .filter(|t| is_comment_kind(t.kind()))
1055        .map(|t| t.text().to_string())
1056        .collect();
1057
1058    let posting = Posting {
1059        account,
1060        units,
1061        cost,
1062        price,
1063        flag,
1064        meta,
1065        comments: Vec::new(),
1066        trailing_comments,
1067    };
1068    let span = posting_span(node.syntax(), bom_offset);
1069    Some(Spanned::new(posting, span))
1070}
1071
1072fn flag_char_from_posting(flag: &ast::PostingFlag) -> char {
1073    match flag.classify() {
1074        PostingFlagKind::Star => '*',
1075        PostingFlagKind::Pending => '!',
1076        PostingFlagKind::Hash => '#',
1077        PostingFlagKind::Letter | PostingFlagKind::CurrencyLetter => {
1078            flag.text().chars().next().unwrap_or('*')
1079        }
1080    }
1081}
1082
1083/// Convert an AMOUNT node into an [`IncompleteAmount`]. Returns
1084/// `None` if neither a number nor a currency is present (which
1085/// shouldn't happen for a well-formed AMOUNT, but matches the
1086/// lossless CST contract). Sign is folded into the number.
1087///
1088/// **Arithmetic limitation**: when the AMOUNT contains an
1089/// arithmetic expression (`100+5 USD`), only the FIRST `NUMBER`
1090/// is used. A proper expression evaluator is deferred - none of
1091/// the directive types we currently handle outside of postings
1092/// use AMOUNT shapes that the legacy parser would have evaluated
1093/// differently.
1094fn convert_amount_to_incomplete(
1095    amt: &ast::Amount,
1096    errors: &mut Vec<crate::ParseError>,
1097    bom_offset: u32,
1098) -> Option<IncompleteAmount> {
1099    // Arithmetic AMOUNT expressions (`120 / 3 USD`, `(1+2) USD`):
1100    // run the recursive-descent evaluator on the flat token
1101    // stream. Fast-path plain `NUMBER CURRENCY` shapes to keep
1102    // the common case allocation-free.
1103    let number = if amt.is_arithmetic() {
1104        let evaluated = evaluate_amount_expression(amt);
1105        if evaluated.is_none() {
1106            // `is_arithmetic` was true but the evaluator gave up
1107            // (decimal overflow, division by zero, malformed
1108            // expression, unbalanced parens). Without this
1109            // emission the amount silently degrades to
1110            // `CurrencyOnly` and the user only sees a downstream
1111            // "transaction doesn't balance" - masking the actual
1112            // root cause. Pin the span to the AMOUNT node so the
1113            // diagnostic underlines the offending expression.
1114            let range = amt.syntax().text_range();
1115            let start: u32 = range.start().into();
1116            let end: u32 = range.end().into();
1117            let span = Span::new((start + bom_offset) as usize, (end + bom_offset) as usize);
1118            errors.push(crate::ParseError::new(
1119                crate::ParseErrorKind::SyntaxError(
1120                    "invalid arithmetic expression in amount (overflow, division by zero, or malformed)"
1121                        .to_string(),
1122                ),
1123                span,
1124            ));
1125        }
1126        evaluated
1127    } else {
1128        amt.number().and_then(|n| {
1129            let parsed = parse_decimal_token(n.text());
1130            if parsed.is_none() {
1131                // Symmetry with the arithmetic-failure path: when
1132                // a plain NUMBER token in an AMOUNT can't be
1133                // turned into a Decimal (e.g., 30+ digits - the
1134                // lexer's NUMBER regex has no max length but
1135                // `rust_decimal`'s 28-digit ceiling rejects it),
1136                // surface a diagnostic instead of silently
1137                // degrading to `CurrencyOnly`. Without this the
1138                // user only sees "transaction doesn't balance"
1139                // and never learns the parser dropped a number.
1140                let range = n.syntax().text_range();
1141                let start: u32 = range.start().into();
1142                let end: u32 = range.end().into();
1143                let span = Span::new((start + bom_offset) as usize, (end + bom_offset) as usize);
1144                errors.push(crate::ParseError::new(
1145                    crate::ParseErrorKind::SyntaxError(
1146                        "invalid number in amount (likely exceeds 28-digit Decimal precision)"
1147                            .to_string(),
1148                    ),
1149                    span,
1150                ));
1151            }
1152            let mut value = parsed?;
1153            if let Some(sign) = amt.sign()
1154                && sign.is_minus()
1155            {
1156                value = -value;
1157            }
1158            Some(value)
1159        })
1160    };
1161    let currency = amt.currency().map(|c| Currency::new(c.text()));
1162    match (number, currency) {
1163        (Some(n), Some(c)) => Some(IncompleteAmount::Complete(Amount::new(n, c))),
1164        (Some(n), None) => Some(IncompleteAmount::NumberOnly(n)),
1165        (None, Some(c)) => Some(IncompleteAmount::CurrencyOnly(c)),
1166        (None, None) => None,
1167    }
1168}
1169
1170/// Evaluate the arithmetic expression inside an `AMOUNT` node and
1171/// return the resulting decimal. Returns `None` when evaluation
1172/// fails (division by zero, decimal overflow, malformed parens,
1173/// missing operand).
1174///
1175/// AMOUNT children are flat tokens (no expression sub-tree): a
1176/// sequence of `NUMBER`, `PLUS`, `MINUS`, `STAR`, `SLASH`,
1177/// `L_PAREN`, `R_PAREN`, and a trailing `CURRENCY` at depth 0
1178/// that's the amount's currency rather than part of the
1179/// expression. The currency is stripped first; the rest goes
1180/// through recursive descent mirroring legacy
1181/// `parser::parse_expr` / `parse_term` / `parse_primary`.
1182///
1183/// Operator precedence and unary handling match Python beancount:
1184/// `*` and `/` bind tighter than `+` and `-`; a leading or post-
1185/// operator `-` is unary negation.
1186fn evaluate_amount_expression(amt: &ast::Amount) -> Option<Decimal> {
1187    let tokens = amount_expression_tokens(amt);
1188    let mut cursor = 0usize;
1189    let value = parse_arith_expr(&tokens, &mut cursor)?;
1190    // Trailing tokens after a successful parse mean the expression
1191    // is malformed (`1+2 3 USD`); refuse rather than silently
1192    // dropping them.
1193    if cursor != tokens.len() {
1194        return None;
1195    }
1196    Some(value)
1197}
1198
1199/// Evaluate the arithmetic expression that appears as the
1200/// numeric value of a `BALANCE` / `PRICE` directive, returning
1201/// the resulting decimal or `None` if not arithmetic (single
1202/// NUMBER, callers fall back to `parse_decimal_token`).
1203///
1204/// Unlike `AMOUNT`, these directives don't wrap their value in
1205/// a dedicated node - the tokens are flat under the directive
1206/// node. The relevant region is from the FIRST `NUMBER` token up
1207/// to (but not including) the FIRST `CURRENCY` token at paren-
1208/// depth 0 (the amount currency). For BALANCE, this correctly
1209/// stops before any trailing `~ NUMBER [CURRENCY]` tolerance
1210/// region too.
1211///
1212/// Returns `Some` only when the slice contains at least one
1213/// arithmetic operator (`+`, `-`, `*`, `/`) or parens - for a
1214/// bare single `NUMBER`, returns `None` so the caller can use
1215/// the existing fast path (which preserves the legacy sign-flip
1216/// behavior).
1217fn directive_arithmetic_value(node: &crate::SyntaxNode) -> Option<Decimal> {
1218    let raw: Vec<crate::SyntaxToken> = node
1219        .children_with_tokens()
1220        .filter_map(rowan::NodeOrToken::into_token)
1221        .filter(|t| !is_trivia_kind(t.kind()))
1222        .skip_while(|t| t.kind() != crate::SyntaxKind::NUMBER)
1223        .collect();
1224    let mut depth: i32 = 0;
1225    let mut first_currency_idx: Option<usize> = None;
1226    for (i, t) in raw.iter().enumerate() {
1227        match t.kind() {
1228            crate::SyntaxKind::L_PAREN => depth += 1,
1229            crate::SyntaxKind::R_PAREN => depth -= 1,
1230            crate::SyntaxKind::CURRENCY if depth == 0 && first_currency_idx.is_none() => {
1231                first_currency_idx = Some(i);
1232            }
1233            _ => {}
1234        }
1235    }
1236    let end = first_currency_idx.unwrap_or(raw.len());
1237    let tokens: Vec<crate::SyntaxToken> = raw.into_iter().take(end).collect();
1238    // Fast-path: zero or one token = no arithmetic.
1239    let has_op = tokens.iter().any(|t| {
1240        matches!(
1241            t.kind(),
1242            crate::SyntaxKind::PLUS
1243                | crate::SyntaxKind::MINUS
1244                | crate::SyntaxKind::STAR
1245                | crate::SyntaxKind::SLASH
1246                | crate::SyntaxKind::L_PAREN
1247        )
1248    });
1249    if !has_op {
1250        return None;
1251    }
1252    let mut cursor = 0usize;
1253    let value = parse_arith_expr(&tokens, &mut cursor)?;
1254    if cursor != tokens.len() {
1255        return None;
1256    }
1257    Some(value)
1258}
1259
1260/// Collect AMOUNT's expression tokens - every non-trivia direct-
1261/// child token EXCEPT the trailing `CURRENCY` at paren-depth 0
1262/// (which is the amount's currency, not part of the expression).
1263/// Parens at any depth are preserved so `parse_arith_primary` can
1264/// recurse through them.
1265fn amount_expression_tokens(amt: &ast::Amount) -> Vec<crate::SyntaxToken> {
1266    let raw: Vec<crate::SyntaxToken> = amt
1267        .syntax()
1268        .children_with_tokens()
1269        .filter_map(rowan::NodeOrToken::into_token)
1270        .filter(|t| !is_trivia_kind(t.kind()))
1271        .collect();
1272    // Find the index of the LAST `CURRENCY` at depth 0 - same
1273    // disambiguator as `Amount::currency()`. Tokens before that
1274    // index form the arithmetic expression.
1275    let mut depth: i32 = 0;
1276    let mut trailing_currency_idx: Option<usize> = None;
1277    for (i, t) in raw.iter().enumerate() {
1278        match t.kind() {
1279            crate::SyntaxKind::L_PAREN => depth += 1,
1280            crate::SyntaxKind::R_PAREN => depth -= 1,
1281            crate::SyntaxKind::CURRENCY if depth == 0 => trailing_currency_idx = Some(i),
1282            _ => {}
1283        }
1284    }
1285    let end = trailing_currency_idx.unwrap_or(raw.len());
1286    raw.into_iter().take(end).collect()
1287}
1288
1289/// `expr := term (('+' | '-') term)*` - left-associative.
1290fn parse_arith_expr(tokens: &[crate::SyntaxToken], cursor: &mut usize) -> Option<Decimal> {
1291    let mut result = parse_arith_term(tokens, cursor)?;
1292    while let Some(op) = tokens.get(*cursor).map(crate::SyntaxToken::kind) {
1293        match op {
1294            crate::SyntaxKind::PLUS => {
1295                *cursor += 1;
1296                let rhs = parse_arith_term(tokens, cursor)?;
1297                result = result.checked_add(rhs)?;
1298            }
1299            crate::SyntaxKind::MINUS => {
1300                *cursor += 1;
1301                let rhs = parse_arith_term(tokens, cursor)?;
1302                result = result.checked_sub(rhs)?;
1303            }
1304            _ => break,
1305        }
1306    }
1307    Some(result)
1308}
1309
1310/// `term := primary (('*' | '/') primary)*` - left-associative.
1311fn parse_arith_term(tokens: &[crate::SyntaxToken], cursor: &mut usize) -> Option<Decimal> {
1312    let mut result = parse_arith_primary(tokens, cursor)?;
1313    while let Some(op) = tokens.get(*cursor).map(crate::SyntaxToken::kind) {
1314        match op {
1315            crate::SyntaxKind::STAR => {
1316                *cursor += 1;
1317                let rhs = parse_arith_primary(tokens, cursor)?;
1318                result = result.checked_mul(rhs)?;
1319            }
1320            crate::SyntaxKind::SLASH => {
1321                *cursor += 1;
1322                let rhs = parse_arith_primary(tokens, cursor)?;
1323                if rhs.is_zero() {
1324                    return None;
1325                }
1326                result = result.checked_div(rhs)?;
1327            }
1328            _ => break,
1329        }
1330    }
1331    Some(result)
1332}
1333
1334/// `primary := '(' expr ')' | '-' primary | '+' primary | NUMBER`.
1335fn parse_arith_primary(tokens: &[crate::SyntaxToken], cursor: &mut usize) -> Option<Decimal> {
1336    let t = tokens.get(*cursor)?;
1337    match t.kind() {
1338        crate::SyntaxKind::L_PAREN => {
1339            *cursor += 1;
1340            let inner = parse_arith_expr(tokens, cursor)?;
1341            // Mandatory closer; bail (returning None) on unbalance
1342            // - `Amount::currency()` already refuses to surface a
1343            // currency for unbalanced parens, so the amount as a
1344            // whole degrades cleanly to `NumberOnly`/`None`.
1345            let close = tokens.get(*cursor)?;
1346            if close.kind() != crate::SyntaxKind::R_PAREN {
1347                return None;
1348            }
1349            *cursor += 1;
1350            Some(inner)
1351        }
1352        crate::SyntaxKind::MINUS => {
1353            *cursor += 1;
1354            let inner = parse_arith_primary(tokens, cursor)?;
1355            Some(-inner)
1356        }
1357        crate::SyntaxKind::PLUS => {
1358            *cursor += 1;
1359            parse_arith_primary(tokens, cursor)
1360        }
1361        crate::SyntaxKind::NUMBER => {
1362            let value = parse_decimal_token(t.text())?;
1363            *cursor += 1;
1364            Some(value)
1365        }
1366        _ => None,
1367    }
1368}
1369
1370fn convert_cost_spec(cs: &ast::CostSpec) -> CostSpec {
1371    let merge = cs.is_merge();
1372    let is_total = cs.is_total();
1373
1374    // `{N # T CCY}` form: the value AFTER the `#` is the total
1375    // (per-unit `N` is informationally redundant and the booker
1376    // derives it from `T / |units|`). Pin this here so the form
1377    // is semantically equivalent to `{{T CCY}}` (matching Python
1378    // beancount). Without this, the FIRST `NUMBER` would be
1379    // surfaced as `PerUnit{N}` and the post-`#` total would be
1380    // silently dropped - inverting the post-booking value of
1381    // every cost-basis read of this spec form.
1382    let post_hash_total = cost_total_after_hash(cs);
1383
1384    let cost_number = if let Some(total) = post_hash_total {
1385        Some(CostNumber::Total { value: total })
1386    } else {
1387        let number = cs.number().and_then(|n| parse_decimal_token(n.text()));
1388        match (number, is_total) {
1389            (Some(v), true) => Some(CostNumber::Total { value: v }),
1390            (Some(v), false) => Some(CostNumber::PerUnit { value: v }),
1391            (None, _) => None,
1392        }
1393    };
1394
1395    let currency = cs.currency().map(|c| Currency::new(c.text()));
1396    let date = cs.date().and_then(|d| parse_date_token(d.text()));
1397    let label = cs.label().and_then(|s| s.text_unquoted().map(String::from));
1398
1399    CostSpec {
1400        number: cost_number,
1401        currency,
1402        date,
1403        label,
1404        merge,
1405    }
1406}
1407
1408/// Detect the `{N # T CCY}` cost-spec shape (a `HASH` token
1409/// between two `NUMBER` tokens at the cost-spec's depth) and
1410/// return `T` as a `Decimal`. Returns `None` for every other
1411/// shape - `{N CCY}`, `{{T CCY}}`, `{#}`, etc.
1412fn cost_total_after_hash(cs: &ast::CostSpec) -> Option<Decimal> {
1413    let mut seen_number = false;
1414    let mut past_hash = false;
1415    for el in cs.syntax().children_with_tokens() {
1416        let rowan::NodeOrToken::Token(t) = el else {
1417            continue;
1418        };
1419        match t.kind() {
1420            crate::SyntaxKind::NUMBER if !seen_number => {
1421                seen_number = true;
1422            }
1423            crate::SyntaxKind::HASH if seen_number => {
1424                past_hash = true;
1425            }
1426            crate::SyntaxKind::NUMBER if past_hash => {
1427                return parse_decimal_token(t.text());
1428            }
1429            _ => {}
1430        }
1431    }
1432    None
1433}
1434
1435fn convert_price_annotation(
1436    pa: &ast::PriceAnnotation,
1437    errors: &mut Vec<crate::ParseError>,
1438    bom_offset: u32,
1439) -> PriceAnnotation {
1440    let kind = if pa.is_total() {
1441        PriceKind::Total
1442    } else {
1443        PriceKind::Unit
1444    };
1445    let amount = pa
1446        .amount()
1447        .and_then(|a| convert_amount_to_incomplete(&a, errors, bom_offset));
1448    PriceAnnotation { kind, amount }
1449}
1450
1451// ---- Metadata extraction ---------------------------------------
1452
1453/// Extract the [`Metadata`] map from the directive node's
1454/// `META_ENTRY` sub-line children. Matches the legacy parser's
1455/// behavior: each entry's key (with trailing `:` stripped) maps
1456/// to a typed [`MetaValue`] derived from the value tokens.
1457fn convert_meta_entries(node: &crate::SyntaxNode) -> Metadata {
1458    let mut meta = Metadata::default();
1459    for entry in node.children().filter_map(MetaEntry::cast) {
1460        let Some(key_token) = entry.key() else {
1461            continue;
1462        };
1463        let key = key_token.text_without_colon().to_string();
1464        let value = meta_value_from_entry(&entry);
1465        meta.insert(key, value);
1466    }
1467    meta
1468}
1469
1470/// Returns true if a node's flat direct-child tokens contain a
1471/// `MINUS` BEFORE the first `NUMBER`. Used to detect signed
1472/// numeric values in directives like Balance / Price whose typed-
1473/// AST accessors return the unsigned NUMBER token only.
1474fn node_has_minus_before_number(node: &crate::SyntaxNode) -> bool {
1475    for el in node.children_with_tokens() {
1476        let rowan::NodeOrToken::Token(t) = el else {
1477            continue;
1478        };
1479        match t.kind() {
1480            crate::SyntaxKind::MINUS => return true,
1481            crate::SyntaxKind::NUMBER => return false,
1482            _ => {}
1483        }
1484    }
1485    false
1486}
1487
1488/// Returns true if a `META_ENTRY`'s value tokens contain a `MINUS`
1489/// before the first `NUMBER`. Used by `meta_value_from_entry` to
1490/// detect signed-number values like `precision: -1` which the
1491/// legacy parser handles via `parse_signed_number`.
1492fn meta_entry_has_minus_sign(entry: &MetaEntry) -> bool {
1493    let mut past_key = false;
1494    for el in entry.syntax().children_with_tokens() {
1495        let rowan::NodeOrToken::Token(t) = el else {
1496            continue;
1497        };
1498        if !past_key {
1499            if t.kind() == crate::SyntaxKind::META_KEY {
1500                past_key = true;
1501            }
1502            continue;
1503        }
1504        match t.kind() {
1505            crate::SyntaxKind::MINUS => return true,
1506            crate::SyntaxKind::NUMBER => return false,
1507            _ => {}
1508        }
1509    }
1510    false
1511}
1512
1513/// Discriminate the value tokens under a `META_ENTRY` into a
1514/// typed [`MetaValue`]. Matches the legacy parser's preference
1515/// order: string > number > date > account > currency > tag >
1516/// link > bool > none.
1517fn meta_value_from_entry(entry: &MetaEntry) -> MetaValue {
1518    if let Some(s) = entry.value_string()
1519        && let Some(text) = s.text_unquoted()
1520    {
1521        return MetaValue::String(text.to_string());
1522    }
1523    if let Some(n) = entry.value_number()
1524        && let Some(mut decimal) = parse_decimal_token(n.text())
1525    {
1526        // A MINUS direct-child token (signed value) negates the
1527        // number. Legacy parses `precision: -1` as Number(-1);
1528        // we need the same.
1529        if meta_entry_has_minus_sign(entry) {
1530            decimal = -decimal;
1531        }
1532        // `0.50 USD` style: NUMBER + CURRENCY together → Amount.
1533        // Plain NUMBER without CURRENCY → Number. Matches legacy
1534        // parser priority where parse_amount runs before
1535        // parse_signed_number.
1536        if let Some(c) = entry.value_currency() {
1537            return MetaValue::Amount(Amount::new(decimal, Currency::new(c.text())));
1538        }
1539        return MetaValue::Number(decimal);
1540    }
1541    if let Some(d) = entry.value_date()
1542        && let Some(date) = parse_date_token(d.text())
1543    {
1544        return MetaValue::Date(date);
1545    }
1546    if let Some(a) = entry.value_account() {
1547        return MetaValue::Account(Account::new(a.text()));
1548    }
1549    if let Some(c) = entry.value_currency() {
1550        return MetaValue::Currency(Currency::new(c.text()));
1551    }
1552    if let Some(b) = entry.value_bool() {
1553        return MetaValue::Bool(b);
1554    }
1555    // Tags and Links inside meta entries: walk raw tokens. The
1556    // typed-AST surface doesn't (yet) expose dedicated accessors,
1557    // so we scan direct token children.
1558    for tok in entry.syntax().children_with_tokens() {
1559        let rowan::NodeOrToken::Token(t) = tok else {
1560            continue;
1561        };
1562        match t.kind() {
1563            crate::SyntaxKind::TAG => {
1564                let stripped = t.text().trim_start_matches('#');
1565                return MetaValue::Tag(Tag::new(stripped));
1566            }
1567            crate::SyntaxKind::LINK => {
1568                let stripped = t.text().trim_start_matches('^');
1569                return MetaValue::Link(Link::new(stripped));
1570            }
1571            _ => {}
1572        }
1573    }
1574    MetaValue::None
1575}
1576
1577// ---- Inherited state (pushtag/poptag/pushmeta/popmeta) ---------
1578
1579/// Merge active pushed-tag and pushed-meta state into a freshly
1580/// converted directive's value. Mirrors the legacy parser's
1581/// `apply_pushed_tags` + `apply_pushed_meta`: tags apply ONLY to
1582/// `Transaction`; meta applies to every directive's `meta` field.
1583///
1584/// The meta stack is a `Vec` (not a map) to preserve shadow/pop
1585/// semantics - `pushmeta x: 1; pushmeta x: 2; popmeta x` should
1586/// leave `x = 1` active, which a map-replacing-on-insert can't
1587/// express. Iterating in push order and inserting into the
1588/// directive's meta means later entries naturally win, matching
1589/// "topmost-shadow wins" behavior.
1590fn apply_inherited_state(
1591    value: &mut Directive,
1592    tag_stack: &[(Tag, Span)],
1593    meta_stack: &[(String, MetaValue, Span)],
1594) {
1595    if let Directive::Transaction(txn) = value {
1596        for (tag, _) in tag_stack {
1597            if !txn.tags.contains(tag) {
1598                txn.tags.push(tag.clone());
1599            }
1600        }
1601    }
1602    if meta_stack.is_empty() {
1603        return;
1604    }
1605    let meta = match value {
1606        Directive::Transaction(d) => &mut d.meta,
1607        Directive::Balance(d) => &mut d.meta,
1608        Directive::Open(d) => &mut d.meta,
1609        Directive::Close(d) => &mut d.meta,
1610        Directive::Commodity(d) => &mut d.meta,
1611        Directive::Pad(d) => &mut d.meta,
1612        Directive::Event(d) => &mut d.meta,
1613        Directive::Query(d) => &mut d.meta,
1614        Directive::Note(d) => &mut d.meta,
1615        Directive::Document(d) => &mut d.meta,
1616        Directive::Price(d) => &mut d.meta,
1617        Directive::Custom(d) => &mut d.meta,
1618    };
1619    for (k, v, _) in meta_stack {
1620        meta.insert(k.clone(), v.clone());
1621    }
1622}
1623
1624/// Extract the value tokens after the `META_KEY` of a Pushmeta
1625/// directive into a typed [`MetaValue`]. Walks the directive's
1626/// direct-child tokens (the directive isn't a `META_ENTRY` so the
1627/// typed-AST accessors aren't reusable).
1628fn pushmeta_value(node: &crate::SyntaxNode) -> MetaValue {
1629    for el in node.children_with_tokens() {
1630        let rowan::NodeOrToken::Token(t) = el else {
1631            continue;
1632        };
1633        match t.kind() {
1634            crate::SyntaxKind::STRING => {
1635                if let Some(s) = strip_string_quotes(t.text()) {
1636                    return MetaValue::String(s.to_string());
1637                }
1638            }
1639            crate::SyntaxKind::NUMBER => {
1640                if let Some(n) = parse_decimal_token(t.text()) {
1641                    return MetaValue::Number(n);
1642                }
1643            }
1644            crate::SyntaxKind::DATE => {
1645                if let Some(d) = parse_date_token(t.text()) {
1646                    return MetaValue::Date(d);
1647                }
1648            }
1649            crate::SyntaxKind::ACCOUNT => return MetaValue::Account(Account::new(t.text())),
1650            crate::SyntaxKind::CURRENCY => return MetaValue::Currency(Currency::new(t.text())),
1651            crate::SyntaxKind::BOOL_TRUE => return MetaValue::Bool(true),
1652            crate::SyntaxKind::BOOL_FALSE => return MetaValue::Bool(false),
1653            crate::SyntaxKind::TAG => {
1654                return MetaValue::Tag(Tag::new(t.text().trim_start_matches('#')));
1655            }
1656            crate::SyntaxKind::LINK => {
1657                return MetaValue::Link(Link::new(t.text().trim_start_matches('^')));
1658            }
1659            _ => {}
1660        }
1661    }
1662    MetaValue::None
1663}
1664
1665// ---- ParseResult.comments --------------------------------------
1666
1667/// Comment-like syntax kinds that the legacy parser surfaces as
1668/// `ParseResult.comments` entries when they appear at the top
1669/// level (outside any directive's content).
1670const fn is_comment_kind(kind: crate::SyntaxKind) -> bool {
1671    matches!(
1672        kind,
1673        crate::SyntaxKind::COMMENT
1674            | crate::SyntaxKind::PERCENT_COMMENT
1675            | crate::SyntaxKind::SHEBANG
1676            | crate::SyntaxKind::EMACS_DIRECTIVE
1677    )
1678}
1679
/// Output of the fused top-level pass [`walk_top_level_once`].
struct TopLevelWalkResult {
    /// Diagnostics gathered from the per-child checks:
    /// `indented_directive_check`, `custom_value_check`,
    /// `transaction_body_check` and `error_node_check`. They are
    /// in traversal order, not yet span-sorted.
    errors: Vec<crate::ParseError>,
    /// Entries filled in by `section_marker_check`, which runs on
    /// every `ERROR_NODE` child.
    section_marker_comments: Vec<Spanned<String>>,
}
1685
1686/// Single walk over `source_file`'s direct children that runs
1687/// every per-directive diagnostic in one pass, replacing five
1688/// separate `source_file.syntax().children()` traversals
1689/// (`extract_error_node_errors`, `extract_transaction_body_errors`,
1690/// `extract_indented_directive_errors`, `extract_custom_value_errors`,
1691/// `extract_section_marker_comments`). Each former pass re-walked
1692/// the top-level child list and materialized a fresh red node per
1693/// directive; on a large ledger that is 5·O(N) red-node churn for
1694/// work that is naturally per-child. The checks are independent and
1695/// all diagnostics are span-sorted by the caller, so fusing them is
1696/// order-preserving.
1697fn walk_top_level_once(
1698    source_file: &SourceFile,
1699    stripped: &str,
1700    bom_offset: u32,
1701) -> TopLevelWalkResult {
1702    let mut errors: Vec<crate::ParseError> = Vec::new();
1703    let mut section_marker_comments: Vec<Spanned<String>> = Vec::new();
1704    for child in source_file.syntax().children() {
1705        let kind = child.kind();
1706        // Applies to every recognized directive node (incl. CUSTOM).
1707        if ast::Directive::can_cast(kind) {
1708            indented_directive_check(&child, stripped, bom_offset, &mut errors);
1709        }
1710        match kind {
1711            crate::SyntaxKind::CUSTOM_DIRECTIVE => {
1712                custom_value_check(&child, bom_offset, &mut errors);
1713            }
1714            crate::SyntaxKind::TRANSACTION => {
1715                transaction_body_check(&child, bom_offset, &mut errors);
1716            }
1717            crate::SyntaxKind::ERROR_NODE => {
1718                error_node_check(&child, stripped, bom_offset, &mut errors);
1719                section_marker_check(&child, bom_offset, &mut section_marker_comments);
1720            }
1721            _ => {}
1722        }
1723    }
1724    TopLevelWalkResult {
1725        errors,
1726        section_marker_comments,
1727    }
1728}
1729
1730/// Walk every `COST_SPEC` node in the tree and emit a
1731/// `SyntaxError("unclosed cost specification: missing '}'")` for
1732/// any spec whose opener (`{`, `{{`, or `{#`) doesn't have a
1733/// matching closer at the spec's depth-0. Mirrors the legacy
1734/// parser's deferred-error emission at `parser.rs:705-707` so a
1735/// `10 AAPL {150 USD\n` posting or an EOF-truncated cost block
1736/// surfaces a diagnostic instead of silently producing a half-
1737/// built cost spec.
1738fn extract_unclosed_cost_brace_errors(
1739    source_file: &SourceFile,
1740    bom_offset: u32,
1741) -> Vec<crate::ParseError> {
1742    let mut out = Vec::new();
1743    for cs in source_file.syntax().descendants() {
1744        if cs.kind() != crate::SyntaxKind::COST_SPEC {
1745            continue;
1746        }
1747        let mut has_opener = false;
1748        let mut has_closer = false;
1749        for el in cs.children_with_tokens() {
1750            let rowan::NodeOrToken::Token(t) = el else {
1751                continue;
1752            };
1753            match t.kind() {
1754                crate::SyntaxKind::L_BRACE
1755                | crate::SyntaxKind::L_DOUBLE_BRACE
1756                | crate::SyntaxKind::L_BRACE_HASH => has_opener = true,
1757                crate::SyntaxKind::R_BRACE | crate::SyntaxKind::R_DOUBLE_BRACE => has_closer = true,
1758                _ => {}
1759            }
1760        }
1761        if has_opener && !has_closer {
1762            out.push(crate::ParseError::new(
1763                crate::ParseErrorKind::SyntaxError(
1764                    "unclosed cost specification: missing '}'".to_string(),
1765                ),
1766                node_span(&cs, bom_offset),
1767            ));
1768        }
1769    }
1770    out
1771}
1772
1773/// Walk every top-level directive in `source_file` and emit a
1774/// `SyntaxError("top-level directive must start at column 0")`
1775/// for any whose content (first non-trivia token) starts at a
1776/// non-zero column. Per the Beancount language spec, top-level
1777/// directives are required to begin at column 0; indentation is
1778/// reserved for postings and metadata inside a transaction body.
1779///
1780/// The CST grammar happily accepts an indented `open` / `balance`
1781/// / etc., which is why this surfaces at converter level instead
1782/// of as a lex/parse error.
1783fn indented_directive_check(
1784    child: &crate::SyntaxNode,
1785    stripped: &str,
1786    bom_offset: u32,
1787    out: &mut Vec<crate::ParseError>,
1788) {
1789    // Caller dispatches: `child` is a recognized directive node.
1790    // Find the directive's content start - the first non-
1791    // trivia token. Leading WHITESPACE / NEWLINE / COMMENT
1792    // can land inside the directive node per the Directive-
1793    // Terminator Rule's inter-directive trivia attachment.
1794    let Some(content) = child
1795        .children_with_tokens()
1796        .filter_map(rowan::NodeOrToken::into_token)
1797        .find(|t| !is_trivia_kind(t.kind()))
1798    else {
1799        return;
1800    };
1801    let content_start: usize = u32::from(content.text_range().start()) as usize;
1802    // Column = offset since the last NEWLINE in the source,
1803    // or since byte 0 if this is the first line. >0 means
1804    // the directive's first content token has leading WS on
1805    // its own line - that's the indent error.
1806    // Find the line start by scanning the BYTES before `content_start`, not by
1807    // slicing the `str`. On malformed/error-recovered input a token's start
1808    // offset can land inside a multi-byte UTF-8 char, and
1809    // `stripped[..content_start]` would then panic ("not a char boundary").
1810    // Byte slicing is boundary-agnostic, and a newline (`\n`) is always a single
1811    // ASCII byte, so the found position is a valid offset. `.get(..)` also guards
1812    // a (theoretical) out-of-bounds offset. Regression: fuzz_regressions.rs.
1813    let line_start = stripped
1814        .as_bytes()
1815        .get(..content_start)
1816        .and_then(|bytes| bytes.iter().rposition(|&b| b == b'\n'))
1817        .map_or(0, |nl| nl + 1);
1818    if content_start > line_start {
1819        let end: u32 = content.text_range().end().into();
1820        let span = Span::new(
1821            (line_start as u32 + bom_offset) as usize,
1822            (end + bom_offset) as usize,
1823        );
1824        out.push(crate::ParseError::new(
1825            crate::ParseErrorKind::SyntaxError(
1826                "top-level directive must start at column 0".to_string(),
1827            ),
1828            span,
1829        ));
1830    }
1831}
1832
1833/// Walk each `CUSTOM` directive and emit a `SyntaxError` for
1834/// every bare `CURRENCY` token in the value position (a CURRENCY
1835/// not paired with a preceding NUMBER as an Amount).
1836///
1837/// Per the Beancount language spec, custom-directive values are
1838/// limited to string / date / decimal / amount / boolean -
1839/// `bean-check` rejects a bare currency literal with a syntax
1840/// error. Rustledger's `extract_custom_values` has historically
1841/// been more lenient, accepting ACCOUNT / TAG / LINK in value
1842/// position too; we keep that extension (it's covered by the
1843/// existing `test_parse_custom_directive` integration test) but
1844/// surface a diagnostic for the bare-CURRENCY case so the
1845/// compat metric reflects bean-check's exit-code rejection on
1846/// shapes like `custom "x" 10 USD "y" NZD …`.
1847fn custom_value_check(
1848    child: &crate::SyntaxNode,
1849    bom_offset: u32,
1850    out: &mut Vec<crate::ParseError>,
1851) {
1852    // Caller dispatches: `child` is a CUSTOM_DIRECTIVE.
1853    {
1854        // Collect non-trivia tokens, then skip past the
1855        // directive's header: DATE, CUSTOM_KW, and the first
1856        // STRING (the custom-type name). Everything after that
1857        // is values.
1858        let raw: Vec<crate::SyntaxToken> = child
1859            .children_with_tokens()
1860            .filter_map(rowan::NodeOrToken::into_token)
1861            .filter(|t| !is_trivia_kind(t.kind()))
1862            .collect();
1863        let mut seen_type_string = false;
1864        let mut i = 0;
1865        while i < raw.len() {
1866            let t = &raw[i];
1867            if !seen_type_string {
1868                if t.kind() == crate::SyntaxKind::STRING {
1869                    seen_type_string = true;
1870                }
1871                i += 1;
1872                continue;
1873            }
1874            if t.kind() == crate::SyntaxKind::CURRENCY {
1875                // Only flag BARE CURRENCY - one that doesn't
1876                // follow a NUMBER (Amount-pairing). The Amount
1877                // pairing is handled by `extract_custom_values`
1878                // via i+1 lookahead, so a CURRENCY that's NOT
1879                // preceded by a NUMBER at i-1 is bare.
1880                let preceded_by_number = i > 0 && raw[i - 1].kind() == crate::SyntaxKind::NUMBER;
1881                if !preceded_by_number {
1882                    let range = t.text_range();
1883                    let start: u32 = range.start().into();
1884                    let end: u32 = range.end().into();
1885                    let span =
1886                        Span::new((start + bom_offset) as usize, (end + bom_offset) as usize);
1887                    out.push(crate::ParseError::new(
1888                        crate::ParseErrorKind::SyntaxError(
1889                            "bare currency literal is not a valid custom directive value"
1890                                .to_string(),
1891                        ),
1892                        span,
1893                    ));
1894                }
1895            }
1896            i += 1;
1897        }
1898    }
1899}
1900
1901/// Walk a `TRANSACTION` body and emit a `SyntaxError` for any body
1902/// line that contains flat catch-all tokens (e.g., an
1903/// unrecognized identifier where a posting was expected).
1904/// Matches the legacy parser, which fails its inner posting
1905/// parser on such lines and recovers by skipping to the next
1906/// NEWLINE while emitting a `SyntaxError`.
1907fn transaction_body_check(
1908    child: &crate::SyntaxNode,
1909    bom_offset: u32,
1910    out: &mut Vec<crate::ParseError>,
1911) {
1912    // Caller dispatches: `child` is a TRANSACTION.
1913    {
1914        // Skip past the header NEWLINE, then look for catch-all
1915        // tokens (non-trivia, non-comment) appearing on lines
1916        // OUTSIDE POSTING / META_ENTRY child nodes.
1917        // Track whether we've SEEN at least one non-trivia
1918        // header token (DATE / flag / STRING / etc.); only AFTER
1919        // that does the next NEWLINE count as the header
1920        // terminator. Otherwise leading-trivia NEWLINEs from the
1921        // Directive-Terminator Rule would falsely trip
1922        // past_header on the very first iteration.
1923        let mut past_header = false;
1924        let mut saw_header_content = false;
1925        let mut line_start: Option<u32> = None;
1926        let mut line_has_content = false;
1927        for el in child.children_with_tokens() {
1928            match el {
1929                rowan::NodeOrToken::Token(t) => {
1930                    if !past_header {
1931                        if t.kind() == crate::SyntaxKind::NEWLINE {
1932                            if saw_header_content {
1933                                past_header = true;
1934                            }
1935                        } else if !is_trivia_kind(t.kind()) {
1936                            saw_header_content = true;
1937                        }
1938                        continue;
1939                    }
1940                    let range = t.text_range();
1941                    let start: u32 = range.start().into();
1942                    let end: u32 = range.end().into();
1943                    if line_start.is_none() {
1944                        line_start = Some(start);
1945                    }
1946                    if t.kind() == crate::SyntaxKind::NEWLINE {
1947                        if line_has_content && let Some(ls) = line_start {
1948                            // Skip leading WHITESPACE in the span.
1949                            let span =
1950                                Span::new((ls + bom_offset) as usize, (end + bom_offset) as usize);
1951                            // Find first non-whitespace position
1952                            // for a tighter span matching legacy.
1953                            out.push(crate::ParseError::new(
1954                                crate::ParseErrorKind::SyntaxError("unexpected input".to_string()),
1955                                span,
1956                            ));
1957                        }
1958                        line_start = None;
1959                        line_has_content = false;
1960                    } else if !is_trivia_kind(t.kind())
1961                        && !is_comment_kind(t.kind())
1962                        && !matches!(t.kind(), crate::SyntaxKind::TAG | crate::SyntaxKind::LINK)
1963                    {
1964                        // TAG / LINK on body lines is valid
1965                        // Beancount syntax (tags/links after the
1966                        // first line continue the transaction's
1967                        // tag/link list). Don't flag as
1968                        // unexpected-input.
1969                        line_has_content = true;
1970                    }
1971                }
1972                rowan::NodeOrToken::Node(_) => {
1973                    // POSTING / META_ENTRY: not catch-all. Reset.
1974                    line_start = None;
1975                    line_has_content = false;
1976                    if !past_header {
1977                        past_header = true;
1978                    }
1979                }
1980            }
1981        }
1982    }
1983}
1984
1985/// Walk an `ERROR_NODE` and emit a
1986/// `ParseError` for each line that is NEITHER a section marker
1987/// (`*`-starting) NOR a column-0 comment. The variant emitted
1988/// mirrors the legacy parser's error-recovery classifier
1989/// (`parser.rs:2186-2249`): BOM-in-line → `BomInDirectiveBody`
1990/// (with `BOM_REMOVAL_HINT`); Unicode-character account →
1991/// `InvalidAccount`; otherwise → `SyntaxError("unexpected
1992/// input")`. `stripped` is the post-BOM-strip source so token
1993/// `text_range` indices into it correctly.
1994fn error_node_check(
1995    child: &crate::SyntaxNode,
1996    stripped: &str,
1997    bom_offset: u32,
1998    out: &mut Vec<crate::ParseError>,
1999) {
2000    // Caller dispatches: `child` is an ERROR_NODE.
2001    {
2002        let mut line_start: Option<u32> = None;
2003        let mut first_non_trivia: Option<crate::SyntaxKind> = None;
2004        for el in child.children_with_tokens() {
2005            let rowan::NodeOrToken::Token(t) = el else {
2006                continue;
2007            };
2008            let range = t.text_range();
2009            let start: u32 = range.start().into();
2010            let end: u32 = range.end().into();
2011            if line_start.is_none() {
2012                line_start = Some(start);
2013            }
2014            if t.kind() == crate::SyntaxKind::NEWLINE {
2015                // Decide the line's classification.
2016                let is_section = matches!(first_non_trivia, Some(crate::SyntaxKind::STAR));
2017                let is_comment = matches!(first_non_trivia, Some(k) if is_comment_kind(k));
2018                if !is_section
2019                    && !is_comment
2020                    && first_non_trivia.is_some()
2021                    && let Some(ls) = line_start
2022                {
2023                    // Legacy span INCLUDES the terminator NEWLINE
2024                    // (skip_to_newline consumes it before
2025                    // span_from is called).
2026                    let span = Span::new((ls + bom_offset) as usize, (end + bom_offset) as usize);
2027                    let line_text = stripped.get(ls as usize..end as usize).unwrap_or("");
2028                    let primary = classify_recovery_error(line_text, span);
2029                    let primary_is_bom =
2030                        matches!(primary.kind, crate::ParseErrorKind::BomInDirectiveBody);
2031                    out.push(primary);
2032                    // Additive secondary `BomInDirectiveBody` when
2033                    // a different primary diagnostic (Unicode
2034                    // account / generic syntax) already fired AND
2035                    // the line ALSO contains a BOM byte. Matches
2036                    // legacy `parser.rs:2258-2263`: without this,
2037                    // a Windows-exported line with both problems
2038                    // surfaces only the actionable root cause and
2039                    // the user has no clue the invisible BOM byte
2040                    // is also corrupting the line.
2041                    if !primary_is_bom && line_text.contains(crate::bom::BOM_CHAR) {
2042                        out.push(
2043                            crate::ParseError::new(crate::ParseErrorKind::BomInDirectiveBody, span)
2044                                .with_hint(crate::diagnostics::BOM_REMOVAL_HINT),
2045                        );
2046                    }
2047                }
2048                line_start = None;
2049                first_non_trivia = None;
2050                continue;
2051            }
2052            if first_non_trivia.is_none() && !is_trivia_kind(t.kind()) {
2053                first_non_trivia = Some(t.kind());
2054            }
2055        }
2056    }
2057}
2058
2059/// Pick the most specific `ParseError` variant for an
2060/// error-recovery line, mirroring the legacy parser's classifier
2061/// at `parser.rs:2186-2249`:
2062/// 1. A Unicode-character account (`Assets:Café:…`) → primary
2063///    `InvalidAccount` - it's the actionable root cause.
2064/// 2. A mid-file BOM byte (`U+FEFF`) → `BomInDirectiveBody` with
2065///    `BOM_REMOVAL_HINT` so miette surfaces the remediation step.
2066/// 3. Anything else → `SyntaxError("unexpected input")`.
2067///
2068/// Order matters: a Windows-exported file with a Unicode account
2069/// AND an internal BOM gets the Unicode-account diagnostic
2070/// (the BOM is usually a side effect, not the root cause).
2071fn classify_recovery_error(line_text: &str, span: Span) -> crate::ParseError {
2072    if let Some(account) = crate::diagnostics::find_unicode_account(line_text) {
2073        return crate::ParseError::new(
2074            crate::ParseErrorKind::InvalidAccount(account.to_string()),
2075            span,
2076        );
2077    }
2078    if line_text.contains(crate::bom::BOM_CHAR) {
2079        return crate::ParseError::new(crate::ParseErrorKind::BomInDirectiveBody, span)
2080            .with_hint(crate::diagnostics::BOM_REMOVAL_HINT);
2081    }
2082    crate::ParseError::new(
2083        crate::ParseErrorKind::SyntaxError("unexpected input".to_string()),
2084        span,
2085    )
2086}
2087
2088/// Walk every descendant token and emit a `ParseError` for each
2089/// `ERROR_TOKEN` (or BOM-containing token) that lands inside an
2090/// otherwise-valid directive node - i.e., NOT inside an
2091/// `ERROR_NODE` ancestor. Catches lexer-reject bytes the
2092/// outer recovery path misses:
2093/// - `.` in `.50 USD` (leading-decimal in posting amount) →
2094///   `SyntaxError`.
2095/// - Mid-file U+FEFF byte inside a recognized directive (e.g.,
2096///   `open Assets:Bank \u{FEFF}USD`) → `BomInDirectiveBody` with
2097///   `BOM_REMOVAL_HINT`.
2098///
2099/// The leading `SyntaxKind::BOM` token is skipped (the
2100/// legitimate strict-byte-0 BOM is already tracked by
2101/// `has_leading_bom`). `ERROR_NODE` descendants are skipped -
2102/// `extract_error_node_errors` / `classify_recovery_error`
2103/// already cover those.
2104/// Result of the fused descendants-walk visitor that powers
2105/// `walk_descendants_once`.
2106struct DescendantsWalkResult {
2107    inline_errors: Vec<crate::ParseError>,
2108    top_level_comments: Vec<Spanned<String>>,
2109    currency_occurrences: Vec<Spanned<Currency>>,
2110    account_occurrences: Vec<Spanned<rustledger_core::Account>>,
2111}
2112
2113/// Fused single-pass visitor over `source_file`'s descendants -
2114/// replaces three separate walks (`extract_inline_token_errors`,
2115/// `extract_top_level_comments`, `extract_currency_occurrences`)
2116/// with one traversal. Each walk had its own per-token cost; the
2117/// LSP runs them on every keystroke, so collapsing 3·O(N) → 1·O(N)
2118/// matters at editor-edge latencies. The state of each former
2119/// walk is maintained inline below.
2120fn walk_descendants_once(source_file: &SourceFile, bom_offset: u32) -> DescendantsWalkResult {
2121    let mut inline_errors: Vec<crate::ParseError> = Vec::new();
2122    let mut top_level_comments: Vec<Spanned<String>> = Vec::new();
2123    let mut currency_occurrences: Vec<Spanned<Currency>> = Vec::new();
2124    let mut account_occurrences: Vec<Spanned<rustledger_core::Account>> = Vec::new();
2125
2126    // `extract_top_level_comments` state: column-0 tracking.
2127    let mut preceded_by_ws = false;
2128
2129    for el in source_file.syntax().descendants_with_tokens() {
2130        let rowan::NodeOrToken::Token(t) = el else {
2131            // `extract_top_level_comments` used the Node arm to
2132            // reset preceded_by_ws when entering a recognized
2133            // directive. Keep that behavior - directive leading
2134            // trivia still gets column-0-classified correctly.
2135            if let rowan::NodeOrToken::Node(n) = el
2136                && ast::Directive::can_cast(n.kind())
2137            {
2138                preceded_by_ws = false;
2139            }
2140            continue;
2141        };
2142
2143        // ---- `extract_top_level_comments` state machine -------
2144        match t.kind() {
2145            crate::SyntaxKind::NEWLINE => preceded_by_ws = false,
2146            crate::SyntaxKind::WHITESPACE => preceded_by_ws = true,
2147            k if is_comment_kind(k) => {
2148                if !preceded_by_ws {
2149                    let range = t.text_range();
2150                    let start: u32 = range.start().into();
2151                    let end: u32 = range.end().into();
2152                    let span =
2153                        Span::new((start + bom_offset) as usize, (end + bom_offset) as usize);
2154                    top_level_comments.push(Spanned::new(t.text().to_string(), span));
2155                }
2156            }
2157            _ => {
2158                preceded_by_ws = false;
2159            }
2160        }
2161
2162        // ---- `extract_inline_token_errors` + currency walks ---
2163        if t.kind() == crate::SyntaxKind::BOM {
2164            continue;
2165        }
2166        // ERROR_NODE-ancestor check is only consulted for tokens
2167        // whose downstream emission depends on it (CURRENCY, BOM-
2168        // text-containing, ERROR_TOKEN). For well-formed source
2169        // most tokens fall into none of those buckets - gating
2170        // the per-token `parent_ancestors` walk on relevance
2171        // saves an O(depth) probe per WHITESPACE/NEWLINE/comment
2172        // token, which dominates token counts in real ledgers.
2173        let kind = t.kind();
2174        let has_bom = t.text().contains(crate::bom::BOM_CHAR);
2175        let is_error_token = kind == crate::SyntaxKind::ERROR_TOKEN;
2176        let needs_in_error_check = matches!(
2177            kind,
2178            crate::SyntaxKind::CURRENCY | crate::SyntaxKind::ACCOUNT
2179        ) || has_bom
2180            || is_error_token;
2181        if !needs_in_error_check {
2182            continue;
2183        }
2184        let in_error_node = t
2185            .parent_ancestors()
2186            .any(|a| a.kind() == crate::SyntaxKind::ERROR_NODE);
2187
2188        // CURRENCY occurrences: only outside ERROR_NODE.
2189        if kind == crate::SyntaxKind::CURRENCY && !in_error_node {
2190            let range = t.text_range();
2191            let start: u32 = range.start().into();
2192            let end: u32 = range.end().into();
2193            let span = Span::new((start + bom_offset) as usize, (end + bom_offset) as usize);
2194            currency_occurrences.push(Spanned::new(Currency::new(t.text()), span));
2195        }
2196
2197        // ACCOUNT occurrences: only outside ERROR_NODE. The same
2198        // rationale as CURRENCY applies - the lexer classifies an
2199        // `ACCOUNT` token by its character shape independent of
2200        // whether the surrounding directive parses cleanly, and
2201        // source-position-aware tooling (LSP rename / references /
2202        // document-highlight) wants the token as the user typed it
2203        // even during a mid-edit broken state.
2204        if kind == crate::SyntaxKind::ACCOUNT && !in_error_node {
2205            let range = t.text_range();
2206            let start: u32 = range.start().into();
2207            let end: u32 = range.end().into();
2208            let span = Span::new((start + bom_offset) as usize, (end + bom_offset) as usize);
2209            account_occurrences.push(Spanned::new(rustledger_core::Account::new(t.text()), span));
2210        }
2211
2212        // Inline errors: BOM byte in a recognized directive
2213        // (-> BomInDirectiveBody + hint) or ERROR_TOKEN inside a
2214        // recognized directive (-> SyntaxError). Both skip when
2215        // already inside an ERROR_NODE (handled by the recovery
2216        // classifier).
2217        if (!has_bom && !is_error_token) || in_error_node {
2218            continue;
2219        }
2220        let range = t.text_range();
2221        let start: u32 = range.start().into();
2222        let end: u32 = range.end().into();
2223        let span = Span::new((start + bom_offset) as usize, (end + bom_offset) as usize);
2224        if has_bom {
2225            inline_errors.push(
2226                crate::ParseError::new(crate::ParseErrorKind::BomInDirectiveBody, span)
2227                    .with_hint(crate::diagnostics::BOM_REMOVAL_HINT),
2228            );
2229        } else {
2230            inline_errors.push(crate::ParseError::new(
2231                crate::ParseErrorKind::SyntaxError("unexpected input".to_string()),
2232                span,
2233            ));
2234        }
2235    }
2236
2237    DescendantsWalkResult {
2238        inline_errors,
2239        top_level_comments,
2240        currency_occurrences,
2241        account_occurrences,
2242    }
2243}
2244
2245/// Emit empty-string comments for org-mode section-marker
2246/// lines (`* Heading`, `** Subheading`) inside `ERROR_NODE`
2247/// children. The legacy parser's `parse_entry` matches
2248/// `Token::Star` and emits `Comment(String::new(), line_span)`;
2249/// the structured CST wraps these lines in `ERROR_NODE`s so we
2250/// have to walk them and synthesize the same shape.
2251fn section_marker_check(
2252    child: &crate::SyntaxNode,
2253    bom_offset: u32,
2254    out: &mut Vec<Spanned<String>>,
2255) {
2256    // Caller dispatches: `child` is an ERROR_NODE.
2257    // Walk tokens line-by-line. A line starts at the start
2258    // of the first token after a NEWLINE (or at the node's
2259    // start) and ends at the next NEWLINE (inclusive).
2260    let mut line_start: Option<u32> = None;
2261    let mut first_non_trivia: Option<crate::SyntaxKind> = None;
2262    for el in child.children_with_tokens() {
2263        let rowan::NodeOrToken::Token(t) = el else {
2264            continue;
2265        };
2266        let range = t.text_range();
2267        let start: u32 = range.start().into();
2268        let end: u32 = range.end().into();
2269        if line_start.is_none() {
2270            line_start = Some(start);
2271        }
2272        if t.kind() == crate::SyntaxKind::NEWLINE {
2273            if first_non_trivia == Some(crate::SyntaxKind::STAR)
2274                && let Some(ls) = line_start
2275            {
2276                let span = Span::new((ls + bom_offset) as usize, (end + bom_offset) as usize);
2277                out.push(Spanned::new(String::new(), span));
2278            }
2279            line_start = None;
2280            first_non_trivia = None;
2281            continue;
2282        }
2283        if first_non_trivia.is_none() && !is_trivia_kind(t.kind()) {
2284            first_non_trivia = Some(t.kind());
2285        }
2286    }
2287}
2288
2289// `extract_top_level_comments` and `extract_currency_occurrences`
2290// are folded into `walk_descendants_once` above - see the
2291// comments there for the column-0 / ERROR_NODE-exclusion rules.
2292
2293// ---- Token parsing helpers -------------------------------------
2294
2295/// Parse a date token, accepting the same shapes as the legacy
2296/// parser: canonical `YYYY-MM-DD`, slash-separated `YYYY/M/D`,
2297/// and single-digit month/day. Returns `None` when the token
2298/// can't be turned into a real calendar date (invalid month,
2299/// invalid day for the given month, etc.).
2300fn parse_date_token(text: &str) -> Option<NaiveDate> {
2301    // Fast path: canonical "YYYY-MM-DD".
2302    if text.len() == 10
2303        && text.as_bytes()[4] == b'-'
2304        && text.as_bytes()[7] == b'-'
2305        && let (Ok(y), Ok(m), Ok(d)) = (
2306            text[0..4].parse::<i32>(),
2307            text[5..7].parse::<u32>(),
2308            text[8..10].parse::<u32>(),
2309        )
2310    {
2311        return naive_date(y, m, d);
2312    }
2313    // Slow path: share legacy's normalizer so single-digit
2314    // month/day (`2024-1-15`, `2024-01-5`) and slash separators
2315    // are accepted everywhere the legacy parser accepts them.
2316    crate::diagnostics::normalize_date_str(text)
2317        .parse::<NaiveDate>()
2318        .ok()
2319}
2320
2321/// Parse a directive's `DATE` token. On success returns the
2322/// `NaiveDate`; on a token whose calendar values don't form a
2323/// real date (`2024-13-01`, Feb 29 in a non-leap year) emits
2324/// `InvalidDateValue` with the legacy parser's human-readable
2325/// message and returns `None`. This mirrors
2326/// `parser.rs:181-182` so the CST and legacy parsers surface the
2327/// same diagnostics for malformed dates in directive position.
2328fn parse_directive_date(
2329    date_tok: &ast::Date,
2330    errors: &mut Vec<crate::ParseError>,
2331    bom_offset: u32,
2332) -> Option<NaiveDate> {
2333    let text = date_tok.text();
2334    if let Some(d) = parse_date_token(text) {
2335        return Some(d);
2336    }
2337    let range = date_tok.syntax().text_range();
2338    let start: u32 = range.start().into();
2339    let end: u32 = range.end().into();
2340    let span = Span::new((start + bom_offset) as usize, (end + bom_offset) as usize);
2341    errors.push(crate::ParseError::new(
2342        crate::ParseErrorKind::InvalidDateValue(crate::diagnostics::describe_invalid_date(text)),
2343        span,
2344    ));
2345    None
2346}
2347
2348/// Parse a numeric token. Tolerates leading sign and thousands-
2349/// separator commas (legacy parser drops them).
2350fn parse_decimal_token(text: &str) -> Option<Decimal> {
2351    use std::str::FromStr;
2352    let cleaned: String;
2353    let s = if text.contains(',') {
2354        cleaned = text.replace(',', "");
2355        cleaned.as_str()
2356    } else {
2357        text
2358    };
2359    Decimal::from_str(s).ok()
2360}
2361
2362// ---- Span helpers ----------------------------------------------
2363
2364/// Convert a CST node's [`rowan::TextRange`] (relative to the
2365/// post-BOM source frame) into a [`Span`] in the original-source
2366/// frame.
2367fn node_span(node: &crate::SyntaxNode, bom_offset: u32) -> Span {
2368    let range = node.text_range();
2369    let start: u32 = range.start().into();
2370    let end: u32 = range.end().into();
2371    Span::new((start + bom_offset) as usize, (end + bom_offset) as usize)
2372}
2373
2374/// Trivia kinds that don't count toward a span's start/end when
2375/// matching the legacy parser's span convention.
2376///
2377/// Covers WHITESPACE / NEWLINE plus EVERY comment-trivia kind
2378/// (`COMMENT`, `PERCENT_COMMENT`, `SHEBANG`, `EMACS_DIRECTIVE`)
2379/// so files with ledger-style `%` comments or org-mode
2380/// `#!`/`#+` lines have the same span/header-tracking behavior
2381/// as files with only `;` comments. Mirrors
2382/// `SyntaxKind::is_trivia()` minus `BOM` - a mid-file BOM byte
2383/// is an error to surface (`extract_inline_token_errors` /
2384/// `classify_recovery_error`), not trivia to silently skip.
2385const fn is_trivia_kind(kind: crate::SyntaxKind) -> bool {
2386    matches!(
2387        kind,
2388        crate::SyntaxKind::WHITESPACE
2389            | crate::SyntaxKind::NEWLINE
2390            | crate::SyntaxKind::COMMENT
2391            | crate::SyntaxKind::PERCENT_COMMENT
2392            | crate::SyntaxKind::SHEBANG
2393            | crate::SyntaxKind::EMACS_DIRECTIVE
2394    )
2395}
2396
2397/// Span policy for `Posting`: the legacy parser ends the posting
2398/// span at the position just before the line's terminating
2399/// NEWLINE. The CST node's range INCLUDES the terminator
2400/// NEWLINE; trim it by using the NEWLINE token's start position.
2401/// We look at the FIRST direct-child NEWLINE token because
2402/// posting-attached metadata sub-lines (which have their own
2403/// inner NEWLINEs) come after the line terminator and shouldn't
2404/// extend the posting-line span.
2405fn posting_span(node: &crate::SyntaxNode, bom_offset: u32) -> Span {
2406    let range = node.text_range();
2407    let start: u32 = range.start().into();
2408    let end_raw: u32 = range.end().into();
2409    // Postings have no inter-directive leading trivia: their
2410    // first direct-child NEWLINE IS the terminator.
2411    let end = node
2412        .children_with_tokens()
2413        .filter_map(rowan::NodeOrToken::into_token)
2414        .find(|t| t.kind() == crate::SyntaxKind::NEWLINE)
2415        .map_or(end_raw, |t| u32::from(t.text_range().start()));
2416    Span::new((start + bom_offset) as usize, (end + bom_offset) as usize)
2417}
2418
2419/// Span policy for non-Directive single-line constructs that
2420/// participate in inter-directive trivia attachment (Option,
2421/// Include, Plugin). Unlike Posting these may have leading
2422/// trivia (blank-line NEWLINEs, comments) inside the node from
2423/// the Directive-Terminator Rule. Start at the first non-trivia
2424/// content token; end at the first NEWLINE after that.
2425fn single_line_directive_span(node: &crate::SyntaxNode, bom_offset: u32) -> Span {
2426    let range = node.text_range();
2427    let start_raw: u32 = range.start().into();
2428    let end_raw: u32 = range.end().into();
2429    let mut content_start: Option<u32> = None;
2430    let mut terminator: Option<u32> = None;
2431    for t in node
2432        .children_with_tokens()
2433        .filter_map(rowan::NodeOrToken::into_token)
2434    {
2435        if content_start.is_none() {
2436            if !is_trivia_kind(t.kind()) {
2437                content_start = Some(u32::from(t.text_range().start()));
2438            }
2439        } else if t.kind() == crate::SyntaxKind::NEWLINE {
2440            terminator = Some(u32::from(t.text_range().start()));
2441            break;
2442        }
2443    }
2444    let start = content_start.unwrap_or(start_raw);
2445    let end = terminator.unwrap_or(end_raw);
2446    Span::new((start + bom_offset) as usize, (end + bom_offset) as usize)
2447}
2448
2449/// Span policy for top-level directives: legacy directives start
2450/// at the first content character (skipping leading trivia from
2451/// the Directive-Terminator Rule) and extend through any
2452/// inter-directive trivia up to where the NEXT directive begins.
2453/// Computed in a post-pass since each directive's end depends on
2454/// the next one's start.
2455fn fixup_directive_spans(
2456    source_file: &SourceFile,
2457    bom_offset: u32,
2458    converted_nodes: &[crate::SyntaxNode],
2459    directives: &mut [Spanned<Directive>],
2460) {
2461    debug_assert_eq!(
2462        converted_nodes.len(),
2463        directives.len(),
2464        "converted_nodes and directives must be parallel arrays"
2465    );
2466
2467    // Walk EVERY top-level Directive-castable child (including
2468    // pushtag/poptag/pushmeta/popmeta that we filter out of the
2469    // ParseResult) so the "next directive's start" boundary used
2470    // for span end-fixup matches the legacy parser: there, each
2471    // visible directive's span ends at the next /input/
2472    // directive's start, regardless of whether that next
2473    // directive is preserved.
2474    let all_starts: Vec<(usize, usize)> = source_file
2475        .syntax()
2476        .children()
2477        .filter(|n| ast::Directive::can_cast(n.kind()))
2478        .map(|n| {
2479            let raw_start: u32 = n.text_range().start().into();
2480            let content_start = n
2481                .descendants_with_tokens()
2482                .filter_map(rowan::NodeOrToken::into_token)
2483                .find(|t| !is_trivia_kind(t.kind()))
2484                .map_or_else(
2485                    || (raw_start + bom_offset) as usize,
2486                    |t| (u32::from(t.text_range().start()) + bom_offset) as usize,
2487                );
2488            ((raw_start + bom_offset) as usize, content_start)
2489        })
2490        .collect();
2491
2492    let source_end: usize =
2493        (u32::from(source_file.syntax().text_range().end()) + bom_offset) as usize;
2494
2495    // For each converted directive, find its position in the all
2496    // list by raw_start (which is unique per CST node), then use
2497    // the NEXT all_starts content_start as its span end.
2498    //
2499    // INVARIANT: every node in `converted_nodes` was yielded by
2500    // `source_file.directives()`, which is the same iteration
2501    // `all_starts` filters from. So `position` always succeeds in
2502    // well-formed input. Falling back to the node's own
2503    // `text_range` rather than panicking keeps the parser usable
2504    // when a future change to the typed-AST surface ever de-syncs
2505    // those two enumerations - a `panic!()` reachable from user
2506    // input is a `#![forbid(unsafe_code)]`-class regression for an
2507    // LSP/WASM consumer.
2508    for (i, spanned) in directives.iter_mut().enumerate() {
2509        let node = &converted_nodes[i];
2510        let raw_start: usize = (u32::from(node.text_range().start()) + bom_offset) as usize;
2511        let node_end: usize = (u32::from(node.text_range().end()) + bom_offset) as usize;
2512        if let Some(pos) = all_starts.iter().position(|(rs, _)| *rs == raw_start) {
2513            let start = all_starts[pos].1;
2514            let end = all_starts
2515                .get(pos + 1)
2516                .map_or(source_end, |(_, content)| *content);
2517            spanned.span = Span::new(start, end);
2518        } else {
2519            // Defensive fallback: match the success-path
2520            // convention by also trimming leading trivia. Without
2521            // this trim the fallback span would underline blank
2522            // lines / column-0 comments above the directive when
2523            // LSP/miette renders the diagnostic, even though the
2524            // directive itself starts further down.
2525            let content_start = node
2526                .descendants_with_tokens()
2527                .filter_map(rowan::NodeOrToken::into_token)
2528                .find(|t| !is_trivia_kind(t.kind()))
2529                .map_or(raw_start, |t| {
2530                    (u32::from(t.text_range().start()) + bom_offset) as usize
2531                });
2532            spanned.span = Span::new(content_start, node_end);
2533        }
2534    }
2535}
2536
2537#[cfg(test)]
2538mod tests {
2539    use super::*;
2540
2541    fn assert_directive_count(result: &ParseResult, expected: usize) {
2542        assert_eq!(
2543            result.directives.len(),
2544            expected,
2545            "directive count mismatch: {:#?}",
2546            result.directives
2547        );
2548    }
2549
2550    #[test]
2551    fn open_directive_basic() {
2552        let src = "2024-01-15 open Assets:Cash\n";
2553        let result = parse_via_cst(src);
2554        assert_directive_count(&result, 1);
2555        let Directive::Open(open) = &result.directives[0].value else {
2556            panic!("expected Open, got {:?}", result.directives[0].value);
2557        };
2558        assert_eq!(open.date, naive_date(2024, 1, 15).unwrap());
2559        assert_eq!(open.account.as_str(), "Assets:Cash");
2560        assert!(open.currencies.is_empty());
2561        assert!(open.booking.is_none());
2562        assert!(open.meta.is_empty());
2563    }
2564
2565    #[test]
2566    fn open_directive_with_currencies_and_booking() {
2567        let src = "2024-01-15 open Assets:Brokerage USD,EUR \"STRICT\"\n";
2568        let result = parse_via_cst(src);
2569        assert_directive_count(&result, 1);
2570        let Directive::Open(open) = &result.directives[0].value else {
2571            panic!("expected Open");
2572        };
2573        let currencies: Vec<&str> = open.currencies.iter().map(Currency::as_str).collect();
2574        assert_eq!(currencies, vec!["USD", "EUR"]);
2575        assert_eq!(open.booking.as_deref(), Some("STRICT"));
2576    }
2577
2578    #[test]
2579    fn open_directive_with_metadata() {
2580        let src = "2024-01-15 open Assets:Cash\n  note: \"main checking\"\n  number: 42\n";
2581        let result = parse_via_cst(src);
2582        assert_directive_count(&result, 1);
2583        let Directive::Open(open) = &result.directives[0].value else {
2584            panic!("expected Open");
2585        };
2586        assert_eq!(
2587            open.meta.get("note"),
2588            Some(&MetaValue::String("main checking".to_string()))
2589        );
2590        assert_eq!(
2591            open.meta.get("number"),
2592            Some(&MetaValue::Number(Decimal::from(42)))
2593        );
2594    }
2595
2596    #[test]
2597    fn close_directive_basic() {
2598        let src = "2024-12-31 close Assets:Cash\n";
2599        let result = parse_via_cst(src);
2600        assert_directive_count(&result, 1);
2601        let Directive::Close(close) = &result.directives[0].value else {
2602            panic!("expected Close, got {:?}", result.directives[0].value);
2603        };
2604        assert_eq!(close.date, naive_date(2024, 12, 31).unwrap());
2605        assert_eq!(close.account.as_str(), "Assets:Cash");
2606    }
2607
2608    #[test]
2609    fn commodity_directive_basic() {
2610        let src = "2024-01-01 commodity HOOL\n";
2611        let result = parse_via_cst(src);
2612        assert_directive_count(&result, 1);
2613        let Directive::Commodity(c) = &result.directives[0].value else {
2614            panic!("expected Commodity");
2615        };
2616        assert_eq!(c.currency.as_str(), "HOOL");
2617    }
2618
2619    #[test]
2620    fn bom_offset_is_included_in_spans() {
2621        let src = "\u{FEFF}2024-01-15 open Assets:Cash\n";
2622        let result = parse_via_cst(src);
2623        assert!(result.has_leading_bom);
2624        let span = result.directives[0].span;
2625        assert_eq!(span.start, 3, "span should include BOM offset");
2626    }
2627
2628    #[test]
2629    fn note_directive_basic() {
2630        let src = "2024-01-15 note Assets:Cash \"deposit received\"\n";
2631        let result = parse_via_cst(src);
2632        assert_directive_count(&result, 1);
2633        let Directive::Note(note) = &result.directives[0].value else {
2634            panic!("expected Note");
2635        };
2636        assert_eq!(note.date, naive_date(2024, 1, 15).unwrap());
2637        assert_eq!(note.account.as_str(), "Assets:Cash");
2638        assert_eq!(note.comment, "deposit received");
2639    }
2640
2641    #[test]
2642    fn document_directive_basic() {
2643        let src = "2024-01-15 document Assets:Cash \"/path/to/file.pdf\"\n";
2644        let result = parse_via_cst(src);
2645        assert_directive_count(&result, 1);
2646        let Directive::Document(d) = &result.directives[0].value else {
2647            panic!("expected Document");
2648        };
2649        assert_eq!(d.account.as_str(), "Assets:Cash");
2650        assert_eq!(d.path, "/path/to/file.pdf");
2651        // tags/links currently unimplemented - pin as empty.
2652        assert!(d.tags.is_empty());
2653        assert!(d.links.is_empty());
2654    }
2655
2656    #[test]
2657    fn event_directive_basic() {
2658        let src = "2024-01-15 event \"location\" \"Berlin\"\n";
2659        let result = parse_via_cst(src);
2660        assert_directive_count(&result, 1);
2661        let Directive::Event(e) = &result.directives[0].value else {
2662            panic!("expected Event");
2663        };
2664        assert_eq!(e.event_type, "location");
2665        assert_eq!(e.value, "Berlin");
2666    }
2667
2668    #[test]
2669    fn query_directive_basic() {
2670        let src = "2024-01-15 query \"income\" \"SELECT account, sum(position)\"\n";
2671        let result = parse_via_cst(src);
2672        assert_directive_count(&result, 1);
2673        let Directive::Query(q) = &result.directives[0].value else {
2674            panic!("expected Query");
2675        };
2676        assert_eq!(q.name, "income");
2677        assert_eq!(q.query, "SELECT account, sum(position)");
2678    }
2679
2680    #[test]
2681    fn price_directive_basic() {
2682        let src = "2024-01-15 price USD 1.10 EUR\n";
2683        let result = parse_via_cst(src);
2684        assert_directive_count(&result, 1);
2685        let Directive::Price(p) = &result.directives[0].value else {
2686            panic!("expected Price");
2687        };
2688        assert_eq!(p.currency.as_str(), "USD");
2689        assert_eq!(p.amount.number, Decimal::new(110, 2));
2690        assert_eq!(p.amount.currency.as_str(), "EUR");
2691    }
2692
2693    #[test]
2694    fn balance_directive_basic() {
2695        let src = "2024-06-30 balance Assets:Cash 100.00 USD\n";
2696        let result = parse_via_cst(src);
2697        assert_directive_count(&result, 1);
2698        let Directive::Balance(b) = &result.directives[0].value else {
2699            panic!("expected Balance");
2700        };
2701        assert_eq!(b.account.as_str(), "Assets:Cash");
2702        assert_eq!(b.amount.number, Decimal::new(10000, 2));
2703        assert_eq!(b.amount.currency.as_str(), "USD");
2704        assert!(b.tolerance.is_none());
2705    }
2706
2707    #[test]
2708    fn balance_directive_with_explicit_tolerance() {
2709        let src = "2024-06-30 balance Assets:Cash 100.00 ~ 0.05 USD\n";
2710        let result = parse_via_cst(src);
2711        assert_directive_count(&result, 1);
2712        let Directive::Balance(b) = &result.directives[0].value else {
2713            panic!("expected Balance");
2714        };
2715        assert_eq!(b.amount.number, Decimal::new(10000, 2));
2716        assert_eq!(b.tolerance, Some(Decimal::new(5, 2)));
2717    }
2718
2719    #[test]
2720    fn pad_directive_basic() {
2721        let src = "2024-01-01 pad Assets:Cash Equity:Opening-Balances\n";
2722        let result = parse_via_cst(src);
2723        assert_directive_count(&result, 1);
2724        let Directive::Pad(p) = &result.directives[0].value else {
2725            panic!("expected Pad");
2726        };
2727        assert_eq!(p.account.as_str(), "Assets:Cash");
2728        assert_eq!(p.source_account.as_str(), "Equity:Opening-Balances");
2729    }
2730
2731    #[test]
2732    fn custom_directive_basic() {
2733        let src = "2024-01-01 custom \"budget\" \"food\" 500 USD\n";
2734        let result = parse_via_cst(src);
2735        assert_directive_count(&result, 1);
2736        let Directive::Custom(c) = &result.directives[0].value else {
2737            panic!("expected Custom");
2738        };
2739        assert_eq!(c.custom_type, "budget");
2740        assert_eq!(c.values.len(), 2);
2741        assert_eq!(c.values[0], MetaValue::String("food".to_string()));
2742        // 500 USD becomes an Amount (NUMBER + CURRENCY adjacent).
2743        let MetaValue::Amount(amt) = &c.values[1] else {
2744            panic!("expected Amount, got {:?}", c.values[1]);
2745        };
2746        assert_eq!(amt.number, Decimal::from(500));
2747        assert_eq!(amt.currency.as_str(), "USD");
2748    }
2749
2750    #[test]
2751    fn custom_directive_heterogeneous_values() {
2752        let src = "2024-01-01 custom \"test\" Assets:Cash TRUE 42 2024-06-15\n";
2753        let result = parse_via_cst(src);
2754        let Directive::Custom(c) = &result.directives[0].value else {
2755            panic!("expected Custom");
2756        };
2757        assert_eq!(c.values.len(), 4);
2758        assert!(matches!(c.values[0], MetaValue::Account(_)));
2759        assert_eq!(c.values[1], MetaValue::Bool(true));
2760        assert_eq!(c.values[2], MetaValue::Number(Decimal::from(42)));
2761        assert!(matches!(c.values[3], MetaValue::Date(_)));
2762    }
2763
2764    #[test]
2765    fn option_directive_populates_options_field() {
2766        let src = "option \"title\" \"My Ledger\"\n";
2767        let result = parse_via_cst(src);
2768        assert_directive_count(&result, 0);
2769        assert_eq!(result.options.len(), 1);
2770        assert_eq!(result.options[0].0, "title");
2771        assert_eq!(result.options[0].1, "My Ledger");
2772    }
2773
2774    #[test]
2775    fn include_directive_populates_includes_field() {
2776        let src = "include \"shared.beancount\"\n";
2777        let result = parse_via_cst(src);
2778        assert_directive_count(&result, 0);
2779        assert_eq!(result.includes.len(), 1);
2780        assert_eq!(result.includes[0].0, "shared.beancount");
2781    }
2782
2783    #[test]
2784    fn plugin_directive_with_config() {
2785        let src = "plugin \"my.plugin\" \"cfg\"\n";
2786        let result = parse_via_cst(src);
2787        assert_directive_count(&result, 0);
2788        assert_eq!(result.plugins.len(), 1);
2789        assert_eq!(result.plugins[0].0, "my.plugin");
2790        assert_eq!(result.plugins[0].1.as_deref(), Some("cfg"));
2791    }
2792
2793    #[test]
2794    fn plugin_directive_without_config() {
2795        let src = "plugin \"my.plugin\"\n";
2796        let result = parse_via_cst(src);
2797        assert_eq!(result.plugins.len(), 1);
2798        assert_eq!(result.plugins[0].0, "my.plugin");
2799        assert!(result.plugins[0].1.is_none());
2800    }
2801
2802    // ---- Transaction converter tests ------------------------------
2803
2804    #[test]
2805    fn transaction_basic_two_postings() {
2806        let src = "2024-01-15 * \"Coffee Shop\" \"Morning coffee\"\n  \
2807                   Expenses:Food:Coffee  5.00 USD\n  \
2808                   Assets:Cash\n";
2809        let result = parse_via_cst(src);
2810        assert_directive_count(&result, 1);
2811        let Directive::Transaction(t) = &result.directives[0].value else {
2812            panic!("expected Transaction");
2813        };
2814        assert_eq!(t.date, naive_date(2024, 1, 15).unwrap());
2815        assert_eq!(t.flag, '*');
2816        assert_eq!(
2817            t.payee.as_ref().map(InternedStr::as_str),
2818            Some("Coffee Shop")
2819        );
2820        assert_eq!(t.narration.as_str(), "Morning coffee");
2821        assert_eq!(t.postings.len(), 2);
2822
2823        let p0 = &t.postings[0].value;
2824        assert_eq!(p0.account.as_str(), "Expenses:Food:Coffee");
2825        let Some(IncompleteAmount::Complete(amt)) = &p0.units else {
2826            panic!("expected complete units, got {:?}", p0.units);
2827        };
2828        assert_eq!(amt.number, Decimal::new(500, 2));
2829        assert_eq!(amt.currency.as_str(), "USD");
2830
2831        let p1 = &t.postings[1].value;
2832        assert_eq!(p1.account.as_str(), "Assets:Cash");
2833        assert!(p1.units.is_none(), "auto-posting has no units");
2834    }
2835
2836    #[test]
2837    fn transaction_narration_only_no_payee() {
2838        let src = "2024-01-15 ! \"Pending\"\n  Assets:Cash  -5 USD\n";
2839        let result = parse_via_cst(src);
2840        let Directive::Transaction(t) = &result.directives[0].value else {
2841            panic!("expected Transaction");
2842        };
2843        assert_eq!(t.flag, '!');
2844        assert!(t.payee.is_none());
2845        assert_eq!(t.narration.as_str(), "Pending");
2846    }
2847
2848    #[test]
2849    fn transaction_implied_flag_via_leading_string() {
2850        let src = "2024-01-15 \"Implied\"\n  Assets:Cash  -5 USD\n";
2851        let result = parse_via_cst(src);
2852        let Directive::Transaction(t) = &result.directives[0].value else {
2853            panic!("expected Transaction");
2854        };
2855        assert_eq!(t.flag, '*', "implied flag defaults to *");
2856    }
2857
2858    #[test]
2859    fn transaction_with_tags_and_links() {
2860        let src = "2024-01-15 * \"Coffee\" #daily ^trip1\n  Assets:Cash  -5 USD\n";
2861        let result = parse_via_cst(src);
2862        let Directive::Transaction(t) = &result.directives[0].value else {
2863            panic!("expected Transaction");
2864        };
2865        assert_eq!(t.tags.len(), 1);
2866        assert_eq!(t.tags[0].as_str(), "daily");
2867        assert_eq!(t.links.len(), 1);
2868        assert_eq!(t.links[0].as_str(), "trip1");
2869    }
2870
2871    #[test]
2872    fn transaction_with_signed_amount() {
2873        let src = "2024-01-15 * \"x\"\n  Assets:Cash  -5.00 USD\n";
2874        let result = parse_via_cst(src);
2875        let Directive::Transaction(t) = &result.directives[0].value else {
2876            panic!("expected Transaction");
2877        };
2878        let Some(IncompleteAmount::Complete(amt)) = &t.postings[0].value.units else {
2879            panic!("expected complete units");
2880        };
2881        assert_eq!(amt.number, Decimal::new(-500, 2));
2882    }
2883
2884    #[test]
2885    fn transaction_with_posting_flag() {
2886        let src = "2024-01-15 * \"x\"\n  ! Assets:Cash  -5 USD\n";
2887        let result = parse_via_cst(src);
2888        let Directive::Transaction(t) = &result.directives[0].value else {
2889            panic!("expected Transaction");
2890        };
2891        assert_eq!(t.postings[0].value.flag, Some('!'));
2892    }
2893
2894    #[test]
2895    fn transaction_with_cost_spec_per_unit() {
2896        let src = "2024-01-15 * \"buy\"\n  \
2897                   Assets:Inv  10 HOOL {500.00 USD}\n  \
2898                   Assets:Cash\n";
2899        let result = parse_via_cst(src);
2900        let Directive::Transaction(t) = &result.directives[0].value else {
2901            panic!("expected Transaction");
2902        };
2903        let cost = t.postings[0].value.cost.as_ref().expect("cost spec");
2904        assert!(!cost.merge);
2905        let Some(CostNumber::PerUnit { value }) = &cost.number else {
2906            panic!("expected PerUnit");
2907        };
2908        assert_eq!(*value, Decimal::new(50000, 2));
2909        assert_eq!(cost.currency.as_ref().unwrap().as_str(), "USD");
2910    }
2911
2912    #[test]
2913    fn transaction_with_cost_spec_total() {
2914        let src = "2024-01-15 * \"buy\"\n  \
2915                   Assets:Inv  10 HOOL {{5000 USD}}\n  \
2916                   Assets:Cash\n";
2917        let result = parse_via_cst(src);
2918        let Directive::Transaction(t) = &result.directives[0].value else {
2919            panic!("expected Transaction");
2920        };
2921        let cost = t.postings[0].value.cost.as_ref().expect("cost spec");
2922        let Some(CostNumber::Total { value }) = &cost.number else {
2923            panic!("expected Total");
2924        };
2925        assert_eq!(*value, Decimal::from(5000));
2926    }
2927
2928    #[test]
2929    fn transaction_with_price_annotation_unit() {
2930        let src = "2024-01-15 * \"buy\"\n  \
2931                   Assets:Inv  10 HOOL @ 510 USD\n  \
2932                   Assets:Cash\n";
2933        let result = parse_via_cst(src);
2934        let Directive::Transaction(t) = &result.directives[0].value else {
2935            panic!("expected Transaction");
2936        };
2937        let price = t.postings[0]
2938            .value
2939            .price
2940            .as_ref()
2941            .expect("price annotation");
2942        assert!(price.is_unit());
2943        let Some(IncompleteAmount::Complete(amt)) = &price.amount else {
2944            panic!("expected complete price amount");
2945        };
2946        assert_eq!(amt.number, Decimal::from(510));
2947        assert_eq!(amt.currency.as_str(), "USD");
2948    }
2949
2950    #[test]
2951    fn transaction_with_price_annotation_total() {
2952        let src = "2024-01-15 * \"buy\"\n  \
2953                   Assets:Inv  10 HOOL @@ 5100 USD\n  \
2954                   Assets:Cash\n";
2955        let result = parse_via_cst(src);
2956        let Directive::Transaction(t) = &result.directives[0].value else {
2957            panic!("expected Transaction");
2958        };
2959        let price = t.postings[0]
2960            .value
2961            .price
2962            .as_ref()
2963            .expect("price annotation");
2964        assert!(!price.is_unit(), "@@ is total form");
2965    }
2966
2967    // ---- regression tests for review findings (#1281) ----------
2968
2969    #[test]
2970    fn document_directive_preserves_tags_and_links() {
2971        // Finding 1: convert_document was filling tags/links empty
2972        // unconditionally. Legacy parse_document_directive collects
2973        // trailing `#tag` / `^link` tokens after the path STRING.
2974        let src = "2024-06-01 document Assets:Bank \"stmt.pdf\" #quarter1 ^scan42 #urgent\n";
2975        let result = parse_via_cst(src);
2976        assert!(result.errors.is_empty(), "errors: {:?}", result.errors);
2977        let Directive::Document(doc) = &result.directives[0].value else {
2978            panic!("expected Document");
2979        };
2980        let tags: Vec<&str> = doc.tags.iter().map(Tag::as_str).collect();
2981        let links: Vec<&str> = doc.links.iter().map(Link::as_str).collect();
2982        assert_eq!(tags, vec!["quarter1", "urgent"]);
2983        assert_eq!(links, vec!["scan42"]);
2984    }
2985
2986    #[test]
2987    fn open_directive_rejects_invalid_booking_method() {
2988        // Finding 2: convert_open accepted any booking string; legacy
2989        // validates against [FIFO, STRICT, STRICT_WITH_SIZE, LIFO,
2990        // HIFO, NONE, AVERAGE] and on mismatch drops the directive
2991        // AND emits InvalidBookingMethod.
2992        let src = "2024-01-01 open Assets:Bank USD \"GARBAGE\"\n";
2993        let result = parse_via_cst(src);
2994        assert_eq!(result.directives.len(), 0, "directive should be dropped");
2995        assert_eq!(result.errors.len(), 1);
2996        let err = &result.errors[0];
2997        assert!(
2998            matches!(
2999                &err.kind,
3000                crate::ParseErrorKind::InvalidBookingMethod(s) if s == "GARBAGE"
3001            ),
3002            "expected InvalidBookingMethod, got {:?}",
3003            err.kind,
3004        );
3005    }
3006
3007    #[test]
3008    fn open_directive_accepts_all_valid_booking_methods() {
3009        for method in VALID_BOOKING_METHODS {
3010            let src = format!("2024-01-01 open Assets:Bank USD \"{method}\"\n");
3011            let result = parse_via_cst(&src);
3012            assert!(
3013                result.errors.is_empty(),
3014                "{method} rejected: {:?}",
3015                result.errors
3016            );
3017            let Directive::Open(open) = &result.directives[0].value else {
3018                panic!("{method}: expected Open");
3019            };
3020            assert_eq!(open.booking.as_deref(), Some(*method));
3021        }
3022    }
3023
3024    #[test]
3025    fn unclosed_pushtag_at_eof_emits_diagnostic() {
3026        // Finding 3: legacy emits one UnclosedPushtag per leftover
3027        // tag at EOF, pointing at the originating push directive.
3028        let src = "pushtag #active\n2024-01-01 open Assets:Bank USD\n";
3029        let result = parse_via_cst(src);
3030        let unclosed: Vec<_> = result
3031            .errors
3032            .iter()
3033            .filter_map(|e| match &e.kind {
3034                crate::ParseErrorKind::UnclosedPushtag(t) => Some(t.clone()),
3035                _ => None,
3036            })
3037            .collect();
3038        assert_eq!(unclosed, vec!["active".to_string()]);
3039    }
3040
3041    #[test]
3042    fn unclosed_pushmeta_at_eof_emits_diagnostic() {
3043        // Finding 4: same as pushtag, for pushmeta.
3044        let src = "pushmeta location: \"NYC\"\n2024-01-01 open Assets:Bank USD\n";
3045        let result = parse_via_cst(src);
3046        let unclosed: Vec<_> = result
3047            .errors
3048            .iter()
3049            .filter_map(|e| match &e.kind {
3050                crate::ParseErrorKind::UnclosedPushmeta(k) => Some(k.clone()),
3051                _ => None,
3052            })
3053            .collect();
3054        assert_eq!(unclosed, vec!["location".to_string()]);
3055    }
3056
3057    #[test]
3058    fn invalid_poptag_on_mismatch_emits_diagnostic() {
3059        // Finding 5: poptag for a tag never pushed should error,
3060        // not silently no-op.
3061        let src = "pushtag #foo\npoptag #bar\npoptag #foo\n";
3062        let result = parse_via_cst(src);
3063        let mismatches: Vec<_> = result
3064            .errors
3065            .iter()
3066            .filter_map(|e| match &e.kind {
3067                crate::ParseErrorKind::InvalidPoptag(t) => Some(t.clone()),
3068                _ => None,
3069            })
3070            .collect();
3071        assert_eq!(mismatches, vec!["bar".to_string()]);
3072        // and the matching #foo poptag should leave NO unclosed
3073        // diagnostic - i.e. the stack is empty after the matched pop.
3074        let leftover: Vec<_> = result
3075            .errors
3076            .iter()
3077            .filter(|e| matches!(e.kind, crate::ParseErrorKind::UnclosedPushtag(_)))
3078            .collect();
3079        assert!(leftover.is_empty(), "unexpected leftover: {leftover:?}");
3080    }
3081
3082    #[test]
3083    fn invalid_popmeta_on_mismatch_emits_diagnostic() {
3084        // Finding 6: popmeta for a key never pushed should error,
3085        // not silently no-op. Also checks Vec-stack shadow semantics:
3086        // pushmeta x: 1; pushmeta x: 2; popmeta x leaves x=1 active.
3087        let src = "pushmeta location: \"NYC\"\npopmeta nope:\npopmeta location:\n";
3088        let result = parse_via_cst(src);
3089        let mismatches: Vec<_> = result
3090            .errors
3091            .iter()
3092            .filter_map(|e| match &e.kind {
3093                crate::ParseErrorKind::InvalidPopmeta(k) => Some(k.clone()),
3094                _ => None,
3095            })
3096            .collect();
3097        assert_eq!(mismatches, vec!["nope".to_string()]);
3098        let leftover: Vec<_> = result
3099            .errors
3100            .iter()
3101            .filter(|e| matches!(e.kind, crate::ParseErrorKind::UnclosedPushmeta(_)))
3102            .collect();
3103        assert!(leftover.is_empty(), "unexpected leftover: {leftover:?}");
3104    }
3105
3106    #[test]
3107    fn pushmeta_shadow_pop_restores_prior_value() {
3108        // Vec-stack semantics (the reason meta_stack isn't a HashMap):
3109        // shadow-pop must restore the prior value, not delete the key.
3110        let src = "pushmeta loc: \"NYC\"\n\
3111                   pushmeta loc: \"LDN\"\n\
3112                   popmeta loc:\n\
3113                   2024-01-01 open Assets:Bank USD\n\
3114                   popmeta loc:\n";
3115        let result = parse_via_cst(src);
3116        let Directive::Open(open) = &result.directives[0].value else {
3117            panic!("expected Open");
3118        };
3119        assert_eq!(
3120            open.meta.get("loc"),
3121            Some(&MetaValue::String("NYC".to_string())),
3122            "shadow pop should restore NYC, got {:?}",
3123            open.meta.get("loc"),
3124        );
3125    }
3126
3127    #[test]
3128    fn error_recovery_classifies_bom_in_directive_body() {
3129        // Finding 7: error-recovery path should distinguish BOM-in-
3130        // line from a generic SyntaxError so users see the
3131        // BOM-removal hint instead of "unexpected input".
3132        let src = "garbage\u{FEFF}content\n";
3133        let result = parse_via_cst(src);
3134        let bom_errors: Vec<_> = result
3135            .errors
3136            .iter()
3137            .filter(|e| matches!(e.kind, crate::ParseErrorKind::BomInDirectiveBody))
3138            .collect();
3139        assert_eq!(bom_errors.len(), 1, "errors: {:?}", result.errors);
3140        assert!(
3141            bom_errors[0].hint.is_some(),
3142            "BomInDirectiveBody should carry BOM_REMOVAL_HINT",
3143        );
3144    }
3145
3146    #[test]
3147    fn error_recovery_emits_both_invalid_account_and_bom_for_dual_line() {
3148        // Round-2 finding: legacy `parser.rs:2258-2263` emits a
3149        // SECONDARY `BomInDirectiveBody` whenever the line ALSO
3150        // contains a BOM byte and the primary diagnostic isn't
3151        // BOM itself. Without this, a Windows-exported file with
3152        // a Unicode account AND an internal BOM loses the BOM
3153        // hint entirely.
3154        let src = "garbage Assets:Café\u{FEFF}content\n";
3155        let result = parse_via_cst(src);
3156        let invalid_account_count = result
3157            .errors
3158            .iter()
3159            .filter(|e| matches!(e.kind, crate::ParseErrorKind::InvalidAccount(_)))
3160            .count();
3161        let bom_count = result
3162            .errors
3163            .iter()
3164            .filter(|e| matches!(e.kind, crate::ParseErrorKind::BomInDirectiveBody))
3165            .count();
3166        assert_eq!(
3167            invalid_account_count, 1,
3168            "expected one InvalidAccount: {:?}",
3169            result.errors
3170        );
3171        assert_eq!(
3172            bom_count, 1,
3173            "expected secondary BomInDirectiveBody: {:?}",
3174            result.errors
3175        );
3176        // The secondary BOM diagnostic must carry the hint so
3177        // miette renders the remediation step.
3178        let bom_err = result
3179            .errors
3180            .iter()
3181            .find(|e| matches!(e.kind, crate::ParseErrorKind::BomInDirectiveBody))
3182            .unwrap();
3183        assert!(bom_err.hint.is_some());
3184    }
3185
3186    #[test]
3187    fn error_recovery_classifies_unicode_account() {
3188        // Finding 7: a Unicode-character account name (Assets:Café)
3189        // should surface as InvalidAccount, not generic SyntaxError.
3190        // We embed it in a malformed line so the parser routes to
3191        // the error-recovery path.
3192        let src = "garbage Assets:Café content\n";
3193        let result = parse_via_cst(src);
3194        let unicode_errors: Vec<_> = result
3195            .errors
3196            .iter()
3197            .filter_map(|e| match &e.kind {
3198                crate::ParseErrorKind::InvalidAccount(s) => Some(s.clone()),
3199                _ => None,
3200            })
3201            .collect();
3202        assert_eq!(unicode_errors, vec!["Assets:Café".to_string()]);
3203    }
3204
3205    #[test]
3206    fn transaction_with_pipe_emits_deprecated_pipe_symbol() {
3207        // Finding 7 (transaction path): legacy emits
3208        // DeprecatedPipeSymbol when a `|` separates payee/narration.
3209        let src = "2024-01-15 * \"Acme\" | \"invoice\"\n  Assets:Cash  -5 USD\n  Expenses:X\n";
3210        let result = parse_via_cst(src);
3211        let pipe_count = result
3212            .errors
3213            .iter()
3214            .filter(|e| matches!(e.kind, crate::ParseErrorKind::DeprecatedPipeSymbol))
3215            .count();
3216        assert_eq!(pipe_count, 1, "errors: {:?}", result.errors);
3217        // and the transaction itself is kept (legacy behavior).
3218        assert_eq!(result.directives.len(), 1);
3219    }
3220
3221    #[test]
3222    fn transaction_trailing_comments_after_final_posting() {
3223        // Finding 8: comments that appear AFTER the last posting
3224        // but inside the transaction body belong to
3225        // Transaction::trailing_comments, not lost.
3226        let src = "2024-01-15 * \"x\"\n  \
3227                   Assets:Cash  -5 USD\n  \
3228                   Expenses:X\n  \
3229                   ; trailing one\n  \
3230                   ; trailing two\n";
3231        let result = parse_via_cst(src);
3232        let Directive::Transaction(t) = &result.directives[0].value else {
3233            panic!("expected Transaction");
3234        };
3235        assert_eq!(
3236            t.trailing_comments.len(),
3237            2,
3238            "got: {:?}",
3239            t.trailing_comments
3240        );
3241        assert!(t.trailing_comments[0].contains("trailing one"));
3242        assert!(t.trailing_comments[1].contains("trailing two"));
3243    }
3244
3245    // ---- arithmetic AMOUNT evaluation (phase 3.7 flip blocker) -
3246
3247    #[test]
3248    fn posting_amount_evaluates_division() {
3249        // Regression for `test_arithmetic_expressions_consistency`:
3250        // `120 / 3 USD` must evaluate to 40 USD so the transaction
3251        // balances. Without this the CST flip breaks every ledger
3252        // using arithmetic split syntax.
3253        let src = "2024-01-15 * \"split\"\n  \
3254                   Expenses:Food   120 / 3 USD\n  \
3255                   Assets:Bank    -40 USD\n";
3256        let result = parse_via_cst(src);
3257        assert!(result.errors.is_empty(), "errors: {:?}", result.errors);
3258        let Directive::Transaction(t) = &result.directives[0].value else {
3259            panic!("expected Transaction");
3260        };
3261        let Some(IncompleteAmount::Complete(amt)) = &t.postings[0].value.units else {
3262            panic!("expected complete amount on posting 0");
3263        };
3264        assert_eq!(amt.number, Decimal::from(40));
3265        assert_eq!(amt.currency.as_str(), "USD");
3266    }
3267
3268    #[test]
3269    fn posting_amount_evaluates_addition_and_multiplication_precedence() {
3270        // `2 + 3 * 4 USD` = 14 USD (standard precedence).
3271        let src = "2024-01-15 * \"x\"\n  \
3272                   Expenses:X   2 + 3 * 4 USD\n  \
3273                   Assets:Y   -14 USD\n";
3274        let result = parse_via_cst(src);
3275        assert!(result.errors.is_empty(), "errors: {:?}", result.errors);
3276        let Directive::Transaction(t) = &result.directives[0].value else {
3277            panic!("expected Transaction");
3278        };
3279        let Some(IncompleteAmount::Complete(amt)) = &t.postings[0].value.units else {
3280            panic!("expected complete amount");
3281        };
3282        assert_eq!(amt.number, Decimal::from(14));
3283    }
3284
3285    #[test]
3286    fn posting_amount_evaluates_parens_override_precedence() {
3287        // `(2 + 3) * 4 USD` = 20 USD.
3288        let src = "2024-01-15 * \"x\"\n  \
3289                   Expenses:X   (2 + 3) * 4 USD\n  \
3290                   Assets:Y   -20 USD\n";
3291        let result = parse_via_cst(src);
3292        assert!(result.errors.is_empty(), "errors: {:?}", result.errors);
3293        let Directive::Transaction(t) = &result.directives[0].value else {
3294            panic!("expected Transaction");
3295        };
3296        let Some(IncompleteAmount::Complete(amt)) = &t.postings[0].value.units else {
3297            panic!("expected complete amount");
3298        };
3299        assert_eq!(amt.number, Decimal::from(20));
3300    }
3301
3302    #[test]
3303    fn posting_amount_evaluates_subtraction_left_associative() {
3304        // `10 - 3 - 2 USD` = 5 USD (left-associative, not 9).
3305        let src = "2024-01-15 * \"x\"\n  \
3306                   Expenses:X   10 - 3 - 2 USD\n  \
3307                   Assets:Y   -5 USD\n";
3308        let result = parse_via_cst(src);
3309        assert!(result.errors.is_empty(), "errors: {:?}", result.errors);
3310        let Directive::Transaction(t) = &result.directives[0].value else {
3311            panic!("expected Transaction");
3312        };
3313        let Some(IncompleteAmount::Complete(amt)) = &t.postings[0].value.units else {
3314            panic!("expected complete amount");
3315        };
3316        assert_eq!(amt.number, Decimal::from(5));
3317    }
3318
3319    #[test]
3320    fn posting_amount_division_by_zero_drops_number() {
3321        // `5 / 0 USD` - legacy returns parse error; we return None
3322        // from the evaluator, which degrades to CurrencyOnly here.
3323        // The transaction won't balance and downstream validation
3324        // surfaces that as the user-facing error.
3325        let src = "2024-01-15 * \"x\"\n  \
3326                   Expenses:X   5 / 0 USD\n  \
3327                   Assets:Y\n";
3328        let result = parse_via_cst(src);
3329        let Directive::Transaction(t) = &result.directives[0].value else {
3330            panic!("expected Transaction");
3331        };
3332        // Either the units degrade to CurrencyOnly (number lost)
3333        // or to None - both are acceptable since the input is
3334        // semantically invalid. The strict assertion is that we
3335        // DON'T silently return 5 (the first NUMBER) as the value.
3336        match &t.postings[0].value.units {
3337            None | Some(IncompleteAmount::CurrencyOnly(_)) => {}
3338            other => panic!("div-by-zero leaked: {other:?}"),
3339        }
3340    }
3341
3342    // ---- round-8 final compat regressions (#1282 flip) ---------
3343
3344    #[test]
3345    fn indented_top_level_directive_emits_error() {
3346        // A top-level directive that starts at column N>0 is a
3347        // syntax error per the Beancount spec; the CST grammar
3348        // accepts it silently, so the converter has to surface
3349        // the diagnostic at directive-content-start position.
3350        let src = "2020-07-28 open Assets:Foo\n  2020-07-28 open Assets:Bar\n";
3351        let result = parse_via_cst(src);
3352        let indent_errs = result
3353            .errors
3354            .iter()
3355            .filter(|e| match &e.kind {
3356                crate::ParseErrorKind::SyntaxError(s) => s.contains("column 0"),
3357                _ => false,
3358            })
3359            .count();
3360        assert_eq!(
3361            indent_errs, 1,
3362            "expected one column-0 diagnostic, got: {:?}",
3363            result.errors
3364        );
3365    }
3366
3367    #[test]
3368    fn indented_directive_after_blank_line_still_emits_error() {
3369        // Same as above but with a blank line between the
3370        // first directive and the indented one - the blank line
3371        // shouldn't mask the indentation error.
3372        let src = "2020-07-28 open Assets:Foo\n\n  2020-07-28 open Assets:Bar\n";
3373        let result = parse_via_cst(src);
3374        let indent_errs = result
3375            .errors
3376            .iter()
3377            .filter(|e| match &e.kind {
3378                crate::ParseErrorKind::SyntaxError(s) => s.contains("column 0"),
3379                _ => false,
3380            })
3381            .count();
3382        assert_eq!(indent_errs, 1, "errors: {:?}", result.errors);
3383    }
3384
3385    #[test]
3386    fn top_level_directive_at_column_0_no_diagnostic() {
3387        // Sanity: well-formed top-level directives must NOT
3388        // trigger the indent diagnostic.
3389        let src = "2020-07-28 open Assets:Foo\n2020-07-28 open Assets:Bar\n";
3390        let result = parse_via_cst(src);
3391        assert!(result.errors.is_empty(), "errors: {:?}", result.errors);
3392    }
3393
3394    #[test]
3395    fn custom_directive_with_bare_currency_emits_error() {
3396        // `bean-check` rejects bare currency literals in custom
3397        // value position; the CST converter mirrors that.
3398        let src = "2025-01-01 custom \"x\" 10 USD \"y\" NZD\n";
3399        let result = parse_via_cst(src);
3400        let bare_curr_errs = result
3401            .errors
3402            .iter()
3403            .filter(|e| match &e.kind {
3404                crate::ParseErrorKind::SyntaxError(s) => s.contains("bare currency"),
3405                _ => false,
3406            })
3407            .count();
3408        assert_eq!(
3409            bare_curr_errs, 1,
3410            "expected one bare-currency diagnostic, got: {:?}",
3411            result.errors
3412        );
3413    }
3414
3415    #[test]
3416    fn custom_directive_with_amount_no_error() {
3417        // Sanity: `10 USD` (NUMBER + CURRENCY paired as Amount)
3418        // is a valid custom value and must NOT trigger the
3419        // bare-currency diagnostic.
3420        let src = "2025-01-01 custom \"x\" 10 USD\n";
3421        let result = parse_via_cst(src);
3422        assert!(result.errors.is_empty(), "errors: {:?}", result.errors);
3423    }
3424
3425    // ---- round-7 compat regressions (#1282 flip) ---------------
3426
3427    #[test]
3428    fn balance_assertion_evaluates_arithmetic_value() {
3429        // PR #1282 compat regression: rledger emitted a balance
3430        // failure for `Assets:X  0.25+ 0.75 GBP` because only
3431        // the first NUMBER (0.25) was used as the assertion
3432        // target. CST converters for BALANCE/PRICE now evaluate
3433        // arithmetic the same way posting AMOUNTs do.
3434        let src = "2024-01-01 open Assets:X GBP\n\
3435                   2024-01-01 open Equity:Open GBP\n\
3436                   2024-01-02 * \"deposit\"\n  \
3437                   Assets:X         1.00 GBP\n  \
3438                   Equity:Open     -1.00 GBP\n\
3439                   2024-01-03 balance Assets:X  0.25 + 0.75 GBP\n";
3440        let result = parse_via_cst(src);
3441        assert!(result.errors.is_empty(), "errors: {:?}", result.errors);
3442        let bal = result
3443            .directives
3444            .iter()
3445            .find_map(|d| match &d.value {
3446                Directive::Balance(b) => Some(b),
3447                _ => None,
3448            })
3449            .expect("expected a Balance directive");
3450        assert_eq!(bal.amount.number, Decimal::from(1));
3451        assert_eq!(bal.amount.currency.as_str(), "GBP");
3452    }
3453
3454    #[test]
3455    fn price_directive_evaluates_arithmetic_value() {
3456        let src = "2024-01-01 price USD  1/2 EUR\n";
3457        let result = parse_via_cst(src);
3458        assert!(result.errors.is_empty(), "errors: {:?}", result.errors);
3459        let Directive::Price(p) = &result.directives[0].value else {
3460            panic!("expected Price");
3461        };
3462        assert_eq!(p.amount.number, Decimal::new(5, 1));
3463        assert_eq!(p.amount.currency.as_str(), "EUR");
3464    }
3465
3466    // ---- round-5 architecture review (#1281) -------------------
3467
3468    #[test]
3469    fn body_line_tag_does_not_drop_following_postings_comment() {
3470        // F2-bis: trailing TAG / LINK tokens on transaction body
3471        // lines are valid Beancount (extend the transaction's
3472        // tag/link set). Before the exemption was added, the
3473        // `pending.clear()` over-fired on the TAG and silently
3474        // dropped the preceding comment that semantically
3475        // belonged to the next posting.
3476        let src = "2024-01-01 * \"x\"\n  \
3477                   Assets:A   100 USD\n  \
3478                   ; comment-for-B\n  \
3479                   #late-tag\n  \
3480                   Assets:B   -100 USD\n";
3481        let result = parse_via_cst(src);
3482        assert!(result.errors.is_empty(), "errors: {:?}", result.errors);
3483        let Directive::Transaction(t) = &result.directives[0].value else {
3484            panic!("expected Transaction");
3485        };
3486        // The trailing tag joins the transaction's tag set.
3487        assert!(
3488            t.tags.iter().any(|tag| tag.as_str() == "late-tag"),
3489            "expected #late-tag in tags: {:?}",
3490            t.tags,
3491        );
3492        // And the comment survives - attached to the next posting.
3493        let b = t.postings.last().expect("at least one posting");
3494        assert_eq!(b.value.account.as_str(), "Assets:B");
3495        assert!(
3496            b.value.comments.iter().any(|c| c.contains("comment-for-B")),
3497            "expected comment-for-B to survive on Assets:B: {:?}",
3498            b.value.comments,
3499        );
3500    }
3501
3502    #[test]
3503    fn oversized_number_in_amount_emits_diagnostic() {
3504        // F5-bis: the non-arithmetic NUMBER path is now symmetric
3505        // with the arithmetic-evaluation path. A NUMBER whose
3506        // text the lexer accepts but `Decimal::from_str` rejects
3507        // (e.g., 30+ digits, exceeding the 28-digit precision
3508        // ceiling) used to silently degrade to `CurrencyOnly`.
3509        let huge = "1".to_string() + &"2345678901234567890".repeat(2); // 39 digits
3510        let src = format!("2024-01-15 * \"big\"\n  Expenses:X   {huge} USD\n  Assets:Y\n");
3511        let result = parse_via_cst(&src);
3512        let invalid_num = result
3513            .errors
3514            .iter()
3515            .filter(|e| match &e.kind {
3516                crate::ParseErrorKind::SyntaxError(s) => s.contains("invalid number"),
3517                _ => false,
3518            })
3519            .count();
3520        assert_eq!(
3521            invalid_num, 1,
3522            "expected one invalid-number diagnostic, got: {:?}",
3523            result.errors
3524        );
3525    }
3526
3527    // ---- round-4 architecture review (#1281) -------------------
3528
3529    #[test]
3530    fn posting_with_two_amount_siblings_emits_error_and_keeps_first() {
3531        // F1: a posting like `Expenses:Food  5 USD + 3 USD` builds
3532        // two sibling AMOUNT nodes in the CST. `Posting::amount()`
3533        // only returns the first. Without an explicit guard the
3534        // second AMOUNT plus the joining `+` would be silently
3535        // dropped - the user's transaction would balance against
3536        // 5 USD instead of the intended 8 USD with no diagnostic.
3537        let src = "2024-01-15 * \"ambig\"\n  \
3538                   Expenses:Food   5 USD + 3 USD\n  \
3539                   Assets:Bank\n";
3540        let result = parse_via_cst(src);
3541        let trailing_count = result
3542            .errors
3543            .iter()
3544            .filter(|e| match &e.kind {
3545                crate::ParseErrorKind::SyntaxError(s) => s.contains("trailing tokens"),
3546                _ => false,
3547            })
3548            .count();
3549        assert_eq!(
3550            trailing_count, 1,
3551            "expected one trailing-tokens diagnostic, got: {:?}",
3552            result.errors
3553        );
3554        // The first AMOUNT is still surfaced so partial recovery
3555        // works for downstream tooling.
3556        let Directive::Transaction(t) = &result.directives[0].value else {
3557            panic!("expected Transaction");
3558        };
3559        let Some(IncompleteAmount::Complete(amt)) = &t.postings[0].value.units else {
3560            panic!("expected complete units from the first AMOUNT");
3561        };
3562        assert_eq!(amt.number, Decimal::from(5));
3563    }
3564
3565    #[test]
3566    fn comments_dont_leak_across_failed_posting() {
3567        // F2: when convert_posting returns None, the queue of
3568        // pending pre-posting comments must be CLEARED so they
3569        // don't migrate forward and attach to the next valid
3570        // posting. Without the clear, comments labelled for the
3571        // failed posting would silently re-attach to the wrong
3572        // account, visibly misleading the user.
3573        let src = "2024-01-15 * \"test\"\n  \
3574                   Assets:A   100 USD\n  \
3575                   ; comment-for-bad\n  \
3576                   ; another-comment\n  \
3577                   bogus_token_line_no_account\n  \
3578                   ; comment-for-good\n  \
3579                   Assets:B   -100 USD\n";
3580        let result = parse_via_cst(src);
3581        let Directive::Transaction(t) = &result.directives[0].value else {
3582            panic!("expected Transaction");
3583        };
3584        // Assets:B is the LAST successful posting; the only
3585        // comment that should attach to it is the one that
3586        // immediately precedes it (`; comment-for-good`). The
3587        // pre-failed-posting comments belong to the failed
3588        // posting and should be DROPPED with it.
3589        let b = t.postings.last().expect("at least one posting");
3590        assert_eq!(b.value.account.as_str(), "Assets:B");
3591        assert!(
3592            !b.value
3593                .comments
3594                .iter()
3595                .any(|c| c.contains("comment-for-bad")),
3596            "comment-for-bad leaked across failed posting onto Assets:B: {:?}",
3597            b.value.comments
3598        );
3599        assert!(
3600            !b.value
3601                .comments
3602                .iter()
3603                .any(|c| c.contains("another-comment")),
3604            "another-comment leaked: {:?}",
3605            b.value.comments
3606        );
3607    }
3608
3609    #[test]
3610    fn arithmetic_overflow_in_amount_emits_diagnostic() {
3611        // F5: when `is_arithmetic` is true but the evaluator
3612        // gives up (overflow, div-by-zero), the converter used
3613        // to silently produce CurrencyOnly. Now an explicit
3614        // SyntaxError fires so the user sees the actual root
3615        // cause instead of just a downstream "doesn't balance".
3616        // Decimal max is 28 digits - `9999999999999999999999999999 *
3617        // 9999999999999999999999999999` overflows.
3618        let huge = "9999999999999999999999999999 * 9999999999999999999999999999";
3619        let src = format!("2024-01-15 * \"big\"\n  Expenses:X   {huge} USD\n  Assets:Y\n");
3620        let result = parse_via_cst(&src);
3621        let arith_errs = result
3622            .errors
3623            .iter()
3624            .filter(|e| match &e.kind {
3625                crate::ParseErrorKind::SyntaxError(s) => s.contains("arithmetic"),
3626                _ => false,
3627            })
3628            .count();
3629        assert_eq!(
3630            arith_errs, 1,
3631            "expected one arithmetic-error diagnostic, got: {:?}",
3632            result.errors
3633        );
3634    }
3635
3636    // ---- 14 emission-gap regressions (#1281 round-3 review) ----
3637
#[test]
fn date_with_single_digit_month_parses() {
    // A month written without zero padding (`2024-1-15`) is accepted.
    let parsed = parse_via_cst("2024-1-15 open Assets:Checking\n");
    assert!(parsed.errors.is_empty(), "errors: {:?}", parsed.errors);

    match &parsed.directives[0].value {
        Directive::Open(open) => assert_eq!(open.date, naive_date(2024, 1, 15).unwrap()),
        _ => panic!("expected Open"),
    }
}
3647
#[test]
fn date_with_single_digit_day_parses() {
    // A day written without zero padding (`2024-01-5`) is accepted.
    let parsed = parse_via_cst("2024-01-5 open Assets:Cash USD\n");
    assert!(parsed.errors.is_empty(), "errors: {:?}", parsed.errors);

    match &parsed.directives[0].value {
        Directive::Open(open) => assert_eq!(open.date, naive_date(2024, 1, 5).unwrap()),
        _ => panic!("expected Open"),
    }
}
3657
#[test]
fn date_with_single_digit_month_and_day_parses() {
    // Both month and day lack zero padding (`2024-1-1`); still accepted.
    let parsed = parse_via_cst("2024-1-1 open Assets:Cash USD\n");
    assert!(parsed.errors.is_empty(), "errors: {:?}", parsed.errors);

    match &parsed.directives[0].value {
        Directive::Open(open) => assert_eq!(open.date, naive_date(2024, 1, 1).unwrap()),
        _ => panic!("expected Open"),
    }
}
3667
#[test]
fn date_with_month_out_of_range_emits_invalid_date_value() {
    // Month 13 must produce exactly one `InvalidDateValue` whose message
    // says the month is out of range.
    let parsed = parse_via_cst("2024-13-01 open Assets:Cash USD\n");

    // Gather the message of every `InvalidDateValue` diagnostic.
    let mut date_messages = Vec::new();
    for err in &parsed.errors {
        if let crate::ParseErrorKind::InvalidDateValue(text) = &err.kind {
            date_messages.push(text.clone());
        }
    }

    assert_eq!(date_messages.len(), 1, "errors: {:?}", parsed.errors);
    let msg = &date_messages[0];
    let mentions_month = msg.contains("month");
    assert!(mentions_month && msg.contains("out of range"), "msg: {msg}");
}
3686
#[test]
fn date_with_invalid_leap_year_emits_invalid_date_value() {
    // 2023 is not a leap year, so Feb 29 must produce exactly one
    // `InvalidDateValue` that blames the day and names the year-month.
    let parsed = parse_via_cst("2023-02-29 open Assets:Cash USD\n");

    // Gather the message of every `InvalidDateValue` diagnostic.
    let mut date_messages = Vec::new();
    for err in &parsed.errors {
        if let crate::ParseErrorKind::InvalidDateValue(text) = &err.kind {
            date_messages.push(text.clone());
        }
    }

    assert_eq!(date_messages.len(), 1, "errors: {:?}", parsed.errors);
    let msg = &date_messages[0];
    let required_fragments = ["day", "out of range", "2023-02"];
    assert!(
        required_fragments
            .iter()
            .all(|fragment| msg.contains(*fragment)),
        "msg: {msg}"
    );
}
3705
#[test]
fn date_with_completely_invalid_value_still_emits_error() {
    // In `2024-13-45` the month and the day are BOTH out of range. The
    // integration test this mirrors only required a non-empty error
    // list, so any error variant is acceptable here.
    let parsed = parse_via_cst("2024-13-45 open Assets:Bank\n");
    let reported = &parsed.errors;
    assert!(!reported.is_empty(), "errors: {:?}", reported);
}
3714
#[test]
fn open_directive_without_account_emits_error() {
    // The legacy parser rejects an `open` with no account through its
    // top-level error-recovery path. On the CST side the catch-all
    // `SyntaxError` comes from the is_directive_producing /
    // errors_before tracker inside `parse_via_cst`.
    let parsed = parse_via_cst("2024-01-01 open\n");
    let reported = &parsed.errors;
    assert!(!reported.is_empty(), "errors: {:?}", reported);
}
3724
#[test]
fn open_directive_with_lowercase_account_emits_error() {
    // The ACCOUNT regex demands an uppercase first letter, so
    // `lowercase:invalid` never becomes an ACCOUNT child of the open
    // directive. The error therefore arrives through the same catch-all
    // path as an `open` with no account at all.
    let parsed = parse_via_cst("2024-01-01 open lowercase:invalid\n");
    let reported = &parsed.errors;
    assert!(!reported.is_empty(), "errors: {:?}", reported);
}
3734
#[test]
fn incomplete_open_at_eof_emits_error() {
    // PR #740 "incomplete-at-EOF" regression: an `open` that ends the
    // input without a trailing newline must still be reported rather
    // than silently dropped.
    let parsed = parse_via_cst("2024-01-01 open");
    let reported = &parsed.errors;
    assert!(!reported.is_empty(), "errors: {:?}", reported);
}
3743
#[test]
fn balance_directive_without_amount_emits_error() {
    // The directive names an account but stops before any amount.
    let parsed = parse_via_cst("2024-01-15 balance Assets:Checking\n");
    let reported = &parsed.errors;
    assert!(!reported.is_empty(), "errors: {:?}", reported);
}
3749
#[test]
fn pad_directive_without_source_account_emits_error() {
    // Only one account follows `pad`; the source account is missing.
    let parsed = parse_via_cst("2024-01-15 pad Assets:Checking\n");
    let reported = &parsed.errors;
    assert!(!reported.is_empty(), "errors: {:?}", reported);
}
3755
#[test]
fn cost_spec_n_hash_t_uses_total_form() {
    use rust_decimal_macros::dec;

    // One string fragment per ledger line; postings are indented by two
    // spaces.
    let ledger = concat!(
        "2024-01-01 open Assets:Stock\n",
        "2024-01-01 open Assets:Cash USD\n",
        "2024-01-15 *\n",
        "  Assets:Stock  10 STK {50 # 1500 USD}\n",
        "  Assets:Cash  -1500.00 USD\n",
    );
    let parsed = parse_via_cst(ledger);
    assert!(parsed.errors.is_empty(), "errors: {:?}", parsed.errors);

    // The two `open` directives come first; the transaction is third.
    let txn = match &parsed.directives[2].value {
        Directive::Transaction(txn) => txn,
        _ => panic!("expected Transaction at index 2"),
    };

    let stock_posting = &txn.postings[0].value;
    let cost = stock_posting.cost.as_ref().expect("cost spec present");
    let expected_number = Some(CostNumber::Total { value: dec!(1500) });
    assert_eq!(
        cost.number, expected_number,
        "the `{{N # T CCY}}` form must store the post-`#` total"
    );
}
3780
#[test]
fn unclosed_cost_brace_emits_error() {
    // The `{150 USD` cost on the first posting is never closed, and
    // another posting follows it.
    let ledger = concat!(
        "2024-01-01 open Assets:Stock\n",
        "2024-01-01 open Assets:Cash USD\n",
        "2024-01-15 *\n",
        "  Assets:Stock 10 AAPL {150 USD\n",
        "  Assets:Cash -1500 USD\n",
    );
    let parsed = parse_via_cst(ledger);

    let unclosed_reported = parsed
        .errors
        .iter()
        .map(|err| err.message())
        .any(|text| text.contains("unclosed cost"));
    assert!(
        unclosed_reported,
        "expected 'unclosed cost' error, got: {:?}",
        parsed.errors
    );
}
3799
#[test]
fn unclosed_cost_brace_at_eof_emits_error() {
    // Same unclosed `{150 USD` cost, but the input ends right after it:
    // no closing brace, no trailing newline, no further posting.
    let ledger = concat!(
        "2024-01-01 open Assets:Stock\n",
        "2024-01-01 open Assets:Cash USD\n",
        "2024-01-15 *\n",
        "  Assets:Stock 10 AAPL {150 USD",
    );
    let parsed = parse_via_cst(ledger);

    let unclosed_reported = parsed
        .errors
        .iter()
        .map(|err| err.message())
        .any(|text| text.contains("unclosed cost"));
    assert!(
        unclosed_reported,
        "expected 'unclosed cost' error at EOF, got: {:?}",
        parsed.errors
    );
}
3817
#[test]
fn leading_decimal_in_posting_amount_emits_error() {
    // An amount with no integer part (`.50 USD`) has to be rejected by
    // both parsers. The well-formed `0.50 USD` spelling is exercised by
    // other tests.
    let ledger = concat!(
        "2024-01-15 * \"Test\"\n",
        "  Expenses:Food  .50 USD\n",
        "  Assets:Checking\n",
    );
    let parsed = parse_via_cst(ledger);
    let reported = &parsed.errors;
    assert!(!reported.is_empty(), "errors: {:?}", reported);
}
3829
#[test]
fn transaction_with_metadata_on_directive_and_posting() {
    // `tag1` sits at posting indentation directly under the transaction
    // header; `receipt` is indented one level deeper, under the posting.
    let ledger = concat!(
        "2024-01-15 * \"x\"\n",
        "  tag1: \"hello\"\n",
        "  Assets:Cash  -5 USD\n",
        "    receipt: \"abc123\"\n",
    );
    let parsed = parse_via_cst(ledger);
    let t = match &parsed.directives[0].value {
        Directive::Transaction(t) => t,
        _ => panic!("expected Transaction"),
    };

    // Transaction-level metadata.
    let expected_tag = MetaValue::String("hello".to_string());
    assert_eq!(t.meta.get("tag1"), Some(&expected_tag));

    // Posting-level metadata on the only posting.
    let expected_receipt = MetaValue::String("abc123".to_string());
    let posting_meta = &t.postings[0].value.meta;
    assert_eq!(posting_meta.get("receipt"), Some(&expected_receipt));
}
3850
/// Locks in how `account_occurrences` treats directives that fail.
///
/// The rustdoc on `ParseResult::account_occurrences` separates two
/// failure modes, and both are intentional:
///
/// - **Typed-conversion failure** (for example `InvalidBookingMethod`
///   on an `open` with a garbage booking string): the CST itself is
///   intact and the `ACCOUNT` node is outside any `ERROR_NODE`, so the
///   token IS tracked. LSP rename can still reach it mid-edit.
/// - **CST-recovery wrap**: the directive is garbled badly enough that
///   the CST wraps the region in `ERROR_NODE`. The `ACCOUNT` token
///   lives inside that node and is NOT tracked.
///
/// Both cases are asserted below.
#[test]
fn account_occurrences_policy_for_failing_directives() {
    // Case A, typed-conversion failure: the line is syntactically a
    // valid `open`, but "GARBAGE" is not on the booking-method
    // whitelist. The ACCOUNT token must still be tracked.
    let conversion_failure = parse_via_cst("2024-01-01 open Assets:Bank \"GARBAGE\"\n");
    let tracked_after_conversion_failure = conversion_failure
        .account_occurrences
        .iter()
        .any(|occurrence| occurrence.value == "Assets:Bank");
    assert!(
        tracked_after_conversion_failure,
        "typed-conversion failure should keep the ACCOUNT token in account_occurrences \
         (got {:?}); rename mid-edit relies on this",
        conversion_failure.account_occurrences,
    );

    // Case B, CST-recovery wrap: the typo `opn` is not recognised at
    // the directive position, so the recovery walker wraps the line in
    // ERROR_NODE. The ACCOUNT token must be left out.
    let recovery_wrap = parse_via_cst("2024-01-01 opn Assets:Bank USD\n");
    let excluded_after_recovery_wrap = recovery_wrap
        .account_occurrences
        .iter()
        .all(|occurrence| occurrence.value != "Assets:Bank");
    assert!(
        excluded_after_recovery_wrap,
        "ERROR_NODE-wrapped ACCOUNT should be EXCLUDED from account_occurrences \
         (got {:?}); rename should not hit garbled mid-edit syntax",
        recovery_wrap.account_occurrences,
    );
}
3897}