rustledger-parser 0.16.0

Beancount parser with error recovery and full syntax support
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
//! `SyntaxKind`: every kind of token or node that can appear in the
//! Beancount CST.
//!
//! Design notes:
//!
//! - **No cross-version stability commitment.** Phase 2+ may add new
//!   variants. No serialized form persists `SyntaxKind` values across
//!   binary versions (rowan green trees aren't designed as on-disk
//!   format).
//! - **APPEND-ONLY in practice.** The corpus baseline at
//!   `tests/baselines/cst-corpus.manifest` hashes
//!   `(SyntaxKind as u16, len)` per token for every file in the 714-
//!   file compatibility corpus, AND a separate per-file node-shape
//!   hash. Reordering variants invalidates every committed manifest
//!   entry simultaneously, producing an unreviewable 700-line diff.
//!   The rule for routine work: APPEND new variants at the relevant
//!   section's end. If you genuinely must reorder, do it in a
//!   SEPARATE commit from any parser change so reviewers can verify
//!   the regen is mechanical.
//! - **Safe u16 conversion via `num_enum::TryFromPrimitive`** instead
//!   of a hand-rolled match table. Adding a new variant is a single
//!   line; the derive enforces parity.
//! - **`is_token` via `matches!` over the actual token variants**, not
//!   a boundary trick on discriminants. A future variant inserted
//!   anywhere is classified correctly.
//! - **`kind_from_raw` falls back to `ERROR_NODE` on unknown
//!   discriminants** in release builds (`debug_assert!` panics in
//!   debug/test). Defends against version-skewed green-node bytes
//!   reaching the parser via LSP cache, sidecar tooling, or
//!   incremental persistence without crashing production. Surfaces
//!   the skew loudly in dev/test where it's actionable.

use num_enum::TryFromPrimitive;

/// Every kind of token or node that can appear in a Beancount CST.
///
/// Tokens carry source bytes; nodes are containers. The Logos lexer
/// produces a stream of tokens; the structured parser (phase 2+) wraps
/// runs of those tokens in nodes.
///
/// Variants are declared tokens first (trivia, literals, keywords,
/// punctuation, error token) and node kinds last. No variant has an
/// explicit discriminant, so the `#[repr(u16)]` value of each variant
/// is determined by its declaration position — reordering variants
/// changes their raw values.
#[allow(non_camel_case_types)]
// Variant naming follows the rust-analyzer / rowan convention
// (SCREAMING_SNAKE_CASE). Token variants without dedicated rustdoc
// are 1:1 mirrors of `logos_lexer::Token` (keywords, punctuation) and
// are documented at the parent enum + lossless_tokens::map_kind.
// Node variants without dedicated rustdoc are described by the plain
// `//` comments placed directly above them.
#[allow(missing_docs)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, TryFromPrimitive)]
#[repr(u16)]
#[non_exhaustive]
pub enum SyntaxKind {
    // ---- Trivia tokens ---------------------------------------------------
    /// 3-byte UTF-8 BOM at the very start of a file. Synthesized by
    /// the CST builder; the Logos lexer never sees BOM bytes because
    /// `bom::strip_leading` runs first.
    BOM,
    /// Horizontal whitespace `[ \t]+`.
    WHITESPACE,
    /// `\r?\n`.
    NEWLINE,
    /// `; ...` to end-of-line.
    COMMENT,
    /// `% ...` to end-of-line (ledger-compat).
    PERCENT_COMMENT,
    /// `#! ...` (org-mode shebang at top of file).
    SHEBANG,
    /// `#+ ...` (org-mode property line).
    EMACS_DIRECTIVE,

    // ---- Literal tokens --------------------------------------------------
    /// `YYYY-MM-DD` or `YYYY/M/D`.
    DATE,
    /// Integer or decimal literal.
    NUMBER,
    /// Double-quoted string with escape sequences.
    STRING,
    /// Account name (`Assets:Bank:Checking`).
    ACCOUNT,
    /// Currency symbol (`USD`, `/GAINS`).
    CURRENCY,
    /// `#tag`.
    TAG,
    /// `^link`.
    LINK,
    /// `meta-key:` at line start.
    META_KEY,
    /// Single-character flag introducing a transaction.
    FLAG,
    /// `TRUE` / `True` / `true`.
    BOOL_TRUE,
    /// `FALSE` / `False` / `false`.
    BOOL_FALSE,
    /// `NULL`.
    NULL_KW,

    // ---- Keyword tokens --------------------------------------------------
    TXN_KW,
    BALANCE_KW,
    OPEN_KW,
    CLOSE_KW,
    COMMODITY_KW,
    PAD_KW,
    EVENT_KW,
    QUERY_KW,
    NOTE_KW,
    DOCUMENT_KW,
    PRICE_KW,
    CUSTOM_KW,
    OPTION_KW,
    INCLUDE_KW,
    PLUGIN_KW,
    PUSHTAG_KW,
    POPTAG_KW,
    PUSHMETA_KW,
    POPMETA_KW,
    /// `!` pending flag. (Letter flags such as `P` lex as `FLAG`;
    /// see the `TRANSACTION` trigger list below.)
    PENDING_KW,

    // ---- Punctuation tokens ---------------------------------------------
    L_BRACE,
    R_BRACE,
    L_DOUBLE_BRACE,
    R_DOUBLE_BRACE,
    L_BRACE_HASH,
    L_PAREN,
    R_PAREN,
    AT,
    AT_AT,
    COLON,
    COMMA,
    TILDE,
    PIPE,
    PLUS,
    MINUS,
    STAR,
    SLASH,
    /// Bare `#` (cost-spec date separator; line-start `#` is folded
    /// into `COMMENT` by the lexer post-processing pass).
    HASH,

    // ---- Error token -----------------------------------------------------
    /// Bytes the lexer could not classify. Preserved in the CST for
    /// round-trip and diagnostics.
    ERROR_TOKEN,

    // ---- Node kinds ------------------------------------------------------
    //
    // Structural node kinds are added at the moment they're first
    // needed. Phase 1 emitted only `SOURCE_FILE` (plus `ERROR_NODE`
    // reserved for phase 2's structured recovery). Phase 2.0 added
    // `DIRECTIVE` because the trivia-policy regression tests need a
    // wrapper to demonstrate which directive owns which trivia.
    // Phase 2.1 introduced specific directive kinds
    // (`TRANSACTION`, `OPEN_DIRECTIVE`, ...) alongside `DIRECTIVE`,
    // which remains as the umbrella kind for error-recovery
    // wrappers and any structural test reusable across kinds.
    // `#[non_exhaustive]` + `num_enum`'s derive make new variants
    // safe to add without ABI concerns. (Append-only discipline
    // and discriminant stability notes live in the module
    // rustdoc.)
    /// Root node — every byte of the file is reachable under this node.
    SOURCE_FILE,

    /// Generic error-recovery wrapper. Phase 1 didn't emit this
    /// (lexer errors surface as `ERROR_TOKEN` leaves). **Phase 2.4**
    /// wraps each unrecognized / malformed top-level line in
    /// `ERROR_NODE` so downstream consumers can identify malformed
    /// regions structurally rather than scanning flat `SOURCE_FILE`
    /// children for stray content. Same trivia attachment policy as
    /// recognized directives (rule 2 of `cst::trivia`); per rule 5,
    /// an unterminated final `ERROR_NODE` at EOF still wraps and
    /// simply has no NEWLINE child.
    ERROR_NODE,

    /// Generic structural-directive wrapper. Phase 2.0 introduced it
    /// as the regression-test target for the trivia attachment
    /// policy. Phase 2.1a (this section) adds specific kinds
    /// alongside it; `DIRECTIVE` remains as the umbrella kind for
    /// error-recovery wrappers around partial-directive fragments
    /// AND as a structural test target where the shape is the same
    /// across all directive kinds.
    DIRECTIVE,

    // Phase 2.1a: specific directive kinds for the 14 single-line
    // directives (the 10 dated ones, `OPEN_DIRECTIVE` through
    // `COMMODITY_DIRECTIVE`, plus the 4 standalone-keyword
    // push/pop ones). The trivia attachment policy (see
    // `cst::trivia`) applies UNIFORMLY to each. Each wraps its
    // content tokens + same-line trailing trivia + terminator
    // NEWLINE per the Directive-Terminator Rule.
    // OPTION/INCLUDE/PLUGIN/CUSTOM are edge directives (PR 2.3);
    // absent here.
    OPEN_DIRECTIVE,
    CLOSE_DIRECTIVE,
    BALANCE_DIRECTIVE,
    PAD_DIRECTIVE,
    EVENT_DIRECTIVE,
    QUERY_DIRECTIVE,
    NOTE_DIRECTIVE,
    DOCUMENT_DIRECTIVE,
    PRICE_DIRECTIVE,
    COMMODITY_DIRECTIVE,
    PUSHTAG_DIRECTIVE,
    POPTAG_DIRECTIVE,
    PUSHMETA_DIRECTIVE,
    POPMETA_DIRECTIVE,

    // Phase 2.3: edge directives that were previously falling
    // through to the unrecognized-line passthrough.
    //
    // - `OPTION_DIRECTIVE`: top-level `option "key" "value"`.
    // - `INCLUDE_DIRECTIVE`: top-level `include "path"`.
    // - `PLUGIN_DIRECTIVE`: top-level `plugin "module" ["config"]`
    //   (config string is optional).
    // - `CUSTOM_DIRECTIVE`: dated `DATE custom "type" values...`
    //   — like the 10 dated single-line directives, with an
    //   arbitrary trailing value list (STRING / ACCOUNT /
    //   AMOUNT-shape / DATE / CURRENCY / BOOL_TRUE / BOOL_FALSE).
    //
    // All four follow the single-line directive body shape
    // (header + optional indented metadata sub-lines via
    // `emit_directive_body`). The body / metadata story is
    // identical to PR 2.1a's 10 dated + 4 standalone-keyword
    // directives — only the header recognition is new.
    OPTION_DIRECTIVE,
    INCLUDE_DIRECTIVE,
    PLUGIN_DIRECTIVE,
    CUSTOM_DIRECTIVE,

    // Phase 2.1b: TRANSACTION wrapper. Trigger is DATE followed by
    // one of: STAR (`*`) / PENDING_KW (`!`) / FLAG (P/S/T/C/U/R/M/?/&)
    // / HASH (`#` promoted to flag) / TXN_KW (`txn`) / STRING
    // (implied-txn shorthand `2024-01-15 "Coffee"`) / single-char
    // CURRENCY (ticker letters T/V/F/X/...). Mirrors the legacy
    // AST parser at parser.rs:1707-1715. Body spans header +
    // indented posting and metadata sub-lines until next top-
    // level construct or EOF. POSTING wrapping landed in PR 2.2b;
    // AMOUNT / COST_SPEC / PRICE_ANNOTATION inside POSTING is PR 2.2c.
    /// Transaction node: the header line plus its indented posting
    /// and metadata sub-lines.
    TRANSACTION,

    // Phase 2.2a: META_ENTRY wraps each `WS META_KEY ... (NEWLINE
    // | EOF)` indented metadata sub-line inside a directive or
    // transaction. An unterminated final metadata sub-line at EOF
    // (per rule 5 of `cst::trivia`) is still wrapped — its
    // META_ENTRY simply has no NEWLINE child. Sub-node contents
    // stay flat (token-level access to the value); typed AST
    // wrappers in phase 3 will surface `key()` and `value()`
    // accessors. Indented `;`-comments interleaved with metadata
    // stay as flat children of the parent directive, not META_ENTRY
    // children.
    /// One indented metadata sub-line (`META_KEY ...`) inside a
    /// directive, transaction, or posting.
    META_ENTRY,

    // Phase 2.2b: POSTING wraps each posting sub-line inside a
    // TRANSACTION. Recognition shape is `WS [(FLAG | STAR |
    // PENDING_KW | HASH | single-char CURRENCY) WS] ACCOUNT ...`
    // followed by an optional amount / cost spec / price
    // annotation, terminated by NEWLINE or EOF (per rule 5 an
    // unterminated final posting at EOF still gets wrapped — its
    // POSTING simply has no NEWLINE child). The flag arm mirrors
    // `parse_flag` in the legacy AST parser; HASH is `#`-promoted-
    // to-flag and single-char CURRENCY covers ticker letters like
    // T/V/F/X that win the lexer's priority-3 Currency-vs-Flag
    // tie-break. Posting-attached metadata — META_ENTRY sub-lines
    // strictly more indented than the POSTING's own indent —
    // becomes a child of the POSTING; a META_ENTRY at the same
    // indent terminates the POSTING and stays at TRANSACTION
    // level. AMOUNT / COST_SPEC / PRICE_ANNOTATION sub-nodes
    // inside POSTING are PR 2.2c (below).
    /// One posting sub-line inside a `TRANSACTION`, including any
    /// more-deeply-indented metadata attached to it.
    POSTING,

    // Phase 2.2c: AMOUNT wraps the units-amount portion of a
    // posting line, i.e. `[(MINUS | PLUS)] NUMBER [WS CURRENCY]`,
    // a bare `NUMBER` (incomplete amount with no currency), or a
    // bare `CURRENCY` (currency-only amount). Appears after the
    // ACCOUNT and before any COST_SPEC / PRICE_ANNOTATION. Mirrors
    // the legacy AST `parse_incomplete_amount` shape: NUMBER plus
    // optional CURRENCY, or CURRENCY alone.
    //
    // **Scoped to postings only**: directive-header amounts
    // (`balance Assets:Cash 100 USD`, `price USD 1.10 EUR`) are
    // emitted FLAT by `emit_directive_body`'s
    // `emit_through_terminator`, NOT wrapped in AMOUNT. PAD has
    // no inline amount and is unaffected. Phase 3 typed-AST
    // accessors for `Balance::amount()` / `Price::amount()` will
    // need a different walking strategy (scan flat tokens after
    // the keyword) than `Posting::amount()` (find the AMOUNT
    // child). Pinned by
    // `balance_and_price_directive_header_amounts_stay_flat_not_wrapped`.
    /// Amount inside a posting (number and/or currency); also used
    /// for the amount following a price annotation. Directive-header
    /// amounts are NOT wrapped in this node.
    AMOUNT,

    // Phase 2.2c: COST_SPEC wraps a bracketed cost annotation
    // inside a posting line, i.e. `L_BRACE ... R_BRACE`,
    // `L_BRACE_HASH ... R_BRACE` (per-unit + total), or
    // `L_DOUBLE_BRACE ... R_DOUBLE_BRACE` (total-only). Contents
    // stay flat children of COST_SPEC for now (phase 3 typed-AST
    // will surface accessors); an unclosed brace at EOF still
    // gets wrapped (the COST_SPEC simply has no matching closing
    // brace child) per rule 5.
    /// Brace-delimited cost annotation inside a posting line.
    COST_SPEC,

    // Phase 2.2c: PRICE_ANNOTATION wraps a price clause inside a
    // posting line, i.e. `(AT | AT_AT) [WS AMOUNT]`. Beancount
    // uses `@` for per-unit price and `@@` for total price. The
    // trailing amount IS recursively wrapped in an AMOUNT
    // sub-node mirroring the units-amount case; the typed-AST
    // decodes per-unit-vs-total by inspecting the opener token
    // kind (`AT` vs `AT_AT`) and walks the `AMOUNT` child for the
    // number and currency.
    /// `@` / `@@` price clause inside a posting line, with its
    /// trailing amount wrapped in an `AMOUNT` child.
    PRICE_ANNOTATION,
}

impl SyntaxKind {
    /// Reports whether this kind is a leaf token (one that carries
    /// source bytes directly) as opposed to a container node.
    ///
    /// Classification is by explicit variant listing — the trivia
    /// kinds via [`Self::is_trivia`], every other token via the
    /// `matches!` below — never by discriminant range, so a variant
    /// added at any position in the enum is classified correctly.
    /// Any kind not listed is treated as a node.
    #[must_use]
    pub const fn is_token(self) -> bool {
        // Every trivia kind is a token; that list lives in `is_trivia`.
        self.is_trivia()
            || matches!(
                self,
                // Literals
                Self::DATE
                    | Self::NUMBER
                    | Self::STRING
                    | Self::ACCOUNT
                    | Self::CURRENCY
                    | Self::TAG
                    | Self::LINK
                    | Self::META_KEY
                    | Self::FLAG
                    | Self::BOOL_TRUE
                    | Self::BOOL_FALSE
                    | Self::NULL_KW
                    // Keywords
                    | Self::TXN_KW
                    | Self::BALANCE_KW
                    | Self::OPEN_KW
                    | Self::CLOSE_KW
                    | Self::COMMODITY_KW
                    | Self::PAD_KW
                    | Self::EVENT_KW
                    | Self::QUERY_KW
                    | Self::NOTE_KW
                    | Self::DOCUMENT_KW
                    | Self::PRICE_KW
                    | Self::CUSTOM_KW
                    | Self::OPTION_KW
                    | Self::INCLUDE_KW
                    | Self::PLUGIN_KW
                    | Self::PUSHTAG_KW
                    | Self::POPTAG_KW
                    | Self::PUSHMETA_KW
                    | Self::POPMETA_KW
                    | Self::PENDING_KW
                    // Punctuation
                    | Self::L_BRACE
                    | Self::R_BRACE
                    | Self::L_DOUBLE_BRACE
                    | Self::R_DOUBLE_BRACE
                    | Self::L_BRACE_HASH
                    | Self::L_PAREN
                    | Self::R_PAREN
                    | Self::AT
                    | Self::AT_AT
                    | Self::COLON
                    | Self::COMMA
                    | Self::TILDE
                    | Self::PIPE
                    | Self::PLUS
                    | Self::MINUS
                    | Self::STAR
                    | Self::SLASH
                    | Self::HASH
                    // Error
                    | Self::ERROR_TOKEN
            )
    }

    /// Reports whether this kind is trivia: the BOM, horizontal
    /// whitespace, a newline, or one of the comment-like variants.
    /// Trivia is byte-significant but semantically uninteresting, so
    /// typed AST methods skip over it. `ERROR_TOKEN` is deliberately
    /// NOT trivia: errors must surface.
    #[must_use]
    pub const fn is_trivia(self) -> bool {
        matches!(
            self,
            Self::BOM
                | Self::WHITESPACE
                | Self::NEWLINE
                | Self::COMMENT
                | Self::PERCENT_COMMENT
                | Self::SHEBANG
                | Self::EMACS_DIRECTIVE
        )
    }
}

/// Converts a typed [`SyntaxKind`] into rowan's untyped raw kind by
/// wrapping its `u16` discriminant.
impl From<SyntaxKind> for rowan::SyntaxKind {
    fn from(kind: SyntaxKind) -> Self {
        // `SyntaxKind` is `#[repr(u16)]`, so this cast is the
        // discriminant itself — nothing is truncated.
        let discriminant = kind as u16;
        rowan::SyntaxKind(discriminant)
    }
}

/// Tag enum for `rowan::Language`. Zero variants — no value of this
/// type can ever be constructed; it is only used as a type-level
/// marker that ties rowan's generic tree types to [`SyntaxKind`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum BeancountLanguage {}

impl rowan::Language for BeancountLanguage {
    type Kind = SyntaxKind;

    /// Decodes a raw rowan kind into a [`SyntaxKind`].
    ///
    /// An unknown discriminant panics in builds with debug assertions
    /// enabled and decodes to `SyntaxKind::ERROR_NODE` otherwise.
    fn kind_from_raw(raw: rowan::SyntaxKind) -> Self::Kind {
        // Why the two build profiles differ:
        //
        // - Dev/test panics loudly so version-skewed green-node bytes
        //   surface during development, when they're actionable.
        // - Prod downgrades to ERROR_NODE: rowan calls kind_from_raw
        //   inside every tree traversal, and an unrecoverable panic
        //   there would let a single stale cache file take down a
        //   long-running LSP.
        //
        // This is deliberately asymmetric with `SyntaxKind::try_from`,
        // which is meant for explicit roundtrip validation (serialize
        // a kind, read it back) where `Err` is the useful signal.
        // Here, on a tree-walk hot path, a downgraded kind beats a
        // panic in production.
        let decoded = SyntaxKind::try_from(raw.0);
        debug_assert!(
            decoded.is_ok(),
            "unknown SyntaxKind discriminant {} — cross-version GreenNode \
             skew, manifest reorder corruption, or a missing num_enum \
             derive update. In release builds this becomes ERROR_NODE.",
            raw.0,
        );
        match decoded {
            Ok(kind) => kind,
            Err(_) => SyntaxKind::ERROR_NODE,
        }
    }

    /// Encodes a [`SyntaxKind`] as rowan's raw kind via the `From`
    /// conversion (the `u16` discriminant).
    fn kind_to_raw(kind: Self::Kind) -> rowan::SyntaxKind {
        rowan::SyntaxKind::from(kind)
    }
}

/// `rowan::SyntaxNode` specialized to `BeancountLanguage`, i.e. a CST
/// node whose kinds are [`SyntaxKind`] values.
pub type SyntaxNode = rowan::SyntaxNode<BeancountLanguage>;
/// `rowan::SyntaxToken` specialized to `BeancountLanguage`, i.e. a CST
/// leaf token whose kinds are [`SyntaxKind`] values.
pub type SyntaxToken = rowan::SyntaxToken<BeancountLanguage>;
/// `rowan::SyntaxElement` (token-or-node) specialized to `BeancountLanguage`.
pub type SyntaxElement = rowan::SyntaxElement<BeancountLanguage>;

#[cfg(test)]
mod tests {
    use super::*;

    /// Every node (container) kind, in declaration order. Must be kept
    /// in sync with the `// ---- Node kinds ----` section of the enum.
    /// Shared by all tests below so there is exactly one hand-maintained
    /// node list to update; `every_kind_partitions_token_xor_node`
    /// fails if it drifts from what `is_token` reports.
    const NODE_KINDS: &[SyntaxKind] = &[
        SyntaxKind::SOURCE_FILE,
        SyntaxKind::ERROR_NODE,
        SyntaxKind::DIRECTIVE,
        SyntaxKind::OPEN_DIRECTIVE,
        SyntaxKind::CLOSE_DIRECTIVE,
        SyntaxKind::BALANCE_DIRECTIVE,
        SyntaxKind::PAD_DIRECTIVE,
        SyntaxKind::EVENT_DIRECTIVE,
        SyntaxKind::QUERY_DIRECTIVE,
        SyntaxKind::NOTE_DIRECTIVE,
        SyntaxKind::DOCUMENT_DIRECTIVE,
        SyntaxKind::PRICE_DIRECTIVE,
        SyntaxKind::COMMODITY_DIRECTIVE,
        SyntaxKind::PUSHTAG_DIRECTIVE,
        SyntaxKind::POPTAG_DIRECTIVE,
        SyntaxKind::PUSHMETA_DIRECTIVE,
        SyntaxKind::POPMETA_DIRECTIVE,
        SyntaxKind::OPTION_DIRECTIVE,
        SyntaxKind::INCLUDE_DIRECTIVE,
        SyntaxKind::PLUGIN_DIRECTIVE,
        SyntaxKind::CUSTOM_DIRECTIVE,
        SyntaxKind::TRANSACTION,
        SyntaxKind::META_ENTRY,
        SyntaxKind::POSTING,
        SyntaxKind::AMOUNT,
        SyntaxKind::COST_SPEC,
        SyntaxKind::PRICE_ANNOTATION,
    ];

    /// Every token (leaf) kind, in declaration order. Must be kept in
    /// sync with the token sections of the enum; drift is caught by
    /// `every_kind_partitions_token_xor_node`.
    const TOKEN_KINDS: &[SyntaxKind] = &[
        // Trivia
        SyntaxKind::BOM,
        SyntaxKind::WHITESPACE,
        SyntaxKind::NEWLINE,
        SyntaxKind::COMMENT,
        SyntaxKind::PERCENT_COMMENT,
        SyntaxKind::SHEBANG,
        SyntaxKind::EMACS_DIRECTIVE,
        // Literals
        SyntaxKind::DATE,
        SyntaxKind::NUMBER,
        SyntaxKind::STRING,
        SyntaxKind::ACCOUNT,
        SyntaxKind::CURRENCY,
        SyntaxKind::TAG,
        SyntaxKind::LINK,
        SyntaxKind::META_KEY,
        SyntaxKind::FLAG,
        SyntaxKind::BOOL_TRUE,
        SyntaxKind::BOOL_FALSE,
        SyntaxKind::NULL_KW,
        // Keywords
        SyntaxKind::TXN_KW,
        SyntaxKind::BALANCE_KW,
        SyntaxKind::OPEN_KW,
        SyntaxKind::CLOSE_KW,
        SyntaxKind::COMMODITY_KW,
        SyntaxKind::PAD_KW,
        SyntaxKind::EVENT_KW,
        SyntaxKind::QUERY_KW,
        SyntaxKind::NOTE_KW,
        SyntaxKind::DOCUMENT_KW,
        SyntaxKind::PRICE_KW,
        SyntaxKind::CUSTOM_KW,
        SyntaxKind::OPTION_KW,
        SyntaxKind::INCLUDE_KW,
        SyntaxKind::PLUGIN_KW,
        SyntaxKind::PUSHTAG_KW,
        SyntaxKind::POPTAG_KW,
        SyntaxKind::PUSHMETA_KW,
        SyntaxKind::POPMETA_KW,
        SyntaxKind::PENDING_KW,
        // Punctuation
        SyntaxKind::L_BRACE,
        SyntaxKind::R_BRACE,
        SyntaxKind::L_DOUBLE_BRACE,
        SyntaxKind::R_DOUBLE_BRACE,
        SyntaxKind::L_BRACE_HASH,
        SyntaxKind::L_PAREN,
        SyntaxKind::R_PAREN,
        SyntaxKind::AT,
        SyntaxKind::AT_AT,
        SyntaxKind::COLON,
        SyntaxKind::COMMA,
        SyntaxKind::TILDE,
        SyntaxKind::PIPE,
        SyntaxKind::PLUS,
        SyntaxKind::MINUS,
        SyntaxKind::STAR,
        SyntaxKind::SLASH,
        SyntaxKind::HASH,
        // Error
        SyntaxKind::ERROR_TOKEN,
    ];

    /// Every listed node kind must be rejected by `is_token()`. A
    /// node variant mistakenly added to `is_token`'s token list fails
    /// here.
    #[test]
    fn nodes_are_not_tokens() {
        for &kind in NODE_KINDS {
            assert!(
                !kind.is_token(),
                "{kind:?} is a node but is_token() returns true",
            );
        }
    }

    /// Closed-form exhaustiveness check that catches the failure
    /// mode the hand-maintained lists (`NODE_KINDS` and
    /// `TOKEN_KINDS`) miss in isolation: a future variant added to
    /// the enum but forgotten in `is_token` AND in the test lists.
    ///
    /// We enumerate every valid discriminant via the `num_enum`
    /// `try_from` derive — the same surface `kind_from_raw` uses —
    /// split the result by `is_token()`, and compare each half to the
    /// corresponding documented list. If they disagree, a variant was
    /// added without updating the test scaffolding.
    #[test]
    fn every_kind_partitions_token_xor_node() {
        // FULL `u16::MAX` sweep, not a sampling. SyntaxKind is
        // #[repr(u16)] so any discriminant in [0, u16::MAX] is
        // legally constructible by a future PR. ~65K try_from
        // calls is cheap and catches a future PR that pushes new
        // variants past any arbitrary upper bound.
        let all_kinds: Vec<SyntaxKind> = (0u16..=u16::MAX)
            .filter_map(|d| SyntaxKind::try_from(d).ok())
            .collect();

        // Sanity: we found something (catches a bug where
        // try_from is broken for ALL discriminants).
        assert!(
            !all_kinds.is_empty(),
            "SyntaxKind::try_from rejected every discriminant in 0..=u16::MAX",
        );

        let (observed_tokens, observed_nodes): (Vec<SyntaxKind>, Vec<SyntaxKind>) =
            all_kinds.iter().copied().partition(|k| k.is_token());

        // ---- Node side ----
        let documented_nodes = NODE_KINDS;
        assert_eq!(
            observed_nodes.len(),
            documented_nodes.len(),
            "is_token() says there are {} node kinds but the \
             documented list has {}: observed={observed_nodes:?}, \
             documented={documented_nodes:?}. A new SyntaxKind \
             variant was added without updating is_token's matches! \
             arm AND the NODE_KINDS list in this test module.",
            observed_nodes.len(),
            documented_nodes.len(),
        );
        for &kind in documented_nodes {
            assert!(
                observed_nodes.contains(&kind),
                "{kind:?} is documented as a node but is_token() \
                 returns true for it",
            );
        }

        // ---- Token side ----
        let documented_tokens = TOKEN_KINDS;
        assert_eq!(
            observed_tokens.len(),
            documented_tokens.len(),
            "is_token() says there are {} token kinds but the \
             documented list has {}: observed={observed_tokens:?}, \
             documented={documented_tokens:?}. A token variant was \
             added to is_token without updating the TOKEN_KINDS list \
             in this test module.",
            observed_tokens.len(),
            documented_tokens.len(),
        );
        for &kind in documented_tokens {
            assert!(
                observed_tokens.contains(&kind),
                "{kind:?} is documented as a token but is_token() \
                 returns false for it",
            );
        }
    }

    /// Inverse of `nodes_are_not_tokens`: every token kind must satisfy
    /// `is_token()`. Catches a future variant added to the enum but
    /// forgotten in the `matches!` arm of `is_token`, which would
    /// silently misclassify at runtime while passing the
    /// `nodes_are_not_tokens` test.
    #[test]
    fn tokens_are_tokens() {
        for &kind in TOKEN_KINDS {
            assert!(
                kind.is_token(),
                "{kind:?} is a token but is_token() returns false — \
                 likely missing from the matches! arm in is_token",
            );
        }
    }

    #[test]
    fn is_trivia_excludes_error_token() {
        // ERROR_TOKEN is byte-significant but NOT trivia: it represents
        // bytes the lexer couldn't classify, and downstream consumers
        // need to surface them rather than skip them.
        assert!(!SyntaxKind::ERROR_TOKEN.is_trivia());
        assert!(SyntaxKind::ERROR_TOKEN.is_token());
    }

    #[test]
    fn rowan_language_round_trip() {
        // kind -> raw -> kind must be the identity for every defined
        // kind. Sweeping both documented lists covers the first
        // variant, the last variant, and the token/node boundary.
        for &kind in TOKEN_KINDS.iter().chain(NODE_KINDS) {
            let raw: rowan::SyntaxKind = kind.into();
            let back = <BeancountLanguage as rowan::Language>::kind_from_raw(raw);
            assert_eq!(kind, back, "{kind:?} did not survive the rowan round trip");
        }
    }
}