Skip to main content

rustledger_parser/cst/
syntax_kind.rs

1//! `SyntaxKind`: every kind of token or node that can appear in the
2//! Beancount CST.
3//!
4//! Design notes:
5//!
6//! - **No cross-version stability commitment.** Phase 2+ may add new
7//!   variants. No serialized form persists `SyntaxKind` values across
8//!   binary versions (rowan green trees aren't designed as on-disk
9//!   format).
10//! - **APPEND-ONLY in practice.** The corpus baseline at
11//!   `tests/baselines/cst-corpus.manifest` hashes
12//!   `(SyntaxKind as u16, len)` per token for every file in the 714-
13//!   file compatibility corpus, AND a separate per-file node-shape
14//!   hash. Reordering variants invalidates every committed manifest
15//!   entry simultaneously, producing an unreviewable 700-line diff.
16//!   The rule for routine work: APPEND new variants at the relevant
17//!   section's end. If you genuinely must reorder, do it in a
18//!   SEPARATE commit from any parser change so reviewers can verify
19//!   the regen is mechanical.
20//! - **Safe u16 conversion via `num_enum::TryFromPrimitive`** instead
21//!   of a hand-rolled match table. Adding a new variant is a single
22//!   line; the derive enforces parity.
23//! - **`is_token` via `matches!` over the actual token variants**, not
24//!   a boundary trick on discriminants. A future variant inserted
25//!   anywhere is classified correctly.
26//! - **`kind_from_raw` falls back to `ERROR_NODE` on unknown
27//!   discriminants** in release builds (`debug_assert!` panics in
28//!   debug/test). Defends against version-skewed green-node bytes
29//!   reaching the parser via LSP cache, sidecar tooling, or
30//!   incremental persistence without crashing production. Surfaces
31//!   the skew loudly in dev/test where it's actionable.
32
33use num_enum::TryFromPrimitive;
34
35/// Every kind of token or node that can appear in a Beancount CST.
36///
37/// Tokens carry source bytes; nodes are containers. The Logos lexer
38/// produces a stream of tokens; the structured parser (phase 2+) wraps
39/// runs of those tokens in nodes.
40#[allow(non_camel_case_types)]
41// Variant naming follows the rust-analyzer / rowan convention
42// (SCREAMING_SNAKE_CASE). Variants without dedicated rustdoc are
43// 1:1 mirrors of `logos_lexer::Token` (keywords, punctuation) and
44// are documented at the parent enum + lossless_tokens::map_kind.
45#[allow(missing_docs)]
46#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, TryFromPrimitive)]
47#[repr(u16)]
48#[non_exhaustive]
49pub enum SyntaxKind {
50    // ---- Trivia tokens ---------------------------------------------------
51    /// 3-byte UTF-8 BOM at the very start of a file. Synthesized by
52    /// the CST builder; the Logos lexer never sees BOM bytes because
53    /// `bom::strip_leading` runs first.
54    BOM,
55    /// Horizontal whitespace `[ \t]+`.
56    WHITESPACE,
57    /// `\r?\n`.
58    NEWLINE,
59    /// `; ...` to end-of-line.
60    COMMENT,
61    /// `% ...` to end-of-line (ledger-compat).
62    PERCENT_COMMENT,
63    /// `#! ...` (org-mode shebang at top of file).
64    SHEBANG,
65    /// `#+ ...` (org-mode property line).
66    EMACS_DIRECTIVE,
67
68    // ---- Literal tokens --------------------------------------------------
69    /// `YYYY-MM-DD` or `YYYY/M/D`.
70    DATE,
71    /// Integer or decimal literal.
72    NUMBER,
73    /// Double-quoted string with escape sequences.
74    STRING,
75    /// Account name (`Assets:Bank:Checking`).
76    ACCOUNT,
77    /// Currency symbol (`USD`, `/GAINS`).
78    CURRENCY,
79    /// `#tag`.
80    TAG,
81    /// `^link`.
82    LINK,
83    /// `meta-key:` at line start.
84    META_KEY,
85    /// Single-character flag introducing a transaction.
86    FLAG,
87    /// `TRUE` / `True` / `true`.
88    BOOL_TRUE,
89    /// `FALSE` / `False` / `false`.
90    BOOL_FALSE,
91    /// `NULL`.
92    NULL_KW,
93
94    // ---- Keyword tokens --------------------------------------------------
95    TXN_KW,
96    BALANCE_KW,
97    OPEN_KW,
98    CLOSE_KW,
99    COMMODITY_KW,
100    PAD_KW,
101    EVENT_KW,
102    QUERY_KW,
103    NOTE_KW,
104    DOCUMENT_KW,
105    PRICE_KW,
106    CUSTOM_KW,
107    OPTION_KW,
108    INCLUDE_KW,
109    PLUGIN_KW,
110    PUSHTAG_KW,
111    POPTAG_KW,
112    PUSHMETA_KW,
113    POPMETA_KW,
114    /// `P` pending flag.
115    PENDING_KW,
116
117    // ---- Punctuation tokens ---------------------------------------------
118    L_BRACE,
119    R_BRACE,
120    L_DOUBLE_BRACE,
121    R_DOUBLE_BRACE,
122    L_BRACE_HASH,
123    L_PAREN,
124    R_PAREN,
125    AT,
126    AT_AT,
127    COLON,
128    COMMA,
129    TILDE,
130    PIPE,
131    PLUS,
132    MINUS,
133    STAR,
134    SLASH,
135    /// Bare `#` (cost-spec date separator; line-start `#` is folded
136    /// into `COMMENT` by the lexer post-processing pass).
137    HASH,
138
139    // ---- Error token -----------------------------------------------------
140    /// Bytes the lexer could not classify. Preserved in the CST for
141    /// round-trip and diagnostics.
142    ERROR_TOKEN,
143
144    // ---- Node kinds ------------------------------------------------------
145    //
146    // Structural node kinds are added at the moment they're first
147    // needed. Phase 1 emitted only `SOURCE_FILE` (plus `ERROR_NODE`
148    // reserved for phase 2's structured recovery). Phase 2.0 adds
149    // `DIRECTIVE` because the trivia-policy regression tests need a
150    // wrapper to demonstrate which directive owns which trivia.
151    // Phase 2.1 will introduce specific directive kinds
152    // (`TRANSACTION`, `OPEN_DIRECTIVE`, ...) alongside `DIRECTIVE`,
153    // which remains as the umbrella kind for error-recovery
154    // wrappers and any structural test reusable across kinds.
155    // `#[non_exhaustive]` + `num_enum`'s derive make new variants
156    // safe to add without ABI concerns. (Append-only discipline
157    // and discriminant stability notes live in the module
158    // rustdoc.)
159    /// Root node — every byte of the file is reachable under this node.
160    SOURCE_FILE,
161
162    /// Generic error-recovery wrapper. Phase 1 didn't emit this
163    /// (lexer errors surface as `ERROR_TOKEN` leaves). **Phase 2.4**
164    /// wraps each unrecognized / malformed top-level line in
165    /// `ERROR_NODE` so downstream consumers can identify malformed
166    /// regions structurally rather than scanning flat `SOURCE_FILE`
167    /// children for stray content. Same trivia attachment policy as
168    /// recognized directives (rule 2 of `cst::trivia`); per rule 5,
169    /// an unterminated final `ERROR_NODE` at EOF still wraps and
170    /// simply has no NEWLINE child.
171    ERROR_NODE,
172
173    /// Generic structural-directive wrapper. Phase 2.0 introduced it
174    /// as the regression-test target for the trivia attachment
175    /// policy. Phase 2.1a (this section) adds specific kinds
176    /// alongside it; `DIRECTIVE` remains as the umbrella kind for
177    /// error-recovery wrappers around partial-directive fragments
178    /// AND as a structural test target where the shape is the same
179    /// across all directive kinds.
180    DIRECTIVE,
181
182    // Phase 2.1a: specific directive kinds for the 14 single-line
183    // directives. The trivia attachment policy (see `cst::trivia`)
184    // applies UNIFORMLY to each. Each wraps its content tokens +
185    // same-line trailing trivia + terminator NEWLINE per the
186    // Directive-Terminator Rule.
187    // OPTION/INCLUDE/PLUGIN/CUSTOM are edge directives (PR 2.3);
188    // absent here.
189    OPEN_DIRECTIVE,
190    CLOSE_DIRECTIVE,
191    BALANCE_DIRECTIVE,
192    PAD_DIRECTIVE,
193    EVENT_DIRECTIVE,
194    QUERY_DIRECTIVE,
195    NOTE_DIRECTIVE,
196    DOCUMENT_DIRECTIVE,
197    PRICE_DIRECTIVE,
198    COMMODITY_DIRECTIVE,
199    PUSHTAG_DIRECTIVE,
200    POPTAG_DIRECTIVE,
201    PUSHMETA_DIRECTIVE,
202    POPMETA_DIRECTIVE,
203
204    // Phase 2.3: edge directives that were previously falling
205    // through to the unrecognized-line passthrough.
206    //
207    // - `OPTION_DIRECTIVE`: top-level `option "key" "value"`.
208    // - `INCLUDE_DIRECTIVE`: top-level `include "path"`.
209    // - `PLUGIN_DIRECTIVE`: top-level `plugin "module" ["config"]`
210    //   (config string is optional).
211    // - `CUSTOM_DIRECTIVE`: dated `DATE custom "type" values...`
212    //   — like the 14 single-line dated directives, with an
213    //   arbitrary trailing value list (STRING / ACCOUNT /
214    //   AMOUNT-shape / DATE / CURRENCY / BOOL_TRUE / BOOL_FALSE).
215    //
216    // All four follow the single-line directive body shape
217    // (header + optional indented metadata sub-lines via
218    // `emit_directive_body`). The body / metadata story is
219    // identical to PR 2.1a's 10 dated + 4 standalone-keyword
220    // directives — only the header recognition is new.
221    OPTION_DIRECTIVE,
222    INCLUDE_DIRECTIVE,
223    PLUGIN_DIRECTIVE,
224    CUSTOM_DIRECTIVE,
225
226    // Phase 2.1b: TRANSACTION wrapper. Trigger is DATE followed by
227    // one of: STAR (`*`) / PENDING_KW (`!`) / FLAG (P/S/T/C/U/R/M/?/&)
228    // / HASH (`#` promoted to flag) / TXN_KW (`txn`) / STRING
229    // (implied-txn shorthand `2024-01-15 "Coffee"`) / single-char
230    // CURRENCY (ticker letters T/V/F/X/...). Mirrors the legacy
231    // AST parser at parser.rs:1707-1715. Body spans header +
232    // indented posting and metadata sub-lines until next top-
233    // level construct or EOF. POSTING wrapping landed in PR 2.2b;
234    // AMOUNT / COST_SPEC / PRICE_ANNOTATION inside POSTING is PR 2.2c.
235    TRANSACTION,
236
237    // Phase 2.2a: META_ENTRY wraps each `WS META_KEY ... (NEWLINE
238    // | EOF)` indented metadata sub-line inside a directive or
239    // transaction. An unterminated final metadata sub-line at EOF
240    // (per rule 5 of `cst::trivia`) is still wrapped — its
241    // META_ENTRY simply has no NEWLINE child. Sub-node contents
242    // stay flat (token-level access to the value); typed AST
243    // wrappers in phase 3 will surface `key()` and `value()`
244    // accessors. Indented `;`-comments interleaved with metadata
245    // stay as flat children of the parent directive, not META_ENTRY
246    // children.
247    META_ENTRY,
248
249    // Phase 2.2b: POSTING wraps each posting sub-line inside a
250    // TRANSACTION. Recognition shape is `WS [(FLAG | STAR |
251    // PENDING_KW | HASH | single-char CURRENCY) WS] ACCOUNT ...`
252    // followed by an optional amount / cost spec / price
253    // annotation, terminated by NEWLINE or EOF (per rule 5 an
254    // unterminated final posting at EOF still gets wrapped — its
255    // POSTING simply has no NEWLINE child). The flag arm mirrors
256    // `parse_flag` in the legacy AST parser; HASH is `#`-promoted-
257    // to-flag and single-char CURRENCY covers ticker letters like
258    // T/V/F/X that win the lexer's priority-3 Currency-vs-Flag
259    // tie-break. Posting-attached metadata — META_ENTRY sub-lines
260    // strictly more indented than the POSTING's own indent —
261    // becomes a child of the POSTING; a META_ENTRY at the same
262    // indent terminates the POSTING and stays at TRANSACTION
263    // level. AMOUNT / COST_SPEC / PRICE_ANNOTATION sub-nodes
264    // inside POSTING are PR 2.2c (below).
265    POSTING,
266
267    // Phase 2.2c: AMOUNT wraps the units-amount portion of a
268    // posting line, i.e. `[(MINUS | PLUS)] NUMBER [WS CURRENCY]`,
269    // a bare `NUMBER` (incomplete amount with no currency), or a
270    // bare `CURRENCY` (currency-only amount). Appears after the
271    // ACCOUNT and before any COST_SPEC / PRICE_ANNOTATION. Mirrors
272    // the legacy AST `parse_incomplete_amount` shape: NUMBER plus
273    // optional CURRENCY, or CURRENCY alone.
274    //
275    // **Scoped to postings only**: directive-header amounts
276    // (`balance Assets:Cash 100 USD`, `price USD 1.10 EUR`) are
277    // emitted FLAT by `emit_directive_body`'s
278    // `emit_through_terminator`, NOT wrapped in AMOUNT. PAD has
279    // no inline amount and is unaffected. Phase 3 typed-AST
280    // accessors for `Balance::amount()` / `Price::amount()` will
281    // need a different walking strategy (scan flat tokens after
282    // the keyword) than `Posting::amount()` (find the AMOUNT
283    // child). Pinned by
284    // `balance_and_price_directive_header_amounts_stay_flat_not_wrapped`.
285    AMOUNT,
286
287    // Phase 2.2c: COST_SPEC wraps a bracketed cost annotation
288    // inside a posting line, i.e. `L_BRACE ... R_BRACE`,
289    // `L_BRACE_HASH ... R_BRACE` (per-unit + total), or
290    // `L_DOUBLE_BRACE ... R_DOUBLE_BRACE` (total-only). Contents
291    // stay flat children of COST_SPEC for now (phase 3 typed-AST
292    // will surface accessors); an unclosed brace at EOF still
293    // gets wrapped (the COST_SPEC simply has no matching closing
294    // brace child) per rule 5.
295    COST_SPEC,
296
297    // Phase 2.2c: PRICE_ANNOTATION wraps a price clause inside a
298    // posting line, i.e. `(AT | AT_AT) [WS AMOUNT]`. Beancount
299    // uses `@` for per-unit price and `@@` for total price. The
300    // trailing amount IS recursively wrapped in an AMOUNT
301    // sub-node mirroring the units-amount case; the typed-AST
302    // decodes per-unit-vs-total by inspecting the opener token
303    // kind (`AT` vs `AT_AT`) and walks the `AMOUNT` child for the
304    // number and currency.
305    PRICE_ANNOTATION,
306}
307
308impl SyntaxKind {
309    /// Returns true if this kind is a leaf token (carries source bytes
310    /// directly) rather than a parent node. Uses explicit `matches!`
311    /// over the token variants so a future variant inserted anywhere
312    /// in the enum is classified correctly.
313    #[must_use]
314    pub const fn is_token(self) -> bool {
315        matches!(
316            self,
317            Self::BOM
318                | Self::WHITESPACE
319                | Self::NEWLINE
320                | Self::COMMENT
321                | Self::PERCENT_COMMENT
322                | Self::SHEBANG
323                | Self::EMACS_DIRECTIVE
324                | Self::DATE
325                | Self::NUMBER
326                | Self::STRING
327                | Self::ACCOUNT
328                | Self::CURRENCY
329                | Self::TAG
330                | Self::LINK
331                | Self::META_KEY
332                | Self::FLAG
333                | Self::BOOL_TRUE
334                | Self::BOOL_FALSE
335                | Self::NULL_KW
336                | Self::TXN_KW
337                | Self::BALANCE_KW
338                | Self::OPEN_KW
339                | Self::CLOSE_KW
340                | Self::COMMODITY_KW
341                | Self::PAD_KW
342                | Self::EVENT_KW
343                | Self::QUERY_KW
344                | Self::NOTE_KW
345                | Self::DOCUMENT_KW
346                | Self::PRICE_KW
347                | Self::CUSTOM_KW
348                | Self::OPTION_KW
349                | Self::INCLUDE_KW
350                | Self::PLUGIN_KW
351                | Self::PUSHTAG_KW
352                | Self::POPTAG_KW
353                | Self::PUSHMETA_KW
354                | Self::POPMETA_KW
355                | Self::PENDING_KW
356                | Self::L_BRACE
357                | Self::R_BRACE
358                | Self::L_DOUBLE_BRACE
359                | Self::R_DOUBLE_BRACE
360                | Self::L_BRACE_HASH
361                | Self::L_PAREN
362                | Self::R_PAREN
363                | Self::AT
364                | Self::AT_AT
365                | Self::COLON
366                | Self::COMMA
367                | Self::TILDE
368                | Self::PIPE
369                | Self::PLUS
370                | Self::MINUS
371                | Self::STAR
372                | Self::SLASH
373                | Self::HASH
374                | Self::ERROR_TOKEN
375        )
376    }
377
378    /// Returns true if this kind is trivia (whitespace, newline, BOM,
379    /// or a comment variant). Trivia is byte-significant but
380    /// semantically uninteresting; typed AST methods skip it.
381    /// `ERROR_TOKEN` is NOT trivia: errors must surface.
382    #[must_use]
383    pub const fn is_trivia(self) -> bool {
384        matches!(
385            self,
386            Self::BOM
387                | Self::WHITESPACE
388                | Self::NEWLINE
389                | Self::COMMENT
390                | Self::PERCENT_COMMENT
391                | Self::SHEBANG
392                | Self::EMACS_DIRECTIVE
393        )
394    }
395}
396
397impl From<SyntaxKind> for rowan::SyntaxKind {
398    fn from(kind: SyntaxKind) -> Self {
399        Self(kind as u16)
400    }
401}
402
/// Type-level marker that selects the Beancount flavour of
/// `rowan::Language`. The enum has no variants, so a value of this
/// type can never be constructed; it exists purely to carry trait
/// implementations.
#[derive(Clone, Copy, Debug)]
#[derive(PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum BeancountLanguage {}
407
408impl rowan::Language for BeancountLanguage {
409    type Kind = SyntaxKind;
410
411    fn kind_from_raw(raw: rowan::SyntaxKind) -> Self::Kind {
412        // Dev/test: panic loudly so version-skewed green-node bytes
413        // surface during development, when they're actionable. Prod:
414        // fall back to ERROR_NODE so an unrecoverable panic deep in
415        // rowan's tree walk (rowan calls kind_from_raw inside every
416        // tree traversal) can't take down a long-running LSP from a
417        // single stale cache file.
418        //
419        // The asymmetry with `SyntaxKind::try_from` is deliberate:
420        // try_from is for explicit roundtrip validation (e.g.,
421        // serializing a kind and reading it back, where Err is the
422        // useful signal); kind_from_raw is for tree-walk hot paths
423        // (where panic in prod is worse than a downgraded kind).
424        debug_assert!(
425            SyntaxKind::try_from(raw.0).is_ok(),
426            "unknown SyntaxKind discriminant {} — cross-version GreenNode \
427             skew, manifest reorder corruption, or a missing num_enum \
428             derive update. In release builds this becomes ERROR_NODE.",
429            raw.0,
430        );
431        SyntaxKind::try_from(raw.0).unwrap_or(SyntaxKind::ERROR_NODE)
432    }
433
434    fn kind_to_raw(kind: Self::Kind) -> rowan::SyntaxKind {
435        kind.into()
436    }
437}
438
439/// `rowan::SyntaxNode` specialized to `BeancountLanguage`.
440pub type SyntaxNode = rowan::SyntaxNode<BeancountLanguage>;
441/// `rowan::SyntaxToken` specialized to `BeancountLanguage`.
442pub type SyntaxToken = rowan::SyntaxToken<BeancountLanguage>;
443/// `rowan::SyntaxElement` (token-or-node) specialized to `BeancountLanguage`.
444pub type SyntaxElement = rowan::SyntaxElement<BeancountLanguage>;
445
#[cfg(test)]
mod tests {
    use super::*;

    /// Every kind that is expected to be a node (i.e. NOT a token).
    ///
    /// Must be kept in sync with the `// ---- Node kinds ----` section
    /// of the enum. Shared by `nodes_are_not_tokens` and
    /// `every_kind_partitions_token_xor_node` so the two tests cannot
    /// drift apart.
    const NODE_KINDS: &[SyntaxKind] = &[
        SyntaxKind::SOURCE_FILE,
        SyntaxKind::ERROR_NODE,
        SyntaxKind::DIRECTIVE,
        SyntaxKind::OPEN_DIRECTIVE,
        SyntaxKind::CLOSE_DIRECTIVE,
        SyntaxKind::BALANCE_DIRECTIVE,
        SyntaxKind::PAD_DIRECTIVE,
        SyntaxKind::EVENT_DIRECTIVE,
        SyntaxKind::QUERY_DIRECTIVE,
        SyntaxKind::NOTE_DIRECTIVE,
        SyntaxKind::DOCUMENT_DIRECTIVE,
        SyntaxKind::PRICE_DIRECTIVE,
        SyntaxKind::COMMODITY_DIRECTIVE,
        SyntaxKind::PUSHTAG_DIRECTIVE,
        SyntaxKind::POPTAG_DIRECTIVE,
        SyntaxKind::PUSHMETA_DIRECTIVE,
        SyntaxKind::POPMETA_DIRECTIVE,
        SyntaxKind::OPTION_DIRECTIVE,
        SyntaxKind::INCLUDE_DIRECTIVE,
        SyntaxKind::PLUGIN_DIRECTIVE,
        SyntaxKind::CUSTOM_DIRECTIVE,
        SyntaxKind::TRANSACTION,
        SyntaxKind::META_ENTRY,
        SyntaxKind::POSTING,
        SyntaxKind::AMOUNT,
        SyntaxKind::COST_SPEC,
        SyntaxKind::PRICE_ANNOTATION,
    ];

    /// Every kind listed in `NODE_KINDS` must report `is_token() ==
    /// false`. Catches a node kind that was mistakenly added to the
    /// `matches!` list in `is_token`.
    #[test]
    fn nodes_are_not_tokens() {
        for &kind in NODE_KINDS {
            assert!(
                !kind.is_token(),
                "{kind:?} is a node but is_token() returns true",
            );
        }
    }

    /// Closed-form exhaustiveness check that catches the failure
    /// mode the hand-maintained lists (`NODE_KINDS` and the list in
    /// `tokens_are_tokens`) miss in isolation: a future variant
    /// added to the enum but forgotten in `is_token`'s `matches!` arm
    /// AND in the hand-maintained test lists.
    ///
    /// We enumerate every valid discriminant via the `num_enum`
    /// `try_from` derive — the same surface `kind_from_raw` uses —
    /// then collect the kinds `is_token` classifies as nodes and
    /// compare them to the documented node list. If they disagree, a
    /// variant was added without updating the test scaffolding.
    #[test]
    fn every_kind_partitions_token_xor_node() {
        // FULL `u16::MAX` sweep, not a sampling. SyntaxKind is
        // #[repr(u16)] so any discriminant in [0, u16::MAX] is
        // legally constructible by a future PR. ~65K try_from
        // calls is cheap and catches a future PR that pushes new
        // variants past any arbitrary upper bound.
        let all_kinds: Vec<SyntaxKind> = (0u16..=u16::MAX)
            .filter_map(|d| SyntaxKind::try_from(d).ok())
            .collect();

        // Sanity: we found something (catches a bug where
        // try_from is broken for ALL discriminants).
        assert!(
            !all_kinds.is_empty(),
            "SyntaxKind::try_from rejected every discriminant in 0..=u16::MAX",
        );

        let documented_nodes = NODE_KINDS;
        let observed_nodes: Vec<SyntaxKind> = all_kinds
            .iter()
            .copied()
            .filter(|k| !k.is_token())
            .collect();

        assert_eq!(
            observed_nodes.len(),
            documented_nodes.len(),
            "is_token() says there are {} node kinds but the \
             documented list has {}: observed={observed_nodes:?}, \
             documented={documented_nodes:?}. A new SyntaxKind \
             variant was added without updating is_token's matches! \
             arm AND the documented node list (NODE_KINDS) in this \
             test module.",
            observed_nodes.len(),
            documented_nodes.len(),
        );
        for kind in documented_nodes {
            assert!(
                observed_nodes.contains(kind),
                "{kind:?} is documented as a node but is_token() \
                 returns true for it",
            );
        }
    }

    /// Inverse of `nodes_are_not_tokens`: every token kind must satisfy
    /// `is_token()`. Catches a future variant added to the enum but
    /// forgotten in the `matches!` arm of `is_token`, which would
    /// silently misclassify at runtime while passing the
    /// `nodes_are_not_tokens` test.
    #[test]
    fn tokens_are_tokens() {
        let token_kinds = [
            // Trivia
            SyntaxKind::BOM,
            SyntaxKind::WHITESPACE,
            SyntaxKind::NEWLINE,
            SyntaxKind::COMMENT,
            SyntaxKind::PERCENT_COMMENT,
            SyntaxKind::SHEBANG,
            SyntaxKind::EMACS_DIRECTIVE,
            // Literals
            SyntaxKind::DATE,
            SyntaxKind::NUMBER,
            SyntaxKind::STRING,
            SyntaxKind::ACCOUNT,
            SyntaxKind::CURRENCY,
            SyntaxKind::TAG,
            SyntaxKind::LINK,
            SyntaxKind::META_KEY,
            SyntaxKind::FLAG,
            SyntaxKind::BOOL_TRUE,
            SyntaxKind::BOOL_FALSE,
            SyntaxKind::NULL_KW,
            // Keywords
            SyntaxKind::TXN_KW,
            SyntaxKind::BALANCE_KW,
            SyntaxKind::OPEN_KW,
            SyntaxKind::CLOSE_KW,
            SyntaxKind::COMMODITY_KW,
            SyntaxKind::PAD_KW,
            SyntaxKind::EVENT_KW,
            SyntaxKind::QUERY_KW,
            SyntaxKind::NOTE_KW,
            SyntaxKind::DOCUMENT_KW,
            SyntaxKind::PRICE_KW,
            SyntaxKind::CUSTOM_KW,
            SyntaxKind::OPTION_KW,
            SyntaxKind::INCLUDE_KW,
            SyntaxKind::PLUGIN_KW,
            SyntaxKind::PUSHTAG_KW,
            SyntaxKind::POPTAG_KW,
            SyntaxKind::PUSHMETA_KW,
            SyntaxKind::POPMETA_KW,
            SyntaxKind::PENDING_KW,
            // Punctuation
            SyntaxKind::L_BRACE,
            SyntaxKind::R_BRACE,
            SyntaxKind::L_DOUBLE_BRACE,
            SyntaxKind::R_DOUBLE_BRACE,
            SyntaxKind::L_BRACE_HASH,
            SyntaxKind::L_PAREN,
            SyntaxKind::R_PAREN,
            SyntaxKind::AT,
            SyntaxKind::AT_AT,
            SyntaxKind::COLON,
            SyntaxKind::COMMA,
            SyntaxKind::TILDE,
            SyntaxKind::PIPE,
            SyntaxKind::PLUS,
            SyntaxKind::MINUS,
            SyntaxKind::STAR,
            SyntaxKind::SLASH,
            SyntaxKind::HASH,
            // Error
            SyntaxKind::ERROR_TOKEN,
        ];
        for kind in token_kinds {
            assert!(
                kind.is_token(),
                "{kind:?} is a token but is_token() returns false — \
                 likely missing from the matches! arm in is_token",
            );
        }
    }

    #[test]
    fn is_trivia_excludes_error_token() {
        // ERROR_TOKEN is byte-significant but NOT trivia: it represents
        // bytes the lexer couldn't classify, and downstream consumers
        // need to surface them rather than skip them.
        assert!(!SyntaxKind::ERROR_TOKEN.is_trivia());
        assert!(SyntaxKind::ERROR_TOKEN.is_token());
    }

    #[test]
    fn rowan_language_round_trip() {
        // num_enum::TryFromPrimitive ensures the conversion is sound
        // for every defined discriminant. Spot-check a representative
        // sample including the boundaries.
        for kind in [
            SyntaxKind::BOM,
            SyntaxKind::WHITESPACE,
            SyntaxKind::HASH,
            SyntaxKind::ERROR_TOKEN,
            SyntaxKind::SOURCE_FILE,
            SyntaxKind::ERROR_NODE,
        ] {
            let raw: rowan::SyntaxKind = kind.into();
            let back = <BeancountLanguage as rowan::Language>::kind_from_raw(raw);
            assert_eq!(kind, back);
        }
    }
}