rustledger_parser/cst/syntax_kind.rs
1//! `SyntaxKind`: every kind of token or node that can appear in the
2//! Beancount CST.
3//!
4//! Design notes:
5//!
//! - **No cross-version stability commitment.** Phase 2+ may add new
//!   variants. No serialized form persists `SyntaxKind` values across
//!   binary versions (rowan green trees aren't designed as an on-disk
//!   format).
10//! - **APPEND-ONLY in practice.** The corpus baseline at
11//! `tests/baselines/cst-corpus.manifest` hashes
12//! `(SyntaxKind as u16, len)` per token for every file in the 714-
13//! file compatibility corpus, AND a separate per-file node-shape
14//! hash. Reordering variants invalidates every committed manifest
15//! entry simultaneously, producing an unreviewable 700-line diff.
16//! The rule for routine work: APPEND new variants at the relevant
17//! section's end. If you genuinely must reorder, do it in a
18//! SEPARATE commit from any parser change so reviewers can verify
19//! the regen is mechanical.
20//! - **Safe u16 conversion via `num_enum::TryFromPrimitive`** instead
21//! of a hand-rolled match table. Adding a new variant is a single
22//! line; the derive enforces parity.
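//! - **`is_token` via `matches!` over the actual token variants**, not
//!   a boundary trick on discriminants. Inserting or reordering
//!   variants never reclassifies an existing kind; a new token variant
//!   must still be added to the `matches!` list (the partition tests
//!   in this module catch an omission).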
26//! - **`kind_from_raw` falls back to `ERROR_NODE` on unknown
27//! discriminants** in release builds (`debug_assert!` panics in
28//! debug/test). Defends against version-skewed green-node bytes
29//! reaching the parser via LSP cache, sidecar tooling, or
30//! incremental persistence without crashing production. Surfaces
31//! the skew loudly in dev/test where it's actionable.
32
33use num_enum::TryFromPrimitive;
34
/// Every kind of token or node that can appear in a Beancount CST.
///
/// Tokens carry source bytes; nodes are containers. The Logos lexer
/// produces a stream of tokens; the structured parser (phase 2+) wraps
/// runs of those tokens in nodes.
///
/// Variants are declared without explicit discriminants, so each
/// variant's `u16` value is its position in this list; inserting or
/// reordering variants shifts the value of everything declared after.
#[allow(non_camel_case_types)]
// Variant naming follows the rust-analyzer / rowan convention
// (SCREAMING_SNAKE_CASE). Variants without dedicated rustdoc are
// 1:1 mirrors of `logos_lexer::Token` (keywords, punctuation) and
// are documented at the parent enum + lossless_tokens::map_kind.
#[allow(missing_docs)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, TryFromPrimitive)]
#[repr(u16)]
#[non_exhaustive]
pub enum SyntaxKind {
    // ---- Trivia tokens ---------------------------------------------------
    /// 3-byte UTF-8 BOM at the very start of a file. Synthesized by
    /// the CST builder; the Logos lexer never sees BOM bytes because
    /// `bom::strip_leading` runs first.
    BOM,
    /// Horizontal whitespace `[ \t]+`.
    WHITESPACE,
    /// `\r?\n`.
    NEWLINE,
    /// `; ...` to end-of-line.
    COMMENT,
    /// `% ...` to end-of-line (ledger-compat).
    PERCENT_COMMENT,
    /// `#! ...` (org-mode shebang at top of file).
    SHEBANG,
    /// `#+ ...` (org-mode property line).
    EMACS_DIRECTIVE,

    // ---- Literal tokens --------------------------------------------------
    /// `YYYY-MM-DD` or `YYYY/M/D`.
    DATE,
    /// Integer or decimal literal.
    NUMBER,
    /// Double-quoted string with escape sequences.
    STRING,
    /// Account name (`Assets:Bank:Checking`).
    ACCOUNT,
    /// Currency symbol (`USD`, `/GAINS`).
    CURRENCY,
    /// `#tag`.
    TAG,
    /// `^link`.
    LINK,
    /// `meta-key:` at line start.
    META_KEY,
    /// Single-character flag on a transaction header or a posting
    /// (see the `TRANSACTION` and `POSTING` recognition shapes below).
    FLAG,
    /// `TRUE` / `True` / `true`.
    BOOL_TRUE,
    /// `FALSE` / `False` / `false`.
    BOOL_FALSE,
    /// `NULL`.
    NULL_KW,

    // ---- Keyword tokens --------------------------------------------------
    TXN_KW,
    BALANCE_KW,
    OPEN_KW,
    CLOSE_KW,
    COMMODITY_KW,
    PAD_KW,
    EVENT_KW,
    QUERY_KW,
    NOTE_KW,
    DOCUMENT_KW,
    PRICE_KW,
    CUSTOM_KW,
    OPTION_KW,
    INCLUDE_KW,
    PLUGIN_KW,
    PUSHTAG_KW,
    POPTAG_KW,
    PUSHMETA_KW,
    POPMETA_KW,
    /// `!` pending flag (the `TRANSACTION` trigger list below pairs
    /// `PENDING_KW` with `!` and lists the letter `P` under `FLAG`).
    PENDING_KW,

    // ---- Punctuation tokens ---------------------------------------------
    L_BRACE,
    R_BRACE,
    L_DOUBLE_BRACE,
    R_DOUBLE_BRACE,
    L_BRACE_HASH,
    L_PAREN,
    R_PAREN,
    AT,
    AT_AT,
    COLON,
    COMMA,
    TILDE,
    PIPE,
    PLUS,
    MINUS,
    STAR,
    SLASH,
    /// Bare `#` (cost-spec date separator; line-start `#` is folded
    /// into `COMMENT` by the lexer post-processing pass).
    HASH,

    // ---- Error token -----------------------------------------------------
    /// Bytes the lexer could not classify. Preserved in the CST for
    /// round-trip and diagnostics.
    ERROR_TOKEN,

    // ---- Node kinds ------------------------------------------------------
    //
    // Structural node kinds are added at the moment they're first
    // needed. Phase 1 emitted only `SOURCE_FILE` (plus `ERROR_NODE`
    // reserved for phase 2's structured recovery). Phase 2.0 adds
    // `DIRECTIVE` because the trivia-policy regression tests need a
    // wrapper to demonstrate which directive owns which trivia.
    // Phase 2.1 introduces specific directive kinds
    // (`TRANSACTION`, `OPEN_DIRECTIVE`, ...) alongside `DIRECTIVE`,
    // which remains as the umbrella kind for error-recovery
    // wrappers and any structural test reusable across kinds.
    // `#[non_exhaustive]` + `num_enum`'s derive make new variants
    // safe to add without ABI concerns. (Append-only discipline
    // and discriminant stability notes live in the module
    // rustdoc.)
    /// Root node — every byte of the file is reachable under this node.
    SOURCE_FILE,

    /// Generic error-recovery wrapper. Phase 1 didn't emit this
    /// (lexer errors surface as `ERROR_TOKEN` leaves). **Phase 2.4**
    /// wraps each unrecognized / malformed top-level line in
    /// `ERROR_NODE` so downstream consumers can identify malformed
    /// regions structurally rather than scanning flat `SOURCE_FILE`
    /// children for stray content. Same trivia attachment policy as
    /// recognized directives (rule 2 of `cst::trivia`); per rule 5,
    /// an unterminated final `ERROR_NODE` at EOF still wraps and
    /// simply has no NEWLINE child.
    ERROR_NODE,

    /// Generic structural-directive wrapper. Phase 2.0 introduced it
    /// as the regression-test target for the trivia attachment
    /// policy. Phase 2.1a (this section) adds specific kinds
    /// alongside it; `DIRECTIVE` remains as the umbrella kind for
    /// error-recovery wrappers around partial-directive fragments
    /// AND as a structural test target where the shape is the same
    /// across all directive kinds.
    DIRECTIVE,

    // Phase 2.1a: specific directive kinds for the 14 single-line
    // directives. The trivia attachment policy (see `cst::trivia`)
    // applies UNIFORMLY to each. Each wraps its content tokens +
    // same-line trailing trivia + terminator NEWLINE per the
    // Directive-Terminator Rule.
    // OPTION/INCLUDE/PLUGIN/CUSTOM are edge directives (PR 2.3);
    // absent here.
    OPEN_DIRECTIVE,
    CLOSE_DIRECTIVE,
    BALANCE_DIRECTIVE,
    PAD_DIRECTIVE,
    EVENT_DIRECTIVE,
    QUERY_DIRECTIVE,
    NOTE_DIRECTIVE,
    DOCUMENT_DIRECTIVE,
    PRICE_DIRECTIVE,
    COMMODITY_DIRECTIVE,
    PUSHTAG_DIRECTIVE,
    POPTAG_DIRECTIVE,
    PUSHMETA_DIRECTIVE,
    POPMETA_DIRECTIVE,

    // Phase 2.3: edge directives that were previously falling
    // through to the unrecognized-line passthrough.
    //
    // - `OPTION_DIRECTIVE`: top-level `option "key" "value"`.
    // - `INCLUDE_DIRECTIVE`: top-level `include "path"`.
    // - `PLUGIN_DIRECTIVE`: top-level `plugin "module" ["config"]`
    //   (config string is optional).
    // - `CUSTOM_DIRECTIVE`: dated `DATE custom "type" values...`
    //   — like the 10 dated single-line directives, with an
    //   arbitrary trailing value list (STRING / ACCOUNT /
    //   AMOUNT-shape / DATE / CURRENCY / BOOL_TRUE / BOOL_FALSE).
    //
    // All four follow the single-line directive body shape
    // (header + optional indented metadata sub-lines via
    // `emit_directive_body`). The body / metadata story is
    // identical to PR 2.1a's 10 dated + 4 standalone-keyword
    // directives — only the header recognition is new.
    OPTION_DIRECTIVE,
    INCLUDE_DIRECTIVE,
    PLUGIN_DIRECTIVE,
    CUSTOM_DIRECTIVE,

    // Phase 2.1b: TRANSACTION wrapper. Trigger is DATE followed by
    // one of: STAR (`*`) / PENDING_KW (`!`) / FLAG (P/S/T/C/U/R/M/?/&)
    // / HASH (`#` promoted to flag) / TXN_KW (`txn`) / STRING
    // (implied-txn shorthand `2024-01-15 "Coffee"`) / single-char
    // CURRENCY (ticker letters T/V/F/X/...). Mirrors the legacy
    // AST parser at parser.rs:1707-1715. Body spans header +
    // indented posting and metadata sub-lines until next top-
    // level construct or EOF. POSTING wrapping landed in PR 2.2b;
    // AMOUNT / COST_SPEC / PRICE_ANNOTATION inside POSTING is PR 2.2c.
    TRANSACTION,

    // Phase 2.2a: META_ENTRY wraps each `WS META_KEY ... (NEWLINE
    // | EOF)` indented metadata sub-line inside a directive or
    // transaction. An unterminated final metadata sub-line at EOF
    // (per rule 5 of `cst::trivia`) is still wrapped — its
    // META_ENTRY simply has no NEWLINE child. Sub-node contents
    // stay flat (token-level access to the value); typed AST
    // wrappers in phase 3 will surface `key()` and `value()`
    // accessors. Indented `;`-comments interleaved with metadata
    // stay as flat children of the parent directive, not META_ENTRY
    // children.
    META_ENTRY,

    // Phase 2.2b: POSTING wraps each posting sub-line inside a
    // TRANSACTION. Recognition shape is `WS [(FLAG | STAR |
    // PENDING_KW | HASH | single-char CURRENCY) WS] ACCOUNT ...`
    // followed by an optional amount / cost spec / price
    // annotation, terminated by NEWLINE or EOF (per rule 5 an
    // unterminated final posting at EOF still gets wrapped — its
    // POSTING simply has no NEWLINE child). The flag arm mirrors
    // `parse_flag` in the legacy AST parser; HASH is `#`-promoted-
    // to-flag and single-char CURRENCY covers ticker letters like
    // T/V/F/X that win the lexer's priority-3 Currency-vs-Flag
    // tie-break. Posting-attached metadata — META_ENTRY sub-lines
    // strictly more indented than the POSTING's own indent —
    // becomes a child of the POSTING; a META_ENTRY at the same
    // indent terminates the POSTING and stays at TRANSACTION
    // level. AMOUNT / COST_SPEC / PRICE_ANNOTATION sub-nodes
    // inside POSTING are PR 2.2c (below).
    POSTING,

    // Phase 2.2c: AMOUNT wraps the units-amount portion of a
    // posting line, i.e. `[(MINUS | PLUS)] NUMBER [WS CURRENCY]`,
    // a bare `NUMBER` (incomplete amount with no currency), or a
    // bare `CURRENCY` (currency-only amount). Appears after the
    // ACCOUNT and before any COST_SPEC / PRICE_ANNOTATION. Mirrors
    // the legacy AST `parse_incomplete_amount` shape: NUMBER plus
    // optional CURRENCY, or CURRENCY alone.
    //
    // **Scoped to postings only**: directive-header amounts
    // (`balance Assets:Cash 100 USD`, `price USD 1.10 EUR`) are
    // emitted FLAT by `emit_directive_body`'s
    // `emit_through_terminator`, NOT wrapped in AMOUNT. PAD has
    // no inline amount and is unaffected. Phase 3 typed-AST
    // accessors for `Balance::amount()` / `Price::amount()` will
    // need a different walking strategy (scan flat tokens after
    // the keyword) than `Posting::amount()` (find the AMOUNT
    // child). Pinned by
    // `balance_and_price_directive_header_amounts_stay_flat_not_wrapped`.
    AMOUNT,

    // Phase 2.2c: COST_SPEC wraps a bracketed cost annotation
    // inside a posting line, i.e. `L_BRACE ... R_BRACE`,
    // `L_BRACE_HASH ... R_BRACE` (per-unit + total), or
    // `L_DOUBLE_BRACE ... R_DOUBLE_BRACE` (total-only). Contents
    // stay flat children of COST_SPEC for now (phase 3 typed-AST
    // will surface accessors); an unclosed brace at EOF still
    // gets wrapped (the COST_SPEC simply has no matching closing
    // brace child) per rule 5.
    COST_SPEC,

    // Phase 2.2c: PRICE_ANNOTATION wraps a price clause inside a
    // posting line, i.e. `(AT | AT_AT) [WS AMOUNT]`. Beancount
    // uses `@` for per-unit price and `@@` for total price. The
    // trailing amount IS recursively wrapped in an AMOUNT
    // sub-node mirroring the units-amount case; the typed-AST
    // decodes per-unit-vs-total by inspecting the opener token
    // kind (`AT` vs `AT_AT`) and walks the `AMOUNT` child for the
    // number and currency.
    PRICE_ANNOTATION,
}
307
308impl SyntaxKind {
309 /// Returns true if this kind is a leaf token (carries source bytes
310 /// directly) rather than a parent node. Uses explicit `matches!`
311 /// over the token variants so a future variant inserted anywhere
312 /// in the enum is classified correctly.
313 #[must_use]
314 pub const fn is_token(self) -> bool {
315 matches!(
316 self,
317 Self::BOM
318 | Self::WHITESPACE
319 | Self::NEWLINE
320 | Self::COMMENT
321 | Self::PERCENT_COMMENT
322 | Self::SHEBANG
323 | Self::EMACS_DIRECTIVE
324 | Self::DATE
325 | Self::NUMBER
326 | Self::STRING
327 | Self::ACCOUNT
328 | Self::CURRENCY
329 | Self::TAG
330 | Self::LINK
331 | Self::META_KEY
332 | Self::FLAG
333 | Self::BOOL_TRUE
334 | Self::BOOL_FALSE
335 | Self::NULL_KW
336 | Self::TXN_KW
337 | Self::BALANCE_KW
338 | Self::OPEN_KW
339 | Self::CLOSE_KW
340 | Self::COMMODITY_KW
341 | Self::PAD_KW
342 | Self::EVENT_KW
343 | Self::QUERY_KW
344 | Self::NOTE_KW
345 | Self::DOCUMENT_KW
346 | Self::PRICE_KW
347 | Self::CUSTOM_KW
348 | Self::OPTION_KW
349 | Self::INCLUDE_KW
350 | Self::PLUGIN_KW
351 | Self::PUSHTAG_KW
352 | Self::POPTAG_KW
353 | Self::PUSHMETA_KW
354 | Self::POPMETA_KW
355 | Self::PENDING_KW
356 | Self::L_BRACE
357 | Self::R_BRACE
358 | Self::L_DOUBLE_BRACE
359 | Self::R_DOUBLE_BRACE
360 | Self::L_BRACE_HASH
361 | Self::L_PAREN
362 | Self::R_PAREN
363 | Self::AT
364 | Self::AT_AT
365 | Self::COLON
366 | Self::COMMA
367 | Self::TILDE
368 | Self::PIPE
369 | Self::PLUS
370 | Self::MINUS
371 | Self::STAR
372 | Self::SLASH
373 | Self::HASH
374 | Self::ERROR_TOKEN
375 )
376 }
377
378 /// Returns true if this kind is trivia (whitespace, newline, BOM,
379 /// or a comment variant). Trivia is byte-significant but
380 /// semantically uninteresting; typed AST methods skip it.
381 /// `ERROR_TOKEN` is NOT trivia: errors must surface.
382 #[must_use]
383 pub const fn is_trivia(self) -> bool {
384 matches!(
385 self,
386 Self::BOM
387 | Self::WHITESPACE
388 | Self::NEWLINE
389 | Self::COMMENT
390 | Self::PERCENT_COMMENT
391 | Self::SHEBANG
392 | Self::EMACS_DIRECTIVE
393 )
394 }
395}
396
397impl From<SyntaxKind> for rowan::SyntaxKind {
398 fn from(kind: SyntaxKind) -> Self {
399 Self(kind as u16)
400 }
401}
402
/// Type-level marker that selects the Beancount instantiation of
/// `rowan::Language`. It has no variants, so no value of this type can
/// ever be constructed.
#[derive(Debug, Clone, Copy)]
#[derive(PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum BeancountLanguage {}
407
408impl rowan::Language for BeancountLanguage {
409 type Kind = SyntaxKind;
410
411 fn kind_from_raw(raw: rowan::SyntaxKind) -> Self::Kind {
412 // Dev/test: panic loudly so version-skewed green-node bytes
413 // surface during development, when they're actionable. Prod:
414 // fall back to ERROR_NODE so an unrecoverable panic deep in
415 // rowan's tree walk (rowan calls kind_from_raw inside every
416 // tree traversal) can't take down a long-running LSP from a
417 // single stale cache file.
418 //
419 // The asymmetry with `SyntaxKind::try_from` is deliberate:
420 // try_from is for explicit roundtrip validation (e.g.,
421 // serializing a kind and reading it back, where Err is the
422 // useful signal); kind_from_raw is for tree-walk hot paths
423 // (where panic in prod is worse than a downgraded kind).
424 debug_assert!(
425 SyntaxKind::try_from(raw.0).is_ok(),
426 "unknown SyntaxKind discriminant {} — cross-version GreenNode \
427 skew, manifest reorder corruption, or a missing num_enum \
428 derive update. In release builds this becomes ERROR_NODE.",
429 raw.0,
430 );
431 SyntaxKind::try_from(raw.0).unwrap_or(SyntaxKind::ERROR_NODE)
432 }
433
434 fn kind_to_raw(kind: Self::Kind) -> rowan::SyntaxKind {
435 kind.into()
436 }
437}
438
/// `rowan::SyntaxNode` specialized to `BeancountLanguage`, whose
/// `Kind` is this module's `SyntaxKind`.
pub type SyntaxNode = rowan::SyntaxNode<BeancountLanguage>;
/// `rowan::SyntaxToken` specialized to `BeancountLanguage`, whose
/// `Kind` is this module's `SyntaxKind`.
pub type SyntaxToken = rowan::SyntaxToken<BeancountLanguage>;
/// `rowan::SyntaxElement` (token-or-node) specialized to
/// `BeancountLanguage`.
pub type SyntaxElement = rowan::SyntaxElement<BeancountLanguage>;
445
#[cfg(test)]
mod tests {
    use super::*;

    /// The documented node kinds, in declaration order. Must be kept
    /// in sync with the `// ---- Node kinds ----` section of the enum
    /// above. Shared by `nodes_are_not_tokens` and
    /// `every_kind_partitions_token_xor_node` so the two tests cannot
    /// drift apart from each other.
    const NODE_KINDS: &[SyntaxKind] = &[
        SyntaxKind::SOURCE_FILE,
        SyntaxKind::ERROR_NODE,
        SyntaxKind::DIRECTIVE,
        SyntaxKind::OPEN_DIRECTIVE,
        SyntaxKind::CLOSE_DIRECTIVE,
        SyntaxKind::BALANCE_DIRECTIVE,
        SyntaxKind::PAD_DIRECTIVE,
        SyntaxKind::EVENT_DIRECTIVE,
        SyntaxKind::QUERY_DIRECTIVE,
        SyntaxKind::NOTE_DIRECTIVE,
        SyntaxKind::DOCUMENT_DIRECTIVE,
        SyntaxKind::PRICE_DIRECTIVE,
        SyntaxKind::COMMODITY_DIRECTIVE,
        SyntaxKind::PUSHTAG_DIRECTIVE,
        SyntaxKind::POPTAG_DIRECTIVE,
        SyntaxKind::PUSHMETA_DIRECTIVE,
        SyntaxKind::POPMETA_DIRECTIVE,
        SyntaxKind::OPTION_DIRECTIVE,
        SyntaxKind::INCLUDE_DIRECTIVE,
        SyntaxKind::PLUGIN_DIRECTIVE,
        SyntaxKind::CUSTOM_DIRECTIVE,
        SyntaxKind::TRANSACTION,
        SyntaxKind::META_ENTRY,
        SyntaxKind::POSTING,
        SyntaxKind::AMOUNT,
        SyntaxKind::COST_SPEC,
        SyntaxKind::PRICE_ANNOTATION,
    ];

    /// Every documented node kind must report `is_token() == false`.
    /// Catches a node variant that was accidentally added to the
    /// `matches!` list in `is_token`.
    #[test]
    fn nodes_are_not_tokens() {
        for kind in NODE_KINDS {
            assert!(
                !kind.is_token(),
                "{kind:?} is a node but is_token() returns true",
            );
        }
    }

    /// Closed-form exhaustiveness check that catches the failure
    /// mode the two hand-maintained lists (`NODE_KINDS` and the list
    /// in `tokens_are_tokens`) miss in isolation: a future variant
    /// added to the enum but forgotten in `is_token`'s `matches!` arm
    /// AND in both hand-maintained test lists.
    ///
    /// We enumerate every valid discriminant via the `num_enum`
    /// `try_from` derive — the same surface `kind_from_raw` uses —
    /// then count how many are classified as nodes, and compare to
    /// the documented node list. If the counts disagree, a variant
    /// was added without updating the test scaffolding.
    #[test]
    fn every_kind_partitions_token_xor_node() {
        // FULL `u16::MAX` sweep, not a sampling. SyntaxKind is
        // #[repr(u16)] so any discriminant in [0, u16::MAX] is
        // legally constructible by a future PR. ~65K try_from
        // calls is cheap and catches a future PR that pushes new
        // variants past any arbitrary upper bound.
        let all_kinds: Vec<SyntaxKind> = (0u16..=u16::MAX)
            .filter_map(|d| SyntaxKind::try_from(d).ok())
            .collect();

        // Sanity: we found something (catches a bug where
        // try_from is broken for ALL discriminants).
        assert!(
            !all_kinds.is_empty(),
            "SyntaxKind::try_from rejected every discriminant in 0..=u16::MAX",
        );

        // Local binding so the assertion message below can capture
        // the list by name.
        let documented_nodes = NODE_KINDS;
        let observed_nodes: Vec<SyntaxKind> = all_kinds
            .iter()
            .copied()
            .filter(|k| !k.is_token())
            .collect();

        assert_eq!(
            observed_nodes.len(),
            documented_nodes.len(),
            "is_token() says there are {} node kinds but the \
             documented list has {}: observed={observed_nodes:?}, \
             documented={documented_nodes:?}. A new SyntaxKind \
             variant was added without updating is_token's matches! \
             arm AND the documented_nodes list in this test.",
            observed_nodes.len(),
            documented_nodes.len(),
        );
        // Equal lengths plus "every documented kind is observed"
        // gives set equality, because the documented list has no
        // duplicates.
        for kind in documented_nodes {
            assert!(
                observed_nodes.contains(kind),
                "{kind:?} is documented as a node but is_token() \
                 returns true for it",
            );
        }
    }

    /// Inverse of `nodes_are_not_tokens`: every token kind must satisfy
    /// `is_token()`. Catches a future variant added to the enum but
    /// forgotten in the `matches!` arm of `is_token`, which would
    /// silently misclassify at runtime while passing the
    /// `nodes_are_not_tokens` test.
    #[test]
    fn tokens_are_tokens() {
        let token_kinds = [
            // Trivia
            SyntaxKind::BOM,
            SyntaxKind::WHITESPACE,
            SyntaxKind::NEWLINE,
            SyntaxKind::COMMENT,
            SyntaxKind::PERCENT_COMMENT,
            SyntaxKind::SHEBANG,
            SyntaxKind::EMACS_DIRECTIVE,
            // Literals
            SyntaxKind::DATE,
            SyntaxKind::NUMBER,
            SyntaxKind::STRING,
            SyntaxKind::ACCOUNT,
            SyntaxKind::CURRENCY,
            SyntaxKind::TAG,
            SyntaxKind::LINK,
            SyntaxKind::META_KEY,
            SyntaxKind::FLAG,
            SyntaxKind::BOOL_TRUE,
            SyntaxKind::BOOL_FALSE,
            SyntaxKind::NULL_KW,
            // Keywords
            SyntaxKind::TXN_KW,
            SyntaxKind::BALANCE_KW,
            SyntaxKind::OPEN_KW,
            SyntaxKind::CLOSE_KW,
            SyntaxKind::COMMODITY_KW,
            SyntaxKind::PAD_KW,
            SyntaxKind::EVENT_KW,
            SyntaxKind::QUERY_KW,
            SyntaxKind::NOTE_KW,
            SyntaxKind::DOCUMENT_KW,
            SyntaxKind::PRICE_KW,
            SyntaxKind::CUSTOM_KW,
            SyntaxKind::OPTION_KW,
            SyntaxKind::INCLUDE_KW,
            SyntaxKind::PLUGIN_KW,
            SyntaxKind::PUSHTAG_KW,
            SyntaxKind::POPTAG_KW,
            SyntaxKind::PUSHMETA_KW,
            SyntaxKind::POPMETA_KW,
            SyntaxKind::PENDING_KW,
            // Punctuation
            SyntaxKind::L_BRACE,
            SyntaxKind::R_BRACE,
            SyntaxKind::L_DOUBLE_BRACE,
            SyntaxKind::R_DOUBLE_BRACE,
            SyntaxKind::L_BRACE_HASH,
            SyntaxKind::L_PAREN,
            SyntaxKind::R_PAREN,
            SyntaxKind::AT,
            SyntaxKind::AT_AT,
            SyntaxKind::COLON,
            SyntaxKind::COMMA,
            SyntaxKind::TILDE,
            SyntaxKind::PIPE,
            SyntaxKind::PLUS,
            SyntaxKind::MINUS,
            SyntaxKind::STAR,
            SyntaxKind::SLASH,
            SyntaxKind::HASH,
            // Error
            SyntaxKind::ERROR_TOKEN,
        ];
        for kind in token_kinds {
            assert!(
                kind.is_token(),
                "{kind:?} is a token but is_token() returns false — \
                 likely missing from the matches! arm in is_token",
            );
        }
    }

    #[test]
    fn is_trivia_excludes_error_token() {
        // ERROR_TOKEN is byte-significant but NOT trivia: it represents
        // bytes the lexer couldn't classify, and downstream consumers
        // need to surface them rather than skip them.
        assert!(!SyntaxKind::ERROR_TOKEN.is_trivia());
        assert!(SyntaxKind::ERROR_TOKEN.is_token());
    }

    #[test]
    fn rowan_language_round_trip() {
        // num_enum::TryFromPrimitive ensures the conversion is sound
        // for every defined discriminant. Spot-check a representative
        // sample including the boundaries: the first variant (BOM),
        // the token/node seam (ERROR_TOKEN / SOURCE_FILE), and the
        // last variant (PRICE_ANNOTATION).
        for kind in [
            SyntaxKind::BOM,
            SyntaxKind::WHITESPACE,
            SyntaxKind::HASH,
            SyntaxKind::ERROR_TOKEN,
            SyntaxKind::SOURCE_FILE,
            SyntaxKind::ERROR_NODE,
            SyntaxKind::PRICE_ANNOTATION,
        ] {
            let raw: rowan::SyntaxKind = kind.into();
            let back = <BeancountLanguage as rowan::Language>::kind_from_raw(raw);
            assert_eq!(kind, back);
        }
    }
}