1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
//! `SyntaxKind` — the kinds of CST tokens and nodes — and the rowan `Language`
//! binding for badness's LaTeX surface CST.
use rowan::Language;
/// The kind of every element in the CST: tokens (terminals, emitted by the
/// lexer) and nodes (composites, built by the parser).
///
/// Token kinds are declared first and node kinds after them; the declaration
/// order fixes the `u16` discriminants. `ROOT` must remain the **last**
/// variant so that [`BadnessLang::kind_from_raw`] can validate a raw
/// discriminant with a single comparison. Do not add variants after `ROOT`.
// Variants are written in SCREAMING_SNAKE_CASE rather than UpperCamelCase,
// hence the lint exemption below.
#[allow(non_camel_case_types)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(u16)]
pub enum SyntaxKind {
    // --- Tokens (terminals, produced by the lexer) ---
    /// A control word such as `\foo`: a backslash followed by ASCII letters.
    CONTROL_WORD,
    /// A control symbol such as `\\`, `\{`, `\%` or `\,`: a backslash
    /// followed by one non-letter.
    CONTROL_SYMBOL,
    /// `{`
    L_BRACE,
    /// `}`
    R_BRACE,
    /// `[`
    L_BRACKET,
    /// `]`
    R_BRACKET,
    /// `$`
    DOLLAR,
    /// `&`
    AMPERSAND,
    /// `#`
    HASH,
    /// `^`
    CARET,
    /// `_`
    UNDERSCORE,
    /// `~`
    TILDE,
    /// `% …` up to (not including) the line break.
    COMMENT,
    /// Spaces / tabs.
    WHITESPACE,
    /// `\n`, `\r\n`, or `\r`.
    NEWLINE,
    /// A run of ordinary text characters.
    WORD,
    /// `\verb|…|` / `\verb*|…|` inline verbatim (a single token).
    VERB,
    /// The raw body of a verbatim-like environment (a single token).
    VERBATIM_BODY,
    /// Lexer fallback; the lexer is total, so this is unused today.
    ERROR,

    // --- Nodes (composites, produced by the Phase 1 parser) ---
    /// `{ … }`
    GROUP,
    /// `[ … ]` optional argument.
    OPTIONAL,
    /// An argument attached to a command.
    ARGUMENT,
    /// A control sequence with its arguments.
    COMMAND,
    /// `\begin{…} … \end{…}`
    ENVIRONMENT,
    /// `\begin{name}`
    BEGIN,
    /// `\end{name}`
    END,
    /// `{name}` following `\begin` / `\end`.
    NAME_GROUP,
    /// `$ … $` or `\( … \)`
    INLINE_MATH,
    /// `$$ … $$` or `\[ … \]`
    DISPLAY_MATH,
    /// A math body (the atoms between the delimiters).
    MATH,
    /// A base atom with attached scripts: base (`SUBSCRIPT` | `SUPERSCRIPT`)+.
    SCRIPTED,
    /// `_` and its tightly-bound script argument.
    SUBSCRIPT,
    /// `^` and its tightly-bound script argument.
    SUPERSCRIPT,
    /// `\left( … \right)` — a matched delimiter pair wrapping a `MATH` body.
    LEFT_RIGHT,
    /// Text delimited by blank lines.
    PARAGRAPH,
    /// A run of text and trivia.
    TEXT,
    /// `\\`, with a tightly-bound `*` and/or `[len]` (`\\*[2ex]`).
    LINE_BREAK,
    /// The document root (keep LAST).
    ROOT,
}
impl SyntaxKind {
/// The number of `SyntaxKind` variants. Sound because the enum is
/// `#[repr(u16)]` with contiguous discriminants `0..=ROOT` and `ROOT` is kept
/// last; used to size kind-indexed tables (e.g. the linter's dispatch table).
pub const COUNT: usize = SyntaxKind::ROOT as usize + 1;
}
/// Converts a typed [`SyntaxKind`] into rowan's untyped kind by wrapping the
/// variant's `u16` discriminant.
impl From<SyntaxKind> for rowan::SyntaxKind {
    fn from(kind: SyntaxKind) -> Self {
        // `SyntaxKind` is `#[repr(u16)]`, so the cast yields the discriminant
        // without truncation.
        let discriminant = kind as u16;
        rowan::SyntaxKind(discriminant)
    }
}
/// The rowan language marker for badness's CST.
///
/// This enum has no variants, so no value of it can ever be constructed; it
/// is used purely at the type level, as the `Language` parameter of the rowan
/// tree types, tying them to [`SyntaxKind`].
// NOTE(review): the derives are presumably there to satisfy the supertrait
// bounds of `rowan::Language` — confirm against the rowan version in use.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum BadnessLang {}
impl Language for BadnessLang {
    type Kind = SyntaxKind;

    /// Maps a raw rowan kind back to the typed [`SyntaxKind`].
    ///
    /// # Panics
    ///
    /// Panics if the raw discriminant is greater than that of
    /// `SyntaxKind::ROOT`, i.e. it does not name any variant.
    fn kind_from_raw(raw: rowan::SyntaxKind) -> SyntaxKind {
        let rowan::SyntaxKind(discriminant) = raw;
        let highest_valid = SyntaxKind::ROOT as u16;
        assert!(
            discriminant <= highest_valid,
            "invalid SyntaxKind discriminant: {}",
            discriminant
        );
        // SAFETY: `SyntaxKind` is `#[repr(u16)]` and its discriminants are the
        // contiguous range `0..=ROOT`; the assertion above guarantees that
        // `discriminant` lies inside that range, so it names a real variant.
        unsafe { std::mem::transmute::<u16, SyntaxKind>(discriminant) }
    }

    /// Maps a typed [`SyntaxKind`] to rowan's raw kind via the `From` impl.
    fn kind_to_raw(kind: SyntaxKind) -> rowan::SyntaxKind {
        rowan::SyntaxKind::from(kind)
    }
}
/// A CST node specialised to [`BadnessLang`], so its kind is a [`SyntaxKind`].
pub type SyntaxNode = rowan::SyntaxNode<BadnessLang>;
/// A CST token specialised to [`BadnessLang`], so its kind is a [`SyntaxKind`].
pub type SyntaxToken = rowan::SyntaxToken<BadnessLang>;
/// rowan's CST element type (a node or a token) specialised to [`BadnessLang`].
pub type SyntaxElement = rowan::SyntaxElement<BadnessLang>;