Skip to main content

styx_cst/
syntax_kind.rs

1//! Syntax node and token kinds for the Styx CST.
2
3use styx_tokenizer::TokenKind;
4
/// The kind of a syntax element (node or token).
///
/// Tokens are terminal elements (leaves), while nodes are non-terminal
/// (contain children). The distinction is made by value: every token kind is
/// declared before the hidden `__LAST_TOKEN` marker and therefore has a lower
/// discriminant, while every node kind is declared after it.
///
/// Declaration order is significant. Discriminants are assigned sequentially
/// starting at 0, and [`SyntaxKind::from_raw`] hard-codes those numbers, so
/// adding, removing or reordering variants requires updating that table too.
///
/// The SCREAMING_CASE naming convention is used to match rowan/rust-analyzer
/// conventions for syntax kinds.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(u16)]
#[allow(non_camel_case_types)]
#[allow(clippy::manual_non_exhaustive)] // __LAST_TOKEN is used for token/node distinction
pub enum SyntaxKind {
    // ========== TOKENS (terminals) ==========
    // Structural tokens
    /// `{`
    L_BRACE = 0,
    /// `}`
    R_BRACE,
    /// `(`
    L_PAREN,
    /// `)`
    R_PAREN,
    /// `,`
    COMMA,
    /// `>`
    GT,
    /// `@`
    AT,
    /// `@name` (tag token from tokenizer)
    TAG_TOKEN,
    /// `/` inside a chained tag payload
    SLASH,

    // Scalar tokens
    /// Bare (unquoted) scalar: `hello`, `42`, `true`
    BARE_SCALAR,
    /// Quoted scalar: `"hello world"`
    QUOTED_SCALAR,
    /// Raw scalar: `r#"..."#`
    RAW_SCALAR,
    /// Heredoc start marker: `<<DELIM\n`
    HEREDOC_START,
    /// Heredoc content
    HEREDOC_CONTENT,
    /// Heredoc end marker
    HEREDOC_END,

    // Comment tokens
    /// Line comment: `// ...`
    LINE_COMMENT,
    /// Doc comment: `/// ...`
    DOC_COMMENT,

    // Whitespace tokens
    /// Horizontal whitespace (spaces, tabs)
    WHITESPACE,
    /// Newline (`\n` or `\r\n`)
    NEWLINE,

    // Special tokens
    /// End of file
    EOF,
    /// Lexer/parser error
    ERROR,

    // Marker for end of tokens. It is neither a token nor a node and must stay
    // between the two groups.
    #[doc(hidden)]
    __LAST_TOKEN,

    // ========== NODES (non-terminals) ==========
    /// Root document node
    DOCUMENT,
    /// An entry (key-value pair or sequence element)
    ENTRY,
    /// An explicit object `{ ... }`
    OBJECT,
    /// A sequence `( ... )`
    SEQUENCE,
    /// A scalar value wrapper
    SCALAR,
    /// Unit value `@`
    UNIT,
    /// A tag `@name` with optional payload
    TAG,
    /// Tag name (without @)
    TAG_NAME,
    /// Tag payload (the value after the tag name)
    TAG_PAYLOAD,
    /// Key in an entry
    KEY,
    /// Value in an entry
    VALUE,
    /// A heredoc (groups start, content, end)
    HEREDOC,
    /// A group of attributes (`key>value` pairs)
    ATTRIBUTES,
    /// A single attribute (`key>value`)
    ATTRIBUTE,
}
105
106impl SyntaxKind {
107    /// Whether this is a token (terminal) kind.
108    pub fn is_token(self) -> bool {
109        (self as u16) < (Self::__LAST_TOKEN as u16)
110    }
111
112    /// Whether this is a node (non-terminal) kind.
113    pub fn is_node(self) -> bool {
114        (self as u16) > (Self::__LAST_TOKEN as u16)
115    }
116
117    /// Whether this is trivia (whitespace or comments).
118    pub fn is_trivia(self) -> bool {
119        matches!(self, Self::WHITESPACE | Self::NEWLINE | Self::LINE_COMMENT)
120    }
121}
122
123impl From<TokenKind> for SyntaxKind {
124    fn from(kind: TokenKind) -> Self {
125        match kind {
126            TokenKind::LBrace => Self::L_BRACE,
127            TokenKind::RBrace => Self::R_BRACE,
128            TokenKind::LParen => Self::L_PAREN,
129            TokenKind::RParen => Self::R_PAREN,
130            TokenKind::Comma => Self::COMMA,
131            TokenKind::Gt => Self::GT,
132            TokenKind::At => Self::AT,
133            TokenKind::Tag => Self::TAG_TOKEN,
134            TokenKind::BareScalar => Self::BARE_SCALAR,
135            TokenKind::QuotedScalar => Self::QUOTED_SCALAR,
136            TokenKind::RawScalar => Self::RAW_SCALAR,
137            TokenKind::HeredocStart => Self::HEREDOC_START,
138            TokenKind::HeredocContent => Self::HEREDOC_CONTENT,
139            TokenKind::HeredocEnd => Self::HEREDOC_END,
140            TokenKind::LineComment => Self::LINE_COMMENT,
141            TokenKind::DocComment => Self::DOC_COMMENT,
142            TokenKind::Whitespace => Self::WHITESPACE,
143            TokenKind::Newline => Self::NEWLINE,
144            TokenKind::Eof => Self::EOF,
145            TokenKind::Error => Self::ERROR,
146        }
147    }
148}
149
150impl From<SyntaxKind> for rowan::SyntaxKind {
151    fn from(kind: SyntaxKind) -> Self {
152        rowan::SyntaxKind(kind as u16)
153    }
154}
155
156/// Language definition for Styx, used by rowan.
157#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
158pub enum StyxLanguage {}
159
160impl rowan::Language for StyxLanguage {
161    type Kind = SyntaxKind;
162
163    fn kind_from_raw(raw: rowan::SyntaxKind) -> Self::Kind {
164        Self::Kind::from_raw(raw.0).expect("invalid SyntaxKind value from rowan")
165    }
166
167    fn kind_to_raw(kind: Self::Kind) -> rowan::SyntaxKind {
168        rowan::SyntaxKind(kind as u16)
169    }
170}
171
172impl SyntaxKind {
173    /// Convert from a raw u16 value to SyntaxKind.
174    /// Returns None if the value is out of range or corresponds to __LAST_TOKEN.
175    pub const fn from_raw(raw: u16) -> Option<Self> {
176        match raw {
177            0 => Some(Self::L_BRACE),
178            1 => Some(Self::R_BRACE),
179            2 => Some(Self::L_PAREN),
180            3 => Some(Self::R_PAREN),
181            4 => Some(Self::COMMA),
182            5 => Some(Self::GT),
183            6 => Some(Self::AT),
184            7 => Some(Self::TAG_TOKEN),
185            8 => Some(Self::SLASH),
186            9 => Some(Self::BARE_SCALAR),
187            10 => Some(Self::QUOTED_SCALAR),
188            11 => Some(Self::RAW_SCALAR),
189            12 => Some(Self::HEREDOC_START),
190            13 => Some(Self::HEREDOC_CONTENT),
191            14 => Some(Self::HEREDOC_END),
192            15 => Some(Self::LINE_COMMENT),
193            16 => Some(Self::DOC_COMMENT),
194            17 => Some(Self::WHITESPACE),
195            18 => Some(Self::NEWLINE),
196            19 => Some(Self::EOF),
197            20 => Some(Self::ERROR),
198            // 21 is __LAST_TOKEN - skip it
199            22 => Some(Self::DOCUMENT),
200            23 => Some(Self::ENTRY),
201            24 => Some(Self::OBJECT),
202            25 => Some(Self::SEQUENCE),
203            26 => Some(Self::SCALAR),
204            27 => Some(Self::UNIT),
205            28 => Some(Self::TAG),
206            29 => Some(Self::TAG_NAME),
207            30 => Some(Self::TAG_PAYLOAD),
208            31 => Some(Self::KEY),
209            32 => Some(Self::VALUE),
210            33 => Some(Self::HEREDOC),
211            34 => Some(Self::ATTRIBUTES),
212            35 => Some(Self::ATTRIBUTE),
213            _ => None,
214        }
215    }
216}
217
/// A syntax node in the Styx CST.
///
/// This is rowan's `SyntaxNode` specialised to [`StyxLanguage`], so its kind
/// is reported as a [`SyntaxKind`].
pub type SyntaxNode = rowan::SyntaxNode<StyxLanguage>;

/// A syntax token in the Styx CST.
///
/// This is rowan's `SyntaxToken` specialised to [`StyxLanguage`].
pub type SyntaxToken = rowan::SyntaxToken<StyxLanguage>;

/// A syntax element (either node or token) in the Styx CST.
///
/// This is rowan's `SyntaxElement` specialised to [`StyxLanguage`].
pub type SyntaxElement = rowan::SyntaxElement<StyxLanguage>;
226
#[cfg(test)]
mod tests {
    use super::*;
    use rowan::Language;

    /// Tokens and nodes are classified by their position relative to the
    /// `__LAST_TOKEN` marker, and the marker itself is neither.
    #[test]
    fn token_vs_node() {
        assert!(SyntaxKind::L_BRACE.is_token());
        assert!(SyntaxKind::WHITESPACE.is_token());
        assert!(SyntaxKind::ERROR.is_token());
        assert!(!SyntaxKind::L_BRACE.is_node());
        assert!(!SyntaxKind::ERROR.is_node());

        assert!(SyntaxKind::DOCUMENT.is_node());
        assert!(SyntaxKind::ENTRY.is_node());
        assert!(SyntaxKind::OBJECT.is_node());
        assert!(!SyntaxKind::DOCUMENT.is_token());
        assert!(!SyntaxKind::ATTRIBUTE.is_token());

        assert!(!SyntaxKind::__LAST_TOKEN.is_token());
        assert!(!SyntaxKind::__LAST_TOKEN.is_node());
    }

    /// Whitespace, newlines and line comments are trivia; doc comments and
    /// everything else are not.
    #[test]
    fn trivia() {
        assert!(SyntaxKind::WHITESPACE.is_trivia());
        assert!(SyntaxKind::NEWLINE.is_trivia());
        assert!(SyntaxKind::LINE_COMMENT.is_trivia());

        assert!(!SyntaxKind::DOC_COMMENT.is_trivia());
        assert!(!SyntaxKind::BARE_SCALAR.is_trivia());
    }

    #[test]
    fn token_kind_conversion() {
        assert_eq!(SyntaxKind::from(TokenKind::LBrace), SyntaxKind::L_BRACE);
        assert_eq!(
            SyntaxKind::from(TokenKind::BareScalar),
            SyntaxKind::BARE_SCALAR
        );
        assert_eq!(SyntaxKind::from(TokenKind::Newline), SyntaxKind::NEWLINE);
    }

    /// Guards the hand-written `from_raw` table against drifting away from
    /// the enum's actual discriminants.
    #[test]
    fn from_raw_matches_discriminants() {
        let last = SyntaxKind::ATTRIBUTE as u16;
        let marker = SyntaxKind::__LAST_TOKEN as u16;

        for raw in 0..=last {
            match SyntaxKind::from_raw(raw) {
                Some(kind) => assert_eq!(kind as u16, raw),
                // The marker is the only in-range value without a kind.
                None => assert_eq!(raw, marker),
            }
        }

        assert_eq!(SyntaxKind::from_raw(last + 1), None);
        assert_eq!(SyntaxKind::from_raw(u16::MAX), None);
    }

    /// Every real kind survives a trip through rowan's raw representation.
    #[test]
    fn rowan_roundtrip() {
        for raw in 0..=(SyntaxKind::ATTRIBUTE as u16) {
            if let Some(kind) = SyntaxKind::from_raw(raw) {
                let as_raw = StyxLanguage::kind_to_raw(kind);
                assert_eq!(as_raw, rowan::SyntaxKind(raw));
                assert_eq!(StyxLanguage::kind_from_raw(as_raw), kind);
            }
        }
    }

    /// The marker value is not a valid kind, so converting it must panic.
    #[test]
    #[should_panic(expected = "invalid SyntaxKind value from rowan")]
    fn kind_from_raw_rejects_marker() {
        StyxLanguage::kind_from_raw(rowan::SyntaxKind(SyntaxKind::__LAST_TOKEN as u16));
    }
}