// cyrs_syntax/kind.rs

//! `SyntaxKind` — the canonical grammar reference (spec 0001 §4.4).
//!
//! Every grammar production and every token class has an entry here. The
//! enum is `repr(u16)` so it round-trips through `rowan::SyntaxKind`. New
//! kinds are appended at the end of their section; existing numeric values
//! are never reused, matching the diagnostic code policy (spec §10.2).
//!
//! The enum is partitioned into three zones:
//!
//! - Tokens (trivia, identifiers, literals, punctuation, keywords).
//! - Nodes (roots, clauses, patterns, expressions, etc.).
//! - Meta sentinels (`ERROR`, `EOF`), which are neither tokens nor nodes
//!   as far as the predicates below are concerned.
//!
//! The [`SyntaxKind::is_trivia`], [`SyntaxKind::is_keyword`],
//! [`SyntaxKind::is_punct`], [`SyntaxKind::is_literal`],
//! [`SyntaxKind::is_node`], and [`SyntaxKind::is_token`] predicates are the
//! primary consumers' lens on these partitions.

// Variant names are written in SCREAMING_SNAKE_CASE (e.g. `MATCH_KW`,
// `SOURCE_FILE`), which the `non_camel_case_types` lint would otherwise
// flag on every variant.
#![allow(non_camel_case_types)]
// Every variant of `SyntaxKind` is a grammar-production name whose
// semantics live in spec §4.4 (tokens) and the `cypher.ungrammar` source
// (nodes).  Per-variant doc strings would duplicate those sources and
// add boilerplate without informational value; exempt the whole file
// from `missing_docs` rather than paper them all over.
#![allow(missing_docs)]

/// Every syntactic category in Cypher: tokens, nodes, and meta.
///
/// `repr(u16)` so it can be used directly as a rowan kind.
///
/// Each section starts at an explicit discriminant and leaves a gap before
/// the next section, so new kinds can be appended to a section without
/// renumbering anything that already exists.
///
/// Marked `#[non_exhaustive]` per spec §4.4: consumers must use a
/// wildcard arm when matching, so the grammar can grow without
/// breaking every downstream match.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(u16)]
#[non_exhaustive]
pub enum SyntaxKind {
    // =====================================================================
    // Trivia (0..16)
    // =====================================================================
    WHITESPACE = 0,
    LINE_COMMENT,
    BLOCK_COMMENT,

    // =====================================================================
    // Identifiers & literals (16..48)
    // =====================================================================
    IDENT = 16,
    QUOTED_IDENT,
    INT_LITERAL,
    FLOAT_LITERAL,
    STRING_LITERAL,
    PARAM,
    BOOL_LITERAL,
    NULL_LITERAL,

    // =====================================================================
    // Punctuation (48..128)
    // =====================================================================
    L_PAREN = 48,
    R_PAREN,
    L_BRACK,
    R_BRACK,
    L_BRACE,
    R_BRACE,
    COMMA,
    SEMI,
    COLON,
    DOUBLE_COLON,
    DOT,
    DOT_DOT,
    PIPE,
    STAR,
    PLUS,
    MINUS,
    SLASH,
    PERCENT,
    CARET,
    EQ,
    NEQ,
    BANG_EQ,
    LT,
    LE,
    GT,
    GE,
    ARROW_R,
    ARROW_L,
    REGEX_EQ,
    DOLLAR,
    BANG,
    AMP,

    // =====================================================================
    // Keywords (128..320). `_KW` suffix to disambiguate from AST nodes.
    // =====================================================================
    MATCH_KW = 128,
    OPTIONAL_KW,
    WHERE_KW,
    WITH_KW,
    RETURN_KW,
    CREATE_KW,
    MERGE_KW,
    DELETE_KW,
    DETACH_KW,
    SET_KW,
    REMOVE_KW,
    UNWIND_KW,
    CALL_KW,
    YIELD_KW,
    ON_KW,
    AS_KW,
    AND_KW,
    OR_KW,
    XOR_KW,
    NOT_KW,
    IN_KW,
    IS_KW,
    NULL_KW,
    TRUE_KW,
    FALSE_KW,
    CASE_KW,
    WHEN_KW,
    THEN_KW,
    ELSE_KW,
    END_KW,
    ORDER_KW,
    BY_KW,
    ASC_KW,
    ASCENDING_KW,
    DESC_KW,
    DESCENDING_KW,
    SKIP_KW,
    LIMIT_KW,
    DISTINCT_KW,
    UNION_KW,
    ALL_KW,
    STARTS_KW,
    ENDS_KW,
    CONTAINS_KW,
    DIV_KW,
    MOD_KW,
    COUNT_KW,
    EXISTS_KW,
    SHORTESTPATH_KW,
    ALLSHORTESTPATHS_KW,
    // List-predicate keywords (cy-8x5). `ALL_KW` already exists above —
    // these three join the set so `ANY(x IN xs WHERE p)` etc. lex as
    // dedicated keyword tokens rather than identifiers. Shared across
    // GqlAligned and OpenCypherV9 (spec §9), so no dialect gate needed.
    ANY_KW,
    NONE_KW,
    SINGLE_KW,

    // =====================================================================
    // Syntax nodes (320..768)
    // =====================================================================

    // Roots
    SOURCE_FILE = 320,
    STATEMENT,

    // Clauses
    MATCH_CLAUSE,
    OPTIONAL_MATCH_CLAUSE,
    WHERE_CLAUSE,
    WITH_CLAUSE,
    RETURN_CLAUSE,
    CREATE_CLAUSE,
    MERGE_CLAUSE,
    SET_CLAUSE,
    REMOVE_CLAUSE,
    DELETE_CLAUSE,
    UNWIND_CLAUSE,
    CALL_CLAUSE,
    UNION_TAIL,
    MERGE_ACTION,

    // Return body
    RETURN_BODY,
    RETURN_ITEMS,
    RETURN_ITEM,
    ORDER_BY,
    ORDER_ITEM,
    SKIP_SUBCLAUSE,
    LIMIT_SUBCLAUSE,

    // Patterns
    PATTERN,
    PATTERN_PART,
    NAMED_PATTERN_PART,
    NODE_PATTERN,
    REL_PATTERN,
    REL_DETAIL,
    REL_LENGTH,
    LABEL_EXPR,
    REL_TYPE_EXPR,
    PROPERTY_MAP,

    // Set / remove
    SET_ITEM,
    REMOVE_ITEM,

    // Call
    YIELD_SUBCLAUSE,
    YIELD_ITEM,
    PROCEDURE_NAME,

    // Expressions
    BINARY_EXPR,
    UNARY_EXPR,
    POSTFIX_EXPR,
    LITERAL_EXPR,
    VAR_EXPR,
    PROP_ACCESS_EXPR,
    SUBSCRIPT_EXPR,
    LIST_LITERAL,
    MAP_LITERAL,
    MAP_PROJECTION,
    MAP_PROJECTION_ITEM,
    CASE_EXPR,
    CASE_WHEN_ARM,
    CASE_ELSE_ARM,
    FUNCTION_CALL,
    CALL_ARGS,
    PAREN_EXPR,
    LIST_COMPREHENSION,
    PATTERN_COMPREHENSION,
    PATTERN_PREDICATE,
    PARAM_EXPR,
    IS_NULL_EXPR,
    IN_EXPR,
    REGEX_MATCH_EXPR,
    STRING_OP_EXPR,

    // Shared
    NAME,
    ARG_LIST,

    // List indexing / slicing (cy-7s6.1). Kept at the end of the node zone
    // to preserve numeric stability of the already-assigned variants above.
    INDEX_EXPR,
    SLICE_EXPR,

    // List predicates: ANY / ALL / NONE / SINGLE (cy-8x5). Appended at the
    // end of the node zone for the same stability reason as cy-7s6.1. The
    // discriminant between the four is carried by the first keyword child
    // token (ANY_KW / ALL_KW / NONE_KW / SINGLE_KW) so a single node kind
    // covers all four surface forms.
    LIST_PREDICATE_EXPR,

    // shortestPath / allShortestPaths pattern (cy-b5b, spec §6.4 / §19
    // "shortest-path"). Wraps the inner path pattern; the discriminant
    // between `shortestPath(...)` and `allShortestPaths(...)` is the
    // first keyword child token (SHORTESTPATH_KW vs ALLSHORTESTPATHS_KW),
    // matching the LIST_PREDICATE_EXPR pattern above.
    SHORTEST_PATH_PATTERN,

    // =====================================================================
    // Errors & EOF (768..1024)
    // =====================================================================
    ERROR = 768,
    EOF = 769,
    // Reserved for future expansion; additions keep numeric stability.
}

266impl SyntaxKind {
267    /// Convert a raw u16 back into a `SyntaxKind`. Returns `None` for
268    /// values not in the enumeration.
269    #[must_use]
270    pub fn from_u16(raw: u16) -> Option<Self> {
271        // Safety-free check: reject values outside any defined slot.
272        // A hand-written match is the rowan-canonical pattern. Generated
273        // here via an internal macro invocation to keep the match in sync
274        // with the enum above.
275        Some(match raw {
276            0 => Self::WHITESPACE,
277            1 => Self::LINE_COMMENT,
278            2 => Self::BLOCK_COMMENT,
279
280            16 => Self::IDENT,
281            17 => Self::QUOTED_IDENT,
282            18 => Self::INT_LITERAL,
283            19 => Self::FLOAT_LITERAL,
284            20 => Self::STRING_LITERAL,
285            21 => Self::PARAM,
286            22 => Self::BOOL_LITERAL,
287            23 => Self::NULL_LITERAL,
288
289            48 => Self::L_PAREN,
290            49 => Self::R_PAREN,
291            50 => Self::L_BRACK,
292            51 => Self::R_BRACK,
293            52 => Self::L_BRACE,
294            53 => Self::R_BRACE,
295            54 => Self::COMMA,
296            55 => Self::SEMI,
297            56 => Self::COLON,
298            57 => Self::DOUBLE_COLON,
299            58 => Self::DOT,
300            59 => Self::DOT_DOT,
301            60 => Self::PIPE,
302            61 => Self::STAR,
303            62 => Self::PLUS,
304            63 => Self::MINUS,
305            64 => Self::SLASH,
306            65 => Self::PERCENT,
307            66 => Self::CARET,
308            67 => Self::EQ,
309            68 => Self::NEQ,
310            69 => Self::BANG_EQ,
311            70 => Self::LT,
312            71 => Self::LE,
313            72 => Self::GT,
314            73 => Self::GE,
315            74 => Self::ARROW_R,
316            75 => Self::ARROW_L,
317            76 => Self::REGEX_EQ,
318            77 => Self::DOLLAR,
319            78 => Self::BANG,
320            79 => Self::AMP,
321
322            128 => Self::MATCH_KW,
323            129 => Self::OPTIONAL_KW,
324            130 => Self::WHERE_KW,
325            131 => Self::WITH_KW,
326            132 => Self::RETURN_KW,
327            133 => Self::CREATE_KW,
328            134 => Self::MERGE_KW,
329            135 => Self::DELETE_KW,
330            136 => Self::DETACH_KW,
331            137 => Self::SET_KW,
332            138 => Self::REMOVE_KW,
333            139 => Self::UNWIND_KW,
334            140 => Self::CALL_KW,
335            141 => Self::YIELD_KW,
336            142 => Self::ON_KW,
337            143 => Self::AS_KW,
338            144 => Self::AND_KW,
339            145 => Self::OR_KW,
340            146 => Self::XOR_KW,
341            147 => Self::NOT_KW,
342            148 => Self::IN_KW,
343            149 => Self::IS_KW,
344            150 => Self::NULL_KW,
345            151 => Self::TRUE_KW,
346            152 => Self::FALSE_KW,
347            153 => Self::CASE_KW,
348            154 => Self::WHEN_KW,
349            155 => Self::THEN_KW,
350            156 => Self::ELSE_KW,
351            157 => Self::END_KW,
352            158 => Self::ORDER_KW,
353            159 => Self::BY_KW,
354            160 => Self::ASC_KW,
355            161 => Self::ASCENDING_KW,
356            162 => Self::DESC_KW,
357            163 => Self::DESCENDING_KW,
358            164 => Self::SKIP_KW,
359            165 => Self::LIMIT_KW,
360            166 => Self::DISTINCT_KW,
361            167 => Self::UNION_KW,
362            168 => Self::ALL_KW,
363            169 => Self::STARTS_KW,
364            170 => Self::ENDS_KW,
365            171 => Self::CONTAINS_KW,
366            172 => Self::DIV_KW,
367            173 => Self::MOD_KW,
368            174 => Self::COUNT_KW,
369            175 => Self::EXISTS_KW,
370            176 => Self::SHORTESTPATH_KW,
371            177 => Self::ALLSHORTESTPATHS_KW,
372            178 => Self::ANY_KW,
373            179 => Self::NONE_KW,
374            180 => Self::SINGLE_KW,
375
376            320 => Self::SOURCE_FILE,
377            321 => Self::STATEMENT,
378            322 => Self::MATCH_CLAUSE,
379            323 => Self::OPTIONAL_MATCH_CLAUSE,
380            324 => Self::WHERE_CLAUSE,
381            325 => Self::WITH_CLAUSE,
382            326 => Self::RETURN_CLAUSE,
383            327 => Self::CREATE_CLAUSE,
384            328 => Self::MERGE_CLAUSE,
385            329 => Self::SET_CLAUSE,
386            330 => Self::REMOVE_CLAUSE,
387            331 => Self::DELETE_CLAUSE,
388            332 => Self::UNWIND_CLAUSE,
389            333 => Self::CALL_CLAUSE,
390            334 => Self::UNION_TAIL,
391            335 => Self::MERGE_ACTION,
392            336 => Self::RETURN_BODY,
393            337 => Self::RETURN_ITEMS,
394            338 => Self::RETURN_ITEM,
395            339 => Self::ORDER_BY,
396            340 => Self::ORDER_ITEM,
397            341 => Self::SKIP_SUBCLAUSE,
398            342 => Self::LIMIT_SUBCLAUSE,
399            343 => Self::PATTERN,
400            344 => Self::PATTERN_PART,
401            345 => Self::NAMED_PATTERN_PART,
402            346 => Self::NODE_PATTERN,
403            347 => Self::REL_PATTERN,
404            348 => Self::REL_DETAIL,
405            349 => Self::REL_LENGTH,
406            350 => Self::LABEL_EXPR,
407            351 => Self::REL_TYPE_EXPR,
408            352 => Self::PROPERTY_MAP,
409            353 => Self::SET_ITEM,
410            354 => Self::REMOVE_ITEM,
411            355 => Self::YIELD_SUBCLAUSE,
412            356 => Self::YIELD_ITEM,
413            357 => Self::PROCEDURE_NAME,
414            358 => Self::BINARY_EXPR,
415            359 => Self::UNARY_EXPR,
416            360 => Self::POSTFIX_EXPR,
417            361 => Self::LITERAL_EXPR,
418            362 => Self::VAR_EXPR,
419            363 => Self::PROP_ACCESS_EXPR,
420            364 => Self::SUBSCRIPT_EXPR,
421            365 => Self::LIST_LITERAL,
422            366 => Self::MAP_LITERAL,
423            367 => Self::MAP_PROJECTION,
424            368 => Self::MAP_PROJECTION_ITEM,
425            369 => Self::CASE_EXPR,
426            370 => Self::CASE_WHEN_ARM,
427            371 => Self::CASE_ELSE_ARM,
428            372 => Self::FUNCTION_CALL,
429            373 => Self::CALL_ARGS,
430            374 => Self::PAREN_EXPR,
431            375 => Self::LIST_COMPREHENSION,
432            376 => Self::PATTERN_COMPREHENSION,
433            377 => Self::PATTERN_PREDICATE,
434            378 => Self::PARAM_EXPR,
435            379 => Self::IS_NULL_EXPR,
436            380 => Self::IN_EXPR,
437            381 => Self::REGEX_MATCH_EXPR,
438            382 => Self::STRING_OP_EXPR,
439            383 => Self::NAME,
440            384 => Self::ARG_LIST,
441            385 => Self::INDEX_EXPR,
442            386 => Self::SLICE_EXPR,
443            387 => Self::LIST_PREDICATE_EXPR,
444            388 => Self::SHORTEST_PATH_PATTERN,
445
446            768 => Self::ERROR,
447            769 => Self::EOF,
448
449            _ => return None,
450        })
451    }
452
453    // ---------- partition predicates ----------
454    //
455    // These are the consumers' lens on the zones laid out above
456    // (spec §4.4). All are cheap range checks or `matches!` over a
457    // handful of variants, and all are `const fn` so callers can use
458    // them in const contexts (e.g. static tables).
459
460    /// Returns `true` for the trivia zone (whitespace and comments).
461    ///
462    /// ```
463    /// use cyrs_syntax::SyntaxKind;
464    /// assert!(SyntaxKind::WHITESPACE.is_trivia());
465    /// assert!(!SyntaxKind::IDENT.is_trivia());
466    /// ```
467    #[must_use]
468    pub const fn is_trivia(self) -> bool {
469        matches!(
470            self,
471            Self::WHITESPACE | Self::LINE_COMMENT | Self::BLOCK_COMMENT
472        )
473    }
474
475    /// Returns `true` for the keyword zone (`MATCH_KW..=SINGLE_KW`).
476    #[must_use]
477    pub const fn is_keyword(self) -> bool {
478        let k = self as u16;
479        k >= Self::MATCH_KW as u16 && k <= Self::SINGLE_KW as u16
480    }
481
482    /// Returns `true` for the punctuation zone (`L_PAREN..=AMP`).
483    #[must_use]
484    pub const fn is_punct(self) -> bool {
485        let k = self as u16;
486        k >= Self::L_PAREN as u16 && k <= Self::AMP as u16
487    }
488
489    /// Returns `true` for literal-shaped tokens: numeric, string, boolean,
490    /// null, and parameter tokens (`$name` / `$0`).
491    #[must_use]
492    pub const fn is_literal(self) -> bool {
493        matches!(
494            self,
495            Self::INT_LITERAL
496                | Self::FLOAT_LITERAL
497                | Self::STRING_LITERAL
498                | Self::BOOL_LITERAL
499                | Self::NULL_LITERAL
500                | Self::PARAM
501        )
502    }
503
504    /// Returns `true` for composite syntax nodes (clauses, patterns,
505    /// expressions, etc. — every kind in `SOURCE_FILE..ERROR`).
506    #[must_use]
507    pub const fn is_node(self) -> bool {
508        let k = self as u16;
509        k >= Self::SOURCE_FILE as u16 && k < Self::ERROR as u16
510    }
511
512    /// Returns `true` for any kind that is a token rather than a node or
513    /// meta sentinel. Equivalent to `!is_node() && !matches!(ERROR | EOF)`.
514    #[must_use]
515    pub const fn is_token(self) -> bool {
516        (self as u16) < Self::SOURCE_FILE as u16
517    }
518}
519
#[cfg(test)]
mod tests {
    use super::SyntaxKind;

    /// Every kind that `from_u16` can produce, found by sweeping the whole
    /// `u16` range. Lets the tests below cover the full table without a
    /// hand-maintained list of variants.
    fn all_kinds() -> impl Iterator<Item = SyntaxKind> {
        (0..=u16::MAX).filter_map(SyntaxKind::from_u16)
    }

    /// A representative sample — the first kind of each zone plus the most
    /// recently appended keywords and nodes — must round-trip through
    /// `from_u16`. This catches a variant whose match arm was forgotten.
    #[test]
    fn round_trip_known_kinds() {
        let known = [
            SyntaxKind::WHITESPACE,
            SyntaxKind::IDENT,
            SyntaxKind::MATCH_KW,
            SyntaxKind::ALLSHORTESTPATHS_KW,
            SyntaxKind::ANY_KW,
            SyntaxKind::NONE_KW,
            SyntaxKind::SINGLE_KW,
            SyntaxKind::SOURCE_FILE,
            SyntaxKind::LIST_PREDICATE_EXPR,
            SyntaxKind::SHORTEST_PATH_PATTERN,
            SyntaxKind::ERROR,
            SyntaxKind::EOF,
        ];
        for kind in known {
            let round = SyntaxKind::from_u16(kind as u16);
            assert_eq!(round, Some(kind), "round-trip failed for {kind:?}");
        }
    }

    /// Whatever `from_u16` returns for a raw value must carry exactly that
    /// discriminant. This catches an arm that maps a number to the wrong
    /// variant, for every arm in the table.
    #[test]
    fn from_u16_agrees_with_discriminants() {
        for raw in 0..=u16::MAX {
            if let Some(kind) = SyntaxKind::from_u16(raw) {
                assert_eq!(kind as u16, raw, "{raw} decoded to {kind:?}");
            }
        }
    }

    /// The last slot of each zone is unassigned today, so it must decode
    /// to `None` rather than to a neighbouring kind.
    #[test]
    fn from_u16_rejects_unassigned_slots() {
        for raw in [15_u16, 47, 127, 319, 767, u16::MAX] {
            assert_eq!(SyntaxKind::from_u16(raw), None, "slot {raw} is unassigned");
        }
    }

    #[test]
    fn partitions_are_disjoint() {
        // Spot checks on one kind per zone.
        assert!(SyntaxKind::WHITESPACE.is_trivia());
        assert!(!SyntaxKind::WHITESPACE.is_keyword());
        assert!(SyntaxKind::MATCH_KW.is_keyword());
        assert!(!SyntaxKind::MATCH_KW.is_punct());
        assert!(SyntaxKind::L_PAREN.is_punct());
        assert!(SyntaxKind::SOURCE_FILE.is_node());
        assert!(!SyntaxKind::SOURCE_FILE.is_token());

        for kind in all_kinds() {
            // No kind may fall into two of the fine-grained partitions.
            // (`IDENT` / `QUOTED_IDENT` and the sentinels fall into none.)
            let token_classes = [
                kind.is_trivia(),
                kind.is_literal(),
                kind.is_punct(),
                kind.is_keyword(),
            ];
            let token_hits = token_classes.iter().filter(|&&hit| hit).count();
            let hits = token_hits + usize::from(kind.is_node());
            assert!(hits <= 1, "{kind:?} matched {hits} partitions");

            // Every fine-grained token class is a subset of `is_token`.
            if token_hits == 1 {
                assert!(kind.is_token(), "{kind:?} is classified but not a token");
            }

            // Token / node / sentinel is a strict three-way split.
            let is_sentinel = matches!(kind, SyntaxKind::ERROR | SyntaxKind::EOF);
            let roles = [kind.is_token(), kind.is_node(), is_sentinel];
            let role_hits = roles.iter().filter(|&&hit| hit).count();
            assert_eq!(role_hits, 1, "{kind:?} has {role_hits} roles");
        }
    }
}