Skip to main content

gdscript_syntax/
syntax_kind.rs

1//! `SyntaxKind` — the single `#[repr(u32)]` kind enum shared by the lexer, the
2//! indentation pre-pass, the parser, and the typed AST.
3//!
4//! This is the one source of truth for every terminal (token) and non-terminal
5//! (node) in the GDScript grammar. `cstree` keys its green nodes by this enum via
6//! the derived [`cstree::Syntax`] impl. Fixed-lexeme kinds (keywords, operators,
7//! punctuation) carry `#[static_text(...)]` so `cstree` stores them without
8//! interning a string (and validates that the byte text matches).
9//!
10//! Design notes (see `plans/PHASE-1-IMPLEMENTATION-PLAYBOOK.md` §4.1):
11//! - **`#[repr(u32)]`** — `cstree::RawSyntaxKind` is a `u32` newtype (not rowan's
12//!   `u16`). The discriminants are contiguous from 0, so the derived `from_raw`
13//!   round-trips every variant.
14//! - **`true`/`false`/`null` are literals, not keywords** — Godot tokenizes them as
15//!   literal tokens, and they parse as primary expressions. They still have fixed
16//!   text, so they get `#[static_text]`.
17//! - **`Newline`/`Indent`/`Dedent`** are *synthetic, zero-width* structural tokens
18//!   injected by the pre-pass. The parser reads them to recover block structure;
19//!   the sink emits them as empty-text tokens, so they never affect the byte-exact
20//!   round-trip (the real newline/space bytes live in the retained `NewlinePhys` /
21//!   `Whitespace` trivia tokens).
22
23use cstree::Syntax;
24
25/// Every terminal and non-terminal kind in the GDScript syntax tree.
26///
27/// Variants are grouped: trivia, synthetic block-structure, literals/names,
28/// keywords, built-in constant names, punctuation/operators, error tokens, then
29/// grammar-production nodes. `Tombstone` is kept last as the count sentinel.
30#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Syntax)]
31#[repr(u32)]
32pub enum SyntaxKind {
33    // ---- tokens: trivia (carry source bytes; the parser skips them) ----
34    /// A run of spaces and/or tabs.
35    Whitespace,
36    /// `# ...` to end of line.
37    LineComment,
38    /// `## ...` documentation comment (feeds hover later).
39    DocComment,
40    /// `#region ...` fold-region opener (a comment, flagged for folding).
41    RegionComment,
42    /// `#endregion ...` fold-region closer.
43    EndRegionComment,
44    /// A `\` immediately before a newline — joins the logical line.
45    LineContinuation,
46    /// A physical line break (`\n`, `\r\n`, or `\r`). The pre-pass keeps it as
47    /// trivia for losslessness and emits a synthetic [`SyntaxKind::Newline`] where
48    /// a statement actually terminates.
49    NewlinePhys,
50    /// A UTF-8 byte-order mark (`U+FEFF`). Some editors prepend one to a saved `.gd`
51    /// file; Godot strips a leading BOM, so we keep it as its own trivia token (not
52    /// `Whitespace` — that would mis-count the first line's indentation by 3 bytes, and
53    /// not an `Error` — the file is valid GDScript).
54    Bom,
55
56    // ---- tokens: synthetic block structure (zero-width; injected by the pre-pass) ----
57    /// Logical statement terminator (outside brackets, not after a continuation).
58    Newline,
59    /// Block open — indentation increased.
60    Indent,
61    /// Block close — indentation decreased (possibly several in a row).
62    Dedent,
63
64    // ---- tokens: literals & names (interned text) ----
65    /// Integer literal: `45`, `0x8f51`, `0b101010`, `12_345`.
66    Int,
67    /// Float literal: `3.14`, `.5`, `1.`, `58.1e-10`.
68    Float,
69    /// String literal: `"..."`, `'...'`, `"""..."""`, `'''...'''`, raw `r"..."`.
70    String,
71    /// StringName literal: `&"..."` / `&'...'`.
72    StringName,
73    /// NodePath literal: `^"..."` / `^'...'`.
74    NodePath,
75    /// An identifier: `[A-Za-z_][A-Za-z0-9_]*`.
76    Ident,
77
78    // ---- tokens: literal keywords (fixed text) ----
79    #[static_text("true")]
80    True,
81    #[static_text("false")]
82    False,
83    #[static_text("null")]
84    Null,
85
86    // ---- tokens: built-in math constants (fixed text; engine CONST_* tokens) ----
87    #[static_text("PI")]
88    ConstPi,
89    #[static_text("TAU")]
90    ConstTau,
91    #[static_text("INF")]
92    ConstInf,
93    #[static_text("NAN")]
94    ConstNan,
95
96    // ---- tokens: keywords (~35; fixed text) ----
97    #[static_text("if")]
98    IfKw,
99    #[static_text("elif")]
100    ElifKw,
101    #[static_text("else")]
102    ElseKw,
103    #[static_text("for")]
104    ForKw,
105    #[static_text("while")]
106    WhileKw,
107    #[static_text("match")]
108    MatchKw,
109    #[static_text("when")]
110    WhenKw,
111    #[static_text("break")]
112    BreakKw,
113    #[static_text("continue")]
114    ContinueKw,
115    #[static_text("pass")]
116    PassKw,
117    #[static_text("return")]
118    ReturnKw,
119    #[static_text("var")]
120    VarKw,
121    #[static_text("const")]
122    ConstKw,
123    #[static_text("enum")]
124    EnumKw,
125    #[static_text("func")]
126    FuncKw,
127    #[static_text("static")]
128    StaticKw,
129    #[static_text("signal")]
130    SignalKw,
131    #[static_text("class")]
132    ClassKw,
133    #[static_text("class_name")]
134    ClassNameKw,
135    #[static_text("extends")]
136    ExtendsKw,
137    #[static_text("is")]
138    IsKw,
139    #[static_text("in")]
140    InKw,
141    #[static_text("as")]
142    AsKw,
143    #[static_text("self")]
144    SelfKw,
145    #[static_text("super")]
146    SuperKw,
147    #[static_text("void")]
148    VoidKw,
149    #[static_text("await")]
150    AwaitKw,
151    #[static_text("preload")]
152    PreloadKw,
153    #[static_text("assert")]
154    AssertKw,
155    #[static_text("breakpoint")]
156    BreakpointKw,
157    #[static_text("not")]
158    NotKw,
159    #[static_text("and")]
160    AndKw,
161    #[static_text("or")]
162    OrKw,
163    /// Deprecated since GDScript 2.0 — still lexed so we can diagnose it.
164    #[static_text("yield")]
165    YieldKw,
166    /// Reserved but unused — lexed to reject as an identifier.
167    #[static_text("namespace")]
168    NamespaceKw,
169    /// Reserved but unused.
170    #[static_text("trait")]
171    TraitKw,
172
173    // ---- tokens: punctuation / brackets (fixed text) ----
174    #[static_text("(")]
175    LParen,
176    #[static_text(")")]
177    RParen,
178    #[static_text("[")]
179    LBrack,
180    #[static_text("]")]
181    RBrack,
182    #[static_text("{")]
183    LBrace,
184    #[static_text("}")]
185    RBrace,
186    #[static_text(",")]
187    Comma,
188    #[static_text(":")]
189    Colon,
190    #[static_text(";")]
191    Semicolon,
192    #[static_text(".")]
193    Dot,
194    #[static_text("..")]
195    DotDot,
196    #[static_text("...")]
197    Ellipsis,
198    #[static_text("@")]
199    At,
200    #[static_text("$")]
201    Dollar,
202    #[static_text("%")]
203    Percent,
204    #[static_text("&")]
205    Amp,
206    #[static_text("->")]
207    Arrow,
208    #[static_text(":=")]
209    ColonEq,
210
211    // ---- tokens: operators (fixed text) ----
212    #[static_text("+")]
213    Plus,
214    #[static_text("-")]
215    Minus,
216    #[static_text("*")]
217    Star,
218    #[static_text("/")]
219    Slash,
220    #[static_text("**")]
221    StarStar,
222    #[static_text("=")]
223    Eq,
224    #[static_text("==")]
225    EqEq,
226    #[static_text("!=")]
227    Neq,
228    #[static_text("<")]
229    Lt,
230    #[static_text(">")]
231    Gt,
232    #[static_text("<=")]
233    Le,
234    #[static_text(">=")]
235    Ge,
236    #[static_text("&&")]
237    AmpAmp,
238    #[static_text("||")]
239    PipePipe,
240    #[static_text("!")]
241    Bang,
242    #[static_text("~")]
243    Tilde,
244    #[static_text("|")]
245    Pipe,
246    #[static_text("^")]
247    Caret,
248    #[static_text("<<")]
249    Shl,
250    #[static_text(">>")]
251    Shr,
252    #[static_text("+=")]
253    PlusEq,
254    #[static_text("-=")]
255    MinusEq,
256    #[static_text("*=")]
257    StarEq,
258    #[static_text("/=")]
259    SlashEq,
260    #[static_text("**=")]
261    StarStarEq,
262    #[static_text("%=")]
263    PercentEq,
264    #[static_text("&=")]
265    AmpEq,
266    #[static_text("|=")]
267    PipeEq,
268    #[static_text("^=")]
269    CaretEq,
270    #[static_text("<<=")]
271    ShlEq,
272    #[static_text(">>=")]
273    ShrEq,
274
275    // ---- tokens: error / sentinel ----
276    /// An unlexable byte — carried into the tree (never dropped) for losslessness.
277    Error,
278    /// Virtual end-of-input. Used by the parser; never emitted into the tree.
279    Eof,
280
281    // ---- nodes: file & top-level ----
282    SourceFile,
283    ExtendsClause,
284    ClassNameDecl,
285    Annotation,
286    AnnotationArgList,
287
288    // ---- nodes: declarations ----
289    InnerClassDecl,
290    ClassBody,
291    FuncDecl,
292    ParamList,
293    Param,
294    VarargParam,
295    VarDecl,
296    ConstDecl,
297    EnumDecl,
298    EnumVariant,
299    SignalDecl,
300    PropertyBody,
301    Getter,
302    Setter,
303    Name,
304
305    // ---- nodes: types ----
306    TypeRef,
307    TypedArray,
308    TypedDict,
309
310    // ---- nodes: statements ----
311    Block,
312    IfStmt,
313    ElifClause,
314    ElseClause,
315    ForStmt,
316    WhileStmt,
317    MatchStmt,
318    MatchArm,
319    ReturnStmt,
320    BreakStmt,
321    ContinueStmt,
322    PassStmt,
323    AssertStmt,
324    BreakpointStmt,
325    ExprStmt,
326    VarStmt,
327
328    // ---- nodes: match patterns ----
329    PatternLiteral,
330    PatternBind,
331    PatternWildcard,
332    PatternArray,
333    PatternDict,
334    PatternRest,
335    PatternGuard,
336
337    // ---- nodes: expressions ----
338    BinExpr,
339    UnaryExpr,
340    TernaryExpr,
341    CastExpr,
342    IsExpr,
343    InExpr,
344    CallExpr,
345    ArgList,
346    IndexExpr,
347    FieldExpr,
348    AwaitExpr,
349    LambdaExpr,
350    ParenExpr,
351    ArrayLit,
352    DictLit,
353    DictEntry,
354    NameRef,
355    Literal,
356    GetNodeExpr,
357    UniqueNodeExpr,
358    PreloadExpr,
359
360    // ---- nodes: error recovery ----
361    ErrorNode,
362
363    /// Count sentinel — keep last (drives the `u32` ↔ kind range).
364    Tombstone,
365}
366
367impl SyntaxKind {
368    /// Trivia carry source bytes but are skipped by the parser (re-attached by the
369    /// tree sink). The synthetic `Newline`/`Indent`/`Dedent` markers are **not**
370    /// trivia — the parser consumes them to recover block structure.
371    #[must_use]
372    pub const fn is_trivia(self) -> bool {
373        matches!(
374            self,
375            Self::Whitespace
376                | Self::LineComment
377                | Self::DocComment
378                | Self::RegionComment
379                | Self::EndRegionComment
380                | Self::LineContinuation
381                | Self::NewlinePhys
382                | Self::Bom
383        )
384    }
385
386    /// The synthetic, zero-width block-structure markers injected by the pre-pass.
387    #[must_use]
388    pub const fn is_synthetic_layout(self) -> bool {
389        matches!(self, Self::Newline | Self::Indent | Self::Dedent)
390    }
391
392    /// Whether this kind is a node (grammar production) rather than a token.
393    #[must_use]
394    pub const fn is_node(self) -> bool {
395        // Nodes are exactly the kinds at/after `SourceFile`.
396        (self as u32) >= (Self::SourceFile as u32)
397    }
398}
399
400/// A resolved (interner-carrying) red node — supports `Display`/`.text()` and the
401/// byte-exact round-trip. This is the public tree type for callers that need text.
402pub type GdNode = cstree::syntax::ResolvedNode<SyntaxKind>;
403/// A resolved red token.
404pub type GdToken = cstree::syntax::ResolvedToken<SyntaxKind>;
405/// A bare (resolver-less) red node — cheap, `Send + Sync`; text needs a resolver.
406pub type SyntaxNode = cstree::syntax::SyntaxNode<SyntaxKind>;
407
#[cfg(test)]
mod tests {
    use super::*;
    use cstree::build::GreenNodeBuilder;
    use cstree::syntax::ResolvedNode;

    /// The Step-1 gate: hand-build a tiny `func foo` tree and prove it
    /// round-trips byte-for-byte. It exercises `static_token` (a keyword), an
    /// interned trivia token (whitespace), an interned `Ident`, and a zero-width
    /// synthetic `Newline` marker that must contribute no bytes.
    #[test]
    fn three_node_tree_round_trips() {
        let mut builder: GreenNodeBuilder<'_, '_, SyntaxKind> = GreenNodeBuilder::new();
        builder.start_node(SyntaxKind::SourceFile);
        builder.start_node(SyntaxKind::FuncDecl);
        builder.static_token(SyntaxKind::FuncKw); // "func"
        builder.token(SyntaxKind::Whitespace, " ");
        builder.start_node(SyntaxKind::Name);
        builder.token(SyntaxKind::Ident, "foo");
        builder.finish_node(); // Name
        builder.token(SyntaxKind::Newline, ""); // zero-width synthetic marker
        builder.finish_node(); // FuncDecl
        builder.finish_node(); // SourceFile

        let (green, cache) = builder.finish();
        let interner = cache
            .expect("a builder created with `new` owns its node cache")
            .into_interner()
            .expect("an owned node cache owns its interner");
        let root = ResolvedNode::<SyntaxKind>::new_root_with_resolver(green, interner);

        // Byte-for-byte round-trip — the defining lossless invariant.
        assert_eq!(root.to_string(), "func foo");
        assert_eq!(root.kind(), SyntaxKind::SourceFile);
    }

    #[test]
    fn raw_kind_round_trips() {
        // The derived `from_raw`/`into_raw` must be inverses across every
        // variant. The range is inclusive so the final variant (`Tombstone`) is
        // checked as well.
        for raw in 0..=(SyntaxKind::Tombstone as u32) {
            let kind = <SyntaxKind as Syntax>::from_raw(cstree::RawSyntaxKind(raw));
            assert_eq!(
                <SyntaxKind as Syntax>::into_raw(kind).0,
                raw,
                "raw kind {raw} did not round-trip (decoded as {kind:?})"
            );
        }
    }

    #[test]
    fn classification_helpers() {
        // Trivia vs. synthetic layout markers.
        assert!(SyntaxKind::Whitespace.is_trivia());
        assert!(SyntaxKind::Bom.is_trivia());
        assert!(!SyntaxKind::Newline.is_trivia());
        assert!(SyntaxKind::Indent.is_synthetic_layout());
        assert!(!SyntaxKind::Whitespace.is_synthetic_layout());

        // Token/node boundary: `Eof` is the last token, `SourceFile` the first node.
        assert!(!SyntaxKind::Eof.is_node());
        assert!(SyntaxKind::SourceFile.is_node());
        assert!(SyntaxKind::FuncDecl.is_node());
        assert!(SyntaxKind::ErrorNode.is_node());
        assert!(!SyntaxKind::FuncKw.is_node());
    }
}