// oak_core/language/mod.rs

#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use std::{fmt::Debug, hash::Hash};

/// Represents the broad category a language belongs to.
///
/// This is the type of [`Language::CATEGORY`], whose default value is
/// [`LanguageCategory::Programming`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum LanguageCategory {
    /// General-purpose programming languages (e.g., Rust, C, Java).
    Programming,
    /// Markup and document languages (e.g., Markdown, HTML, Typst).
    Markup,
    /// Configuration and data serialization languages (e.g., YAML, JSON, TOML).
    Config,
    /// Styling languages (e.g., CSS, Sass, Less).
    StyleSheet,
    /// Domain-specific languages or specialized notation (e.g., SQL, Regex, Math).
    Dsl,
    /// Modeling languages (e.g., UML, Mermaid, PlantUML).
    Modeling,
    /// Other or unclassified.
    Other,
}

25/// Language definition trait that coordinates all language-related types and behaviors.
26///
27/// This trait serves as the foundation for defining programming languages within the
28/// incremental parsing system. It acts as a marker trait that ties together various
29/// language-specific components like lexers, parsers, and rebuilders.
30///
31/// # Overview
32///
33/// The Language trait is the central abstraction that enables the parsing framework
34/// to be language-agnostic while still providing language-specific functionality.
35/// Each language implementation must define its own types for tokens, elements,
36/// and the root structure of the parsed tree.
37///
38/// # Design Philosophy
39///
40/// The trait follows a compositional design where:
41/// - `TokenType` defines the atomic units of the language (tokens)
42/// - `ElementType` defines the composite structures (nodes)
43/// - `TypedRoot` defines the top-level structure of the parsed document
44///
45/// This separation allows for maximum flexibility while maintaining type safety
46/// and performance characteristics required for incremental parsing.
47///
48/// # Examples
49///
50/// ```rust
51/// # use oak_core::{Language, TokenType, ElementType, UniversalTokenRole, UniversalElementRole};
52/// // Define a simple language
53/// #[derive(Clone)]
54/// struct MyLanguage;
55///
56/// impl Language for MyLanguage {
57///     const NAME: &'static str = "my-language";
58///     type TokenType = MyToken;
59///     type ElementType = MyElement;
60///     type TypedRoot = ();
61/// }
62///
63/// // With corresponding type definitions
64/// #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
65/// enum MyToken {
66///     Identifier,
67///     EndOfStream,
68/// }
69///
70/// impl TokenType for MyToken {
71///     const END_OF_STREAM: Self = MyToken::EndOfStream;
72///     type Role = UniversalTokenRole;
73///     fn role(&self) -> Self::Role { UniversalTokenRole::None }
74/// }
75///
76/// #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
77/// enum MyElement {}
78///
79/// impl ElementType for MyElement {
80///     type Role = UniversalElementRole;
81///     fn role(&self) -> Self::Role { UniversalElementRole::None }
82/// }
83/// ```
84pub trait Language: Send + Sync {
85    /// The name of the language (e.g., "rust", "sql").
86    const NAME: &'static str;
87
88    /// The category of the language.
89    const CATEGORY: LanguageCategory = LanguageCategory::Programming;
90
91    /// The token type used to represent different token and node types in the language.
92    ///
93    /// This associated type defines how different syntactic elements (tokens, nodes) are
94    /// categorized and identified within the language. It must implement `Copy` and `Eq`
95    /// to ensure efficient handling in the parsing system.
96    ///
97    /// # Requirements
98    ///
99    /// The token type must:
100    /// - Implement the `TokenType` trait
101    /// - Be copyable to enable efficient passing
102    /// - Support equality comparison for token matching
103    /// - Be sendable across thread boundaries
104    ///
105    /// # Examples
106    ///
107    /// ```
108    /// #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
109    /// enum RustSyntaxKind {
110    ///     LetKeyword,
111    ///     Identifier,
112    ///     Number,
113    ///     // ... other token kinds
114    /// }
115    /// ```
116    type TokenType: TokenType;
117
118    /// The element type used to represent composite structures in the parsed tree.
119    ///
120    /// While tokens represent the atomic units of the language, elements represent
121    /// the composite structures formed by combining tokens according to grammar rules.
122    /// This includes expressions, statements, declarations, and other syntactic constructs.
123    ///
124    /// # Requirements
125    ///
126    /// The element type must:
127    /// - Implement the `ElementType` trait
128    /// - Be copyable for efficient handling
129    /// - Support equality comparison
130    /// - Be sendable across thread boundaries
131    type ElementType: ElementType;
132
133    /// The root type for the parsed tree that represents the top-level structure of the language.
134    ///
135    /// This associated type defines the structure of the root node in the parsed tree,
136    /// which typically contains the entire parsed source code organized according to the
137    /// language's grammar rules. The root type serves as the entry point for traversing
138    /// and manipulating the parsed representation.
139    ///
140    /// # Design Considerations
141    ///
142    /// The root type should:
143    /// - Contain references to all top-level language constructs
144    /// - Provide efficient access to the parsed content
145    /// - Support incremental updates when the source changes
146    ///
147    /// # Examples
148    ///
149    /// ```ignore
150    /// struct RustRoot {
151    ///     items: Vec<RustItem>,
152    /// }
153    ///
154    /// struct RustRoot {
155    ///     modules: Vec<Module>,
156    ///     imports: Vec<Import>,
157    ///     declarations: Vec<Declaration>,
158    /// }
159    /// ```
160    type TypedRoot;
161}
162
163/// Token type definitions for tokens in the parsing system.
164///
165/// This module provides the [`TokenType`] trait which serves as the foundation
166/// for defining different types of tokens in the parsing system.
167/// It enables categorization of token elements and provides methods for
168/// identifying their roles in the language grammar.
169///
170/// # Universal Grammar Philosophy
171///
172/// The role mechanism in Oak is inspired by the concept of "Universal Grammar".
173/// While every language has its own unique "Surface Structure" (its specific token kinds),
174/// most share a common "Deep Structure" (syntactic roles).
175///
176/// By mapping language-specific kinds to [`UniversalTokenRole`], we enable generic tools
177/// like highlighters and formatters to work across 100+ languages without deep
178/// knowledge of each one's specific grammar.
179///
180/// # Implementation Guidelines
181///
182/// When implementing this trait for a specific language:
183/// - Use an enum with discriminant values for efficient matching
184/// - Ensure all variants are Copy and Eq for performance
185/// - Include an END_OF_STREAM variant to signal input termination
186/// - Define a `Role` associated type and implement the `role()` method to provide
187///   syntactic context.
188///
189/// # Examples
190///
191/// ```ignore
192/// #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
193/// enum SimpleToken {
194///     Identifier,
195///     Number,
196///     Plus,
197///     EndOfStream,
198/// }
199///
200/// impl TokenType for SimpleToken {
201///     const END_OF_STREAM: Self = SimpleToken::EndOfStream;
202///     type Role = UniversalTokenRole; // Or a custom Role type
203///
204///     fn role(&self) -> Self::Role {
205///         match self {
206///             SimpleToken::Identifier => UniversalTokenRole::Name,
207///             SimpleToken::Number => UniversalTokenRole::Literal,
208///             SimpleToken::Plus => UniversalTokenRole::Operator,
209///             _ => UniversalTokenRole::None,
210///         }
211///     }
212///
213///     // ... other methods
214/// }
215/// ```
216macro_rules! define_token_type {
217    ($($bound:tt)*) => {
218        /// A trait for types that represent a token's kind in a specific language.
219        pub trait TokenType: Copy + Eq + Hash + Send + Sync + std::fmt::Debug $($bound)* {
220            /// The associated role type for this token kind.
221            type Role: TokenRole;
222
223            /// A constant representing the end of the input stream.
224            const END_OF_STREAM: Self;
225
226            /// Returns the general syntactic role of this token.
227            fn role(&self) -> Self::Role;
228
229            /// Returns true if this token matches the specified language-specific role.
230            fn is_role(&self, role: Self::Role) -> bool {
231                self.role() == role
232            }
233
234            /// Returns true if this token matches the specified universal role.
235            fn is_universal(&self, role: UniversalTokenRole) -> bool {
236                self.role().universal() == role
237            }
238
239            /// Returns true if this token represents a comment.
240            fn is_comment(&self) -> bool {
241                self.is_universal(UniversalTokenRole::Comment)
242            }
243
244            /// Returns true if this token represents whitespace.
245            fn is_whitespace(&self) -> bool {
246                self.is_universal(UniversalTokenRole::Whitespace)
247            }
248
249            /// Returns true if this token represents an error condition.
250            fn is_error(&self) -> bool {
251                self.is_universal(UniversalTokenRole::Error)
252            }
253
254            /// Returns true if this token represents trivia (whitespace, comments, etc.).
255            fn is_ignored(&self) -> bool {
256                self.is_whitespace() || self.is_comment()
257            }
258
259            /// Returns true if this token represents the end of the input stream.
260            fn is_end_of_stream(&self) -> bool {
261                *self == Self::END_OF_STREAM
262            }
263        }
264    };
265}
266
267#[cfg(feature = "serde")]
268define_token_type!(+ Serialize + for<'de> Deserialize<'de>);
269
270#[cfg(not(feature = "serde"))]
271define_token_type!();
272
273/// A trait for types that can represent a token's syntactic role.
274pub trait TokenRole: Copy + Eq + Send {
275    /// Maps this role to a universal, language-agnostic role.
276    fn universal(&self) -> UniversalTokenRole;
277
278    /// Returns a specific name for this role, used for granular highlighting.
279    ///
280    /// For universal roles, this should return the standard scope name (e.g., "keyword").
281    /// For language-specific roles, it can return more specific names (e.g., "keyword.control").
282    fn name(&self) -> &str;
283}
284
/// Represents the general syntactic role of a token across diverse languages.
///
/// # Universal Grammar
///
/// This mechanism is inspired by Noam Chomsky's Universal Grammar theory.
/// It posits that while the "Surface Structure" (specific token kinds) of languages
/// may vary wildly, they share a common "Deep Structure" (syntactic roles).
///
/// In the Oak framework:
/// - **Surface Structure**: Refers to specific token kinds defined by a language
///   (e.g., Rust's `PubKeyword`).
/// - **Deep Structure**: Refers to the universal roles defined in this enum
///   (e.g., [`UniversalTokenRole::Keyword`]).
///
/// By mapping to these roles, generic tools can identify names, literals, or operators
/// across many languages without needing to learn the specifics of each grammar.
///
/// # Serialization
///
/// With the `serde` feature enabled, variants are serialized in `snake_case`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
pub enum UniversalTokenRole {
    /// Language reserved words or built-in commands (e.g., 'SELECT', 'let', 'MOV').
    Keyword,
    /// Identifiers, labels, keys, tags, or any name-like token.
    Name,
    /// Literal values like strings, numbers, booleans, or nulls.
    Literal,
    /// An escape sequence or a special character representation within a literal.
    Escape,
    /// Mathematical, logical, or structural operators (e.g., '+', '=>', 'LIKE').
    Operator,
    /// Structural characters like brackets, commas, semicolons.
    Punctuation,
    /// Developer annotations or documentation.
    Comment,
    /// Formatting characters like spaces or tabs.
    Whitespace,
    /// Malformed or unrecognized content.
    Error,
    /// No specific role assigned.
    None,
    /// End of stream marker.
    Eof,
}

327impl TokenRole for UniversalTokenRole {
328    fn universal(&self) -> UniversalTokenRole {
329        *self
330    }
331
332    fn name(&self) -> &str {
333        match *self {
334            UniversalTokenRole::Keyword => "keyword",
335            UniversalTokenRole::Name => "variable.other",
336            UniversalTokenRole::Literal => "constant",
337            UniversalTokenRole::Escape => "constant.character.escape",
338            UniversalTokenRole::Operator => "keyword.operator",
339            UniversalTokenRole::Punctuation => "punctuation",
340            UniversalTokenRole::Comment => "comment",
341            UniversalTokenRole::Whitespace => "punctuation.whitespace",
342            UniversalTokenRole::Error => "invalid",
343            UniversalTokenRole::None => "none",
344            UniversalTokenRole::Eof => "punctuation.eof",
345        }
346    }
347}
348
349/// Element type definitions for nodes in the parsed tree.
350///
351/// While tokens represent the atomic units of a language, elements represent the
352/// composite structures formed by combining tokens according to grammar rules.
353/// This includes expressions, statements, declarations, and other syntactic constructs.
354///
355/// # Universal Grammar Philosophy
356///
357/// Just like tokens, syntax tree elements are mapped from their "Surface Structure"
358/// (language-specific nodes) to a "Deep Structure" via [`UniversalElementRole`].
359///
360/// This allows structural analysis tools (like symbol outline extractors) to
361/// identify [`UniversalElementRole::Binding`] (definitions) or [`UniversalElementRole::Container`]
362/// (scopes/blocks) uniformly across different language families.
363///
364/// # Implementation Guidelines
365///
366/// When implementing this trait for a specific language:
367/// - Use an enum with discriminant values for efficient matching
368/// - Include a Root variant to identify the top-level element
369/// - Include an Error variant for malformed constructs
370/// - Define a `Role` associated type and implement the `role()` method.
371///
372/// # Examples
373///
374/// ```ignore
375/// #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
376/// enum MyElement {
377///     Root,
378///     FunctionDeclaration,
379///     Block,
380///     Error,
381/// }
382///
383/// impl ElementType for MyElement {
384///     type Role = UniversalElementRole;
385///
386///     fn role(&self) -> Self::Role {
387///         match self {
388///             MyElement::Root => UniversalElementRole::Root,
389///             MyElement::FunctionDeclaration => UniversalElementRole::Binding,
390///             MyElement::Block => UniversalElementRole::Container,
391///             MyElement::Error => UniversalElementRole::Error,
392///         }
393///     }
394///
395///     fn is_root(&self) -> bool {
396///         matches!(self, MyElement::Root)
397///     }
398///
399///     fn is_error(&self) -> bool {
400///         matches!(self, MyElement::Error)
401///     }
402/// }
403/// ```
404macro_rules! define_element_type {
405    ($($bound:tt)*) => {
406        /// A trait for types that represent an element's kind in a syntax tree.
407        pub trait ElementType: Copy + Eq + Hash + Send + Sync + std::fmt::Debug $($bound)* {
408            /// The associated role type for this element kind.
409            type Role: ElementRole;
410
411            /// Returns the general syntactic role of this element.
412            fn role(&self) -> Self::Role;
413
414            /// Returns true if this element matches the specified language-specific role.
415            fn is_role(&self, role: Self::Role) -> bool {
416                self.role() == role
417            }
418
419            /// Returns true if this element matches the specified universal role.
420            fn is_universal(&self, role: UniversalElementRole) -> bool {
421                self.role().universal() == role
422            }
423
424            /// Returns true if this element represents the root of the parsed tree.
425            fn is_root(&self) -> bool {
426                self.is_universal(UniversalElementRole::Root)
427            }
428
429            /// Returns true if this element represents an error condition.
430            fn is_error(&self) -> bool {
431                self.is_universal(UniversalElementRole::Error)
432            }
433        }
434    };
435}
436
437#[cfg(feature = "serde")]
438define_element_type!(+ Serialize + for<'de> Deserialize<'de>);
439
440#[cfg(not(feature = "serde"))]
441define_element_type!();
442
443/// A trait for types that can represent an element's structural role.
444pub trait ElementRole: Copy + Eq + Send {
445    /// Maps this role to a universal, language-agnostic role.
446    fn universal(&self) -> UniversalElementRole;
447
448    /// Returns a specific name for this role, used for granular highlighting.
449    fn name(&self) -> &str;
450}
451
/// Represents the general structural role of a syntax tree element.
///
/// # Universal Grammar
///
/// This mechanism is inspired by Noam Chomsky's Universal Grammar theory, applied
/// here to the structural hierarchy of syntax trees. It posits that while the
/// "Surface Structure" (the specific production rules of a grammar) varies across
/// languages, they share a common "Deep Structure" (structural intent).
///
/// In the Oak framework, syntax tree elements are categorized by their role:
/// - **Surface Structure**: Refers to specific node kinds defined by a language
///   (e.g., Rust's `FnDeclaration`, SQL's `SelectStatement`, or YAML's `Mapping`).
/// - **Deep Structure**: Refers to the universal structural patterns defined in this enum.
///
/// By mapping to these roles, we can perform sophisticated analysis across diverse
/// language families:
/// - **Containers & Statements**: Identify hierarchical scopes and their constituents
///   (e.g., a SQL table is a container, its clauses are statements).
/// - **Bindings & References**: Identify the flow of information and identifiers
///   (e.g., an ASM label is a binding, a jump instruction is a reference).
/// - **Values**: Identify the atomic data payload or expression results.
///
/// # Design Philosophy: The 99% Rule
///
/// This enum is designed to provide a "sufficiently complete" abstraction for common tool
/// requirements (Highlighting, Outline, Navigation, and Refactoring) while maintaining
/// language-agnostic simplicity.
///
/// ### 1. Structural Identity (The "What")
/// Roles describe a node's primary structural responsibility in the tree, not its
/// domain-specific semantic meaning. For example:
/// - A "Class" or "Function" is structurally a [`UniversalElementRole::Definition`] and
///   often a [`UniversalElementRole::Container`].
/// - An "Import" is structurally a [`UniversalElementRole::Statement`] that contains a
///   [`UniversalElementRole::Reference`].
///
/// ### 2. Broad Categories (The "How")
/// We categorize elements into four major structural groups:
/// - **Flow Control & Logic**: [`UniversalElementRole::Statement`],
///   [`UniversalElementRole::Expression`], [`UniversalElementRole::Call`], and
///   [`UniversalElementRole::Root`].
/// - **Symbol Management**: [`UniversalElementRole::Definition`],
///   [`UniversalElementRole::Binding`], and [`UniversalElementRole::Reference`].
/// - **Hierarchy & Scoping**: [`UniversalElementRole::Container`].
/// - **Metadata & Auxiliaries**: [`UniversalElementRole::Typing`],
///   [`UniversalElementRole::Metadata`], [`UniversalElementRole::Attribute`],
///   [`UniversalElementRole::Documentation`], etc.
///
/// ### 3. Intent-Based Selection
/// When a node could fit multiple roles, choose the one that represents its **primary
/// structural intent**.
/// - **Example**: In Rust, an `if` expression is both an `Expression` and a `Container`.
///   However, its primary role in the tree is as an [`UniversalElementRole::Expression`]
///   (producing a value), whereas its children (the blocks) are
///   [`UniversalElementRole::Container`]s.
/// - **Example**: In Markdown, a "List" is a [`UniversalElementRole::Container`], while
///   each "ListItem" is a [`UniversalElementRole::Statement`] within that container.
///
/// ### 4. Intentional Exclusions
/// We intentionally exclude roles that can be represented by combining existing roles or
/// that require deep semantic analysis:
/// - **Keyword-specific roles**: Roles like "Loop", "Conditional", or "Module" are excluded.
///   These are surface-level distinctions. In the Deep Structure, they are all
///   [`UniversalElementRole::Container`]s or [`UniversalElementRole::Statement`]s.
/// - **Semantic Relationships**: Roles like "Inheritance", "Implementation", or "Dependency"
///   are excluded. These are better handled by semantic graph analysis rather than
///   syntactic tree roles.
///
/// # Stability
///
/// The enum is `#[non_exhaustive]`: code in other crates must include a wildcard arm
/// when matching on it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub enum UniversalElementRole {
    /// The top-level root of the syntax tree, representing the entire document or source file.
    Root,

    /// A high-level structural container that defines a scope or logical grouping.
    Container,

    /// A node that represents the entire declaration or definition of a symbol.
    ///
    /// This role identifies the "whole" entity that defines something in the code,
    /// which is crucial for building symbol trees and navigation outlines.
    ///
    /// # Examples
    /// - **Rust**: The entire `Fn` declaration block, `Struct` item, or `Enum`.
    /// - **Markdown**: `Heading` or `LinkDefinition`.
    /// - **SQL**: The whole `CREATE TABLE` or `CREATE PROCEDURE` statement.
    /// - **ASM**: A `Proc` (procedure) block or a multi-line data definition.
    /// - **YAML**: A schema-defined object or a complex configuration block.
    Definition,

    /// A node that specifically performs the act of binding a name to an entity.
    ///
    /// Unlike `Definition`, which represents the entire construct, `Binding` targets
    /// the specific part (usually the identifier) that introduces the name.
    ///
    /// # Examples
    /// - **Rust**: The identifier node in a `let` pattern or function name.
    /// - **Markdown**: `LinkLabel` in a reference link definition.
    /// - **SQL**: The `Table` name identifier in `CREATE TABLE`.
    /// - **ASM**: A `Label` node (e.g., `main:`).
    /// - **YAML**: The `Key` in a key-value mapping.
    Binding,

    /// A node that refers to an existing name or entity defined elsewhere.
    ///
    /// # Examples
    /// - **Rust**: `PathExpr` (variable usage) or `MethodCall`.
    /// - **Markdown**: `LinkReference` or `FootnoteReference`.
    /// - **SQL**: `ColumnName` in a `SELECT` clause or `TableName` in `FROM`.
    /// - **ASM**: A `Label` reference in a jump (e.g., `JMP main`).
    /// - **YAML**: An `Alias` (e.g., `*anchor_name`) that refers back to an anchor.
    Reference,

    /// A node representing a type signature, constraint, or type reference.
    ///
    /// This role distinguishes type information from general logic or values,
    /// which is essential for type checking and intelligent completion.
    ///
    /// # Examples
    /// - **Rust**: `TypePath` (e.g., `: i32`), `GenericArgument`, or `WhereClause`.
    /// - **SQL**: `DataType` (e.g., `VARCHAR(255)` or `INT`).
    /// - **ASM**: Size specifiers (e.g., `DWORD`, `PTR`).
    /// - **TypeScript**: `TypeAnnotation` or `InterfaceDeclaration`.
    Typing,

    /// Structured comments or documentation nodes attached to other elements.
    ///
    /// Unlike raw `Comment` tokens, these are syntax nodes that may contain
    /// their own internal structure (like Markdown or tagged parameters).
    ///
    /// # Examples
    /// - **Rust**: `DocComment` (e.g., `/// ...`).
    /// - **Java**: `Javadoc` blocks.
    /// - **Python**: `Docstring` literals.
    Documentation,

    /// High-level annotations, decorators, or macros that provide extra semantic info.
    ///
    /// # Metadata vs Attribute
    /// - **Metadata**: Usually refers to language-level extensions that "decorate" an element
    ///   from the outside, often affecting compilation or runtime behavior (e.g., Rust attributes).
    /// - **Attribute**: Usually refers to built-in, structural properties that are part of the
    ///   element's native definition (e.g., HTML attributes).
    ///
    /// # Examples
    /// - **Rust**: `Attribute` (e.g., `#[derive(...)]`) or `MacroCall`.
    /// - **Markdown**: `Frontmatter` (YAML/TOML header).
    /// - **Java/TS**: `@Decorator` or `@Annotation`.
    /// - **Python**: `@decorator` syntax.
    Metadata,

    /// A specific property, flag, or attribute-value pair.
    ///
    /// Unlike `Metadata`, which decorates an element with external logic, `Attribute`
    /// represents intrinsic properties defined by the language's schema or structure.
    ///
    /// # Examples
    /// - **HTML/XML**: An `Attribute` (e.g., `id="main"`).
    /// - **Markdown**: `LinkTitle` or `ImageAlt` text.
    /// - **YAML**: A specific configuration property.
    /// - **ASM**: Segment attributes (e.g., `READONLY`, `EXECUTE`).
    Attribute,

    /// The key part of an attribute, property, or configuration entry.
    ///
    /// This role is distinct because:
    /// - It is not a **Reference** (it doesn't refer to an external symbol).
    /// - It is not a traditional **Binding** (it doesn't define a symbol in a global or
    ///   lexical scope).
    /// - It is not a **Keyword** (it is typically a user-defined or schema-defined identifier).
    ///
    /// # Examples
    /// - **HTML**: The `id` in `id="main"`.
    /// - **Markdown**: `AttributeName` (in Pandoc-style `{#id .class}`).
    /// - **YAML**: The key in a property mapping.
    /// - **TOML**: The key in a table entry.
    AttributeKey,

    /// A node that provides additional details or secondary information for another element.
    ///
    /// # Examples
    /// - **Rust**: `GenericParameter` list, `FunctionParameter` list.
    /// - **SQL**: `Constraint` details.
    Detail,

    /// A node that represents the name of an element, typically used in declarations.
    ///
    /// # Examples
    /// - **Rust**: The name identifier in a function or struct definition.
    /// - **HTML**: The tag name in an element.
    Name,

    /// A discrete syntactic unit within a container, representing a single
    /// logical entry or instruction.
    ///
    /// This typically maps to a **Statement** in programming languages, or a standalone
    /// instruction in assembly. In markup, it could represent a list item or a table row.
    ///
    /// # Examples
    /// - **Rust**: A `Stmt` inside a block.
    /// - **Markdown**: `ListItem` or `TableCell`.
    /// - **SQL**: A standalone `Statement` or a `Clause` (like `WHERE`).
    /// - **ASM**: A single `Instruction` (e.g., `NOP`).
    Statement,

    /// A node representing a computed result or a complex logical operation.
    ///
    /// Unlike a simple `Value` (which is an atomic literal), an `Expression` involves
    /// operators or logic that must be evaluated.
    ///
    /// # Examples
    /// - **Rust**: `BinaryExpr`, `UnaryExpr`, or `RangeExpr`.
    /// - **SQL**: `BinaryOp` in a `WHERE` clause.
    /// - **Python**: `ListComprehension` or `Lambda`.
    Expression,

    /// A node that performs an invocation or call to a function, method, or macro.
    ///
    /// This role identifies the active execution of a named entity with optional arguments.
    ///
    /// # Examples
    /// - **Rust**: `CallExpr`, `MethodCallExpr`, or `MacroInvocation`.
    /// - **SQL**: `FunctionCall` (e.g., `COUNT(*)`).
    /// - **Excel**: A formula call.
    Call,

    /// A node representing an **atomic** data value or a primitive constant.
    ///
    /// This role is strictly for atomic values like numbers, strings, or booleans.
    /// It **does not** include composite structures like arrays `[]` or objects `{}`,
    /// which should be categorized as [`UniversalElementRole::Container`].
    ///
    /// # Examples
    /// - **Rust**: `Literal` (strings, numbers, booleans).
    /// - **Markdown**: `InlineCode`, `Emphasis`, or `Strong`.
    /// - **SQL**: `Literal` values.
    /// - **JSON/YAML**: Atomic `Scalar` values (strings, integers, nulls).
    Value,

    /// A node that acts as a host for content in a different language or a raw
    /// fragment requiring a separate parsing pass (Language Injection).
    ///
    /// # Examples
    /// - **HTML**: A `<script>` or `<style>` block containing JS/CSS.
    /// - **Markdown**: `CodeBlock` (host for other languages).
    /// - **Rust/Java**: A string literal containing SQL (if marked for injection).
    /// - **PHP**: Raw HTML fragments outside of `<?php ... ?>` tags.
    Embedded,

    /// A node specifically created to represent a syntax error or recovery point
    /// in the source code.
    Error,

    /// No specific structural role assigned or recognized for this element.
    None,
}

701impl ElementRole for UniversalElementRole {
702    fn universal(&self) -> UniversalElementRole {
703        *self
704    }
705
706    fn name(&self) -> &str {
707        match *self {
708            UniversalElementRole::Container => "meta.block",
709            UniversalElementRole::Statement => "meta.statement",
710            UniversalElementRole::Binding => "variable.other.declaration",
711            UniversalElementRole::Reference => "variable.other.usage",
712            UniversalElementRole::Call => "entity.name.function.call",
713            UniversalElementRole::Expression => "meta.expression",
714            UniversalElementRole::Value => "constant",
715            UniversalElementRole::Definition => "entity.name.function",
716            UniversalElementRole::Typing => "entity.name.type",
717            UniversalElementRole::Metadata => "meta.preprocessor",
718            UniversalElementRole::Attribute => "entity.other.attribute-name",
719            UniversalElementRole::AttributeKey => "entity.other.attribute-name.key",
720            UniversalElementRole::Detail => "meta.detail",
721            UniversalElementRole::Name => "entity.name",
722            UniversalElementRole::Embedded => "meta.embedded",
723            UniversalElementRole::Documentation => "comment.block.documentation",
724            UniversalElementRole::Root => "source",
725            UniversalElementRole::Error => "invalid",
726            UniversalElementRole::None => "none",
727        }
728    }
729}