Skip to main content

oak_core/language/
mod.rs

1#[cfg(feature = "serde")]
2use serde::{Deserialize, Serialize};
3use std::{fmt::Debug, hash::Hash};
4
/// Coarse classification of the family a language belongs to.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum LanguageCategory {
    /// A general-purpose programming language, such as Rust, C, or Java.
    Programming,
    /// A markup or document language, such as Markdown, HTML, or Typst.
    Markup,
    /// A configuration or data-serialization format, such as YAML, JSON, or TOML.
    Config,
    /// A styling language, such as CSS, Sass, or Less.
    StyleSheet,
    /// A domain-specific language or specialized notation, such as SQL, Regex, or Math.
    Dsl,
    /// A modeling language, such as UML, Mermaid, or PlantUML.
    Modeling,
    /// Anything that is unclassified or fits none of the other categories.
    Other,
}
24
25/// Language definition trait that coordinates all language-related types and behaviors.
26///
27/// This trait serves as the foundation for defining programming languages within the
28/// incremental parsing system. It acts as a marker trait that ties together various
29/// language-specific components like lexers, parsers, and rebuilders.
30///
31/// # Overview
32///
33/// The Language trait is the central abstraction that enables the parsing framework
34/// to be language-agnostic while still providing language-specific functionality.
35/// Each language implementation must define its own types for tokens, elements,
36/// and the root structure of the parsed tree.
37///
38/// # Design Philosophy
39///
40/// The trait follows a compositional design where:
41/// - `TokenType` defines the atomic units of the language (tokens)
42/// - `ElementType` defines the composite structures (nodes)
43/// - `TypedRoot` defines the top-level structure of the parsed document
44///
45/// This separation allows for maximum flexibility while maintaining type safety
46/// and performance characteristics required for incremental parsing.
47///
48/// # Examples
49///
50/// ```rust
51/// # use oak_core::{Language, TokenType, ElementType, UniversalTokenRole, UniversalElementRole};
52/// // Define a simple language
53/// #[derive(Clone)]
54/// struct MyLanguage;
55///
56/// impl Language for MyLanguage {
57///     const NAME: &'static str = "my-language";
58///     type TokenType = MyToken;
59///     type ElementType = MyElement;
60///     type TypedRoot = ();
61/// }
62///
63/// // With corresponding type definitions
64/// #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
65/// enum MyToken {
66///     Identifier,
67///     EndOfStream,
68/// }
69///
70/// impl TokenType for MyToken {
71///     const END_OF_STREAM: Self = MyToken::EndOfStream;
72///     type Role = UniversalTokenRole;
73///     fn role(&self) -> Self::Role { UniversalTokenRole::None }
74/// }
75///
76/// #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
77/// enum MyElement {}
78///
79/// impl ElementType for MyElement {
80///     type Role = UniversalElementRole;
81///     fn role(&self) -> Self::Role { UniversalElementRole::None }
82/// }
83/// ```
84pub trait Language: Send + Sync {
85    /// The name of the language (e.g., "rust", "sql").
86    const NAME: &'static str;
87
88    /// The category of the language.
89    const CATEGORY: LanguageCategory = LanguageCategory::Programming;
90
91    /// The token type used to represent different token and node types in the language.
92    ///
93    /// This associated type defines how different syntactic elements (tokens, nodes) are
94    /// categorized and identified within the language. It must implement `Copy` and `Eq`
95    /// to ensure efficient handling in the parsing system.
96    ///
97    /// # Requirements
98    ///
99    /// The token type must:
100    /// - Implement the `TokenType` trait
101    /// - Be copyable to enable efficient passing
102    /// - Support equality comparison for token matching
103    /// - Be sendable across thread boundaries
104    ///
105    /// # Examples
106    ///
107    /// ```
108    /// #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
109    /// enum RustSyntaxKind {
110    ///     LetKeyword,
111    ///     Identifier,
112    ///     Number,
113    ///     // ... other token kinds
114    /// }
115    /// ```
116    type TokenType: TokenType;
117
118    /// The element type used to represent composite structures in the parsed tree.
119    ///
120    /// While tokens represent the atomic units of the language, elements represent
121    /// the composite structures formed by combining tokens according to grammar rules.
122    /// This includes expressions, statements, declarations, and other syntactic constructs.
123    ///
124    /// # Requirements
125    ///
126    /// The element type must:
127    /// - Implement the `ElementType` trait
128    /// - Be copyable for efficient handling
129    /// - Support equality comparison
130    /// - Be sendable across thread boundaries
131    type ElementType: ElementType;
132
133    /// The root type for the parsed tree that represents the top-level structure of the language.
134    ///
135    /// This associated type defines the structure of the root node in the parsed tree,
136    /// which typically contains the entire parsed source code organized according to the
137    /// language's grammar rules. The root type serves as the entry point for traversing
138    /// and manipulating the parsed representation.
139    ///
140    /// # Design Considerations
141    ///
142    /// The root type should:
143    /// - Contain references to all top-level language constructs
144    /// - Provide efficient access to the parsed content
145    /// - Support incremental updates when the source changes
146    ///
147    /// # Examples
148    ///
149    /// ```ignore
150    /// struct RustRoot {
151    ///     items: Vec<RustItem>,
152    /// }
153    ///
154    /// struct RustRoot {
155    ///     modules: Vec<Module>,
156    ///     imports: Vec<Import>,
157    ///     declarations: Vec<Declaration>,
158    /// }
159    /// ```
160    type TypedRoot;
161}
162
163/// Token type definitions for tokens in the parsing system.
164///
165/// This module provides the [`TokenType`] trait which serves as the foundation
166/// for defining different types of tokens in the parsing system.
167/// It enables categorization of token elements and provides methods for
168/// identifying their roles in the language grammar.
169///
170/// # Universal Grammar Philosophy
171///
172/// The role mechanism in Oak is inspired by the concept of "Universal Grammar".
173/// While every language has its own unique "Surface Structure" (its specific token kinds),
174/// most share a common "Deep Structure" (syntactic roles).
175///
176/// By mapping language-specific kinds to [`UniversalTokenRole`], we enable generic tools
177/// like highlighters and formatters to work across 100+ languages without deep
178/// knowledge of each one's specific grammar.
179///
180/// # Implementation Guidelines
181///
182/// When implementing this trait for a specific language:
183/// - Use an enum with discriminant values for efficient matching
184/// - Ensure all variants are Copy and Eq for performance
185/// - Include an END_OF_STREAM variant to signal input termination
186/// - Define a `Role` associated type and implement the `role()` method to provide
187///   syntactic context.
188///
189/// # Examples
190///
191/// ```ignore
192/// #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
193/// enum SimpleToken {
194///     Identifier,
195///     Number,
196///     Plus,
197///     EndOfStream,
198/// }
199///
200/// impl TokenType for SimpleToken {
201///     const END_OF_STREAM: Self = SimpleToken::EndOfStream;
202///     type Role = UniversalTokenRole; // Or a custom Role type
203///
204///     fn role(&self) -> Self::Role {
205///         match self {
206///             SimpleToken::Identifier => UniversalTokenRole::Name,
207///             SimpleToken::Number => UniversalTokenRole::Literal,
208///             SimpleToken::Plus => UniversalTokenRole::Operator,
209///             _ => UniversalTokenRole::None,
210///         }
211///     }
212///
213///     // ... other methods
214/// }
215/// ```
216pub trait TokenType: Copy + Eq + Hash + Send + Sync + std::fmt::Debug {
217    /// The associated role type for this token kind.
218    type Role: TokenRole;
219
220    /// A constant representing the end of the input stream.
221    ///
222    /// This special token type is used to signal that there are no more tokens
223    /// to process in the input. It's essential for parsers to recognize when
224    /// they've reached the end of the source code.
225    ///
226    /// # Implementation Notes
227    ///
228    /// This should be a specific variant of your token enum that represents
229    /// the end-of-stream condition. It's used throughout the parsing framework
230    /// to handle boundary conditions and termination logic.
231    const END_OF_STREAM: Self;
232
233    /// Returns the general syntactic role of this token.
234    ///
235    /// This provides a language-agnostic way for tools to understand the purpose
236    /// of a token (e.g., is it a name, a literal, or a keyword) across diverse
237    /// languages like SQL, ASM, YAML, or Rust.
238    fn role(&self) -> Self::Role;
239
240    /// Returns true if this token matches the specified language-specific role.
241    fn is_role(&self, role: Self::Role) -> bool {
242        self.role() == role
243    }
244
245    /// Returns true if this token matches the specified universal role.
246    fn is_universal(&self, role: UniversalTokenRole) -> bool {
247        self.role().universal() == role
248    }
249
250    /// Returns true if this token represents a comment.
251    ///
252    /// # Default Implementation
253    ///
254    /// Based on [`UniversalTokenRole::Comment`].
255    fn is_comment(&self) -> bool {
256        self.is_universal(UniversalTokenRole::Comment)
257    }
258
259    /// Returns true if this token represents whitespace.
260    ///
261    /// # Default Implementation
262    ///
263    /// Based on [`UniversalTokenRole::Whitespace`].
264    fn is_whitespace(&self) -> bool {
265        self.is_universal(UniversalTokenRole::Whitespace)
266    }
267
268    /// Returns true if this token represents an error condition.
269    ///
270    /// # Default Implementation
271    ///
272    /// Based on [`UniversalTokenRole::Error`].
273    fn is_error(&self) -> bool {
274        self.is_universal(UniversalTokenRole::Error)
275    }
276
277    /// Returns true if this token represents trivia (whitespace, comments, etc.).
278    ///
279    /// Trivia tokens are typically ignored during parsing but preserved for
280    /// formatting and tooling purposes. They don't contribute to the syntactic
281    /// structure of the language but are important for maintaining the original
282    /// source code formatting.
283    ///
284    /// # Default Implementation
285    ///
286    /// The default implementation considers a token as trivia if it is either
287    /// whitespace or a comment. Language implementations can override this
288    /// method if they have additional trivia categories.
289    ///
290    /// # Examples
291    ///
292    /// ```ignore
293    /// // Skip over trivia tokens during parsing
294    /// while current_token.is_ignored() {
295    ///     advance_to_next_token();
296    /// }
297    /// ```
298    fn is_ignored(&self) -> bool {
299        self.is_whitespace() || self.is_comment()
300    }
301
302    /// Returns true if this token represents the end of the input stream.
303    ///
304    /// This method provides a convenient way to check if a token is the
305    /// special END_OF_STREAM token without directly comparing with the constant.
306    ///
307    /// # Examples
308    ///
309    /// ```ignore
310    /// // Loop until we reach the end of the input
311    /// while !current_token.is_end_of_stream() {
312    ///     process_token(current_token);
313    ///     current_token = next_token();
314    /// }
315    /// ```
316    fn is_end_of_stream(&self) -> bool {
317        *self == Self::END_OF_STREAM
318    }
319}
320
321/// A trait for types that can represent a token's syntactic role.
322pub trait TokenRole: Copy + Eq + Send {
323    /// Maps this role to a universal, language-agnostic role.
324    fn universal(&self) -> UniversalTokenRole;
325
326    /// Returns a specific name for this role, used for granular highlighting.
327    ///
328    /// For universal roles, this should return the standard scope name (e.g., "keyword").
329    /// For language-specific roles, it can return more specific names (e.g., "keyword.control").
330    fn name(&self) -> &str;
331}
332
/// Language-agnostic classification of the syntactic role a token plays.
///
/// # Universal Grammar
///
/// The mechanism takes its inspiration from Noam Chomsky's Universal Grammar
/// theory: the "Surface Structure" of languages (their concrete token kinds) can
/// differ wildly, yet they share a common "Deep Structure" (syntactic roles).
///
/// In the Oak framework:
/// - **Surface Structure**: the specific token kinds a language defines (e.g., Rust's `PubKeyword`).
/// - **Deep Structure**: the universal roles listed in this enum (e.g., [`UniversalTokenRole::Keyword`]).
///
/// Once kinds are mapped to these roles, generic tools can recognize names,
/// literals, or operators across 100+ languages without having to learn the
/// details of every grammar.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(
    feature = "serde",
    derive(Serialize, Deserialize),
    serde(rename_all = "snake_case")
)]
pub enum UniversalTokenRole {
    /// A reserved word or built-in command of the language (e.g., 'SELECT', 'let', 'MOV').
    Keyword,
    /// An identifier, label, key, tag, or any other name-like token.
    Name,
    /// A literal value such as a string, number, boolean, or null.
    Literal,
    /// An escape sequence or special character representation inside a literal.
    Escape,
    /// A mathematical, logical, or structural operator (e.g., '+', '=>', 'LIKE').
    Operator,
    /// A structural character such as a bracket, comma, or semicolon.
    Punctuation,
    /// A developer annotation or piece of documentation.
    Comment,
    /// A formatting character such as a space or tab.
    Whitespace,
    /// Content that is malformed or was not recognized.
    Error,
    /// The token has no specific role assigned.
    None,
    /// Marks the end of the stream.
    Eof,
}
374
375impl TokenRole for UniversalTokenRole {
376    fn universal(&self) -> UniversalTokenRole {
377        *self
378    }
379
380    fn name(&self) -> &str {
381        match *self {
382            UniversalTokenRole::Keyword => "keyword",
383            UniversalTokenRole::Name => "variable.other",
384            UniversalTokenRole::Literal => "constant",
385            UniversalTokenRole::Escape => "constant.character.escape",
386            UniversalTokenRole::Operator => "keyword.operator",
387            UniversalTokenRole::Punctuation => "punctuation",
388            UniversalTokenRole::Comment => "comment",
389            UniversalTokenRole::Whitespace => "punctuation.whitespace",
390            UniversalTokenRole::Error => "invalid",
391            UniversalTokenRole::None => "none",
392            UniversalTokenRole::Eof => "punctuation.eof",
393        }
394    }
395}
396
397/// Element type definitions for nodes in the parsed tree.
398///
399/// While tokens represent the atomic units of a language, elements represent the
400/// composite structures formed by combining tokens according to grammar rules.
401/// This includes expressions, statements, declarations, and other syntactic constructs.
402///
403/// # Universal Grammar Philosophy
404///
405/// Just like tokens, syntax tree elements are mapped from their "Surface Structure"
406/// (language-specific nodes) to a "Deep Structure" via [`UniversalElementRole`].
407///
408/// This allows structural analysis tools (like symbol outline extractors) to
409/// identify [`UniversalElementRole::Binding`] (definitions) or [`UniversalElementRole::Container`]
410/// (scopes/blocks) uniformly across different language families.
411///
412/// # Implementation Guidelines
413///
414/// When implementing this trait for a specific language:
415/// - Use an enum with discriminant values for efficient matching
416/// - Include a Root variant to identify the top-level element
417/// - Include an Error variant for malformed constructs
418/// - Define a `Role` associated type and implement the `role()` method.
419///
420/// # Examples
421///
422/// ```ignore
423/// #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
424/// enum MyElement {
425///     Root,
426///     FunctionDeclaration,
427///     Block,
428///     Error,
429/// }
430///
431/// impl ElementType for MyElement {
432///     type Role = UniversalElementRole;
433///
434///     fn role(&self) -> Self::Role {
435///         match self {
436///             MyElement::Root => UniversalElementRole::Root,
437///             MyElement::FunctionDeclaration => UniversalElementRole::Binding,
438///             MyElement::Block => UniversalElementRole::Container,
439///             MyElement::Error => UniversalElementRole::Error,
440///         }
441///     }
442///
443///     fn is_root(&self) -> bool {
444///         matches!(self, MyElement::Root)
445///     }
446///
447///     fn is_error(&self) -> bool {
448///         matches!(self, MyElement::Error)
449///     }
450/// }
451/// ```
452pub trait ElementType: Copy + Eq + Hash + Send + Sync + std::fmt::Debug {
453    /// The associated role type for this element kind.
454    type Role: ElementRole;
455
456    /// Returns the general syntactic role of this element.
457    ///
458    /// This helps external tools understand the structural purpose of a node
459    /// (e.g., is it a container, a binding, or a value) without deep language knowledge.
460    fn role(&self) -> Self::Role;
461
462    /// Returns true if this element matches the specified language-specific role.
463    fn is_role(&self, role: Self::Role) -> bool {
464        self.role() == role
465    }
466
467    /// Returns true if this element matches the specified universal role.
468    fn is_universal(&self, role: UniversalElementRole) -> bool {
469        self.role().universal() == role
470    }
471
472    /// Returns true if this element represents the root of the parsed tree.
473    ///
474    /// # Default Implementation
475    ///
476    /// Based on [`UniversalElementRole::Root`].
477    fn is_root(&self) -> bool {
478        self.is_universal(UniversalElementRole::Root)
479    }
480
481    /// Returns true if this element represents an error condition.
482    ///
483    /// # Default Implementation
484    ///
485    /// Based on [`UniversalElementRole::Error`].
486    fn is_error(&self) -> bool {
487        self.is_universal(UniversalElementRole::Error)
488    }
489}
490
491/// A trait for types that can represent an element's structural role.
492pub trait ElementRole: Copy + Eq + Send {
493    /// Maps this role to a universal, language-agnostic role.
494    fn universal(&self) -> UniversalElementRole;
495
496    /// Returns a specific name for this role, used for granular highlighting.
497    fn name(&self) -> &str;
498}
499
/// Language-agnostic classification of the structural role a syntax tree element plays.
///
/// # Universal Grammar
///
/// The mechanism takes its inspiration from Noam Chomsky's Universal Grammar
/// theory and applies it to the structural hierarchy of syntax trees: the
/// "Surface Structure" (the concrete production rules of a grammar) differs from
/// language to language, yet languages share a common "Deep Structure"
/// (structural intent).
///
/// In the Oak framework, syntax tree elements are categorized by their role:
/// - **Surface Structure**: the specific node kinds a language defines
///   (e.g., Rust's `FnDeclaration`, SQL's `SelectStatement`, or YAML's `Mapping`).
/// - **Deep Structure**: the universal structural patterns listed in this enum.
///
/// Mapping node kinds to these roles makes it possible to perform sophisticated
/// analysis across diverse language families:
/// - **Containers & Statements**: identify hierarchical scopes and their constituents
///   (e.g., a SQL table is a container, its clauses are statements).
/// - **Bindings & References**: identify the flow of information and identifiers
///   (e.g., an ASM label is a binding, a jump instruction is a reference).
/// - **Values**: identify the atomic data payload or expression results.
///
/// # Design Philosophy: The 99% Rule
///
/// The enum aims to be a "sufficiently complete" abstraction for the needs of
/// common tools (Highlighting, Outline, Navigation, and Refactoring) while
/// staying simple and language-agnostic.
///
/// ### 1. Structural Identity (The "What")
/// A role states a node's primary structural responsibility in the tree, not its
/// domain-specific semantic meaning. For example:
/// - A "Class" or "Function" is structurally a [`UniversalElementRole::Definition`] and often a [`UniversalElementRole::Container`].
/// - An "Import" is structurally a [`UniversalElementRole::Statement`] that contains a [`UniversalElementRole::Reference`].
///
/// ### 2. Broad Categories (The "How")
/// Elements fall into four major structural groups:
/// - **Flow Control & Logic**: [`UniversalElementRole::Statement`], [`UniversalElementRole::Expression`], [`UniversalElementRole::Call`], and [`UniversalElementRole::Root`].
/// - **Symbol Management**: [`UniversalElementRole::Definition`], [`UniversalElementRole::Binding`], and [`UniversalElementRole::Reference`].
/// - **Hierarchy & Scoping**: [`UniversalElementRole::Container`].
/// - **Metadata & Auxiliaries**: [`UniversalElementRole::Typing`], [`UniversalElementRole::Metadata`], [`UniversalElementRole::Attribute`], [`UniversalElementRole::Documentation`], etc.
///
/// ### 3. Intent-Based Selection
/// If a node could fit several roles, pick the one that reflects its **primary
/// structural intent**.
/// - **Example**: In Rust, an `if` expression is both an `Expression` and a `Container`.
///   Its primary role in the tree, however, is [`UniversalElementRole::Expression`] (it produces a value),
///   whereas its children (the blocks) are [`UniversalElementRole::Container`]s.
/// - **Example**: In Markdown, a "List" is a [`UniversalElementRole::Container`], while each "ListItem" is a
///   [`UniversalElementRole::Statement`] within that container.
///
/// ### 4. Intentional Exclusions
/// Roles that can be expressed by combining existing roles, or that would need
/// deep semantic analysis, are left out on purpose:
/// - **Keyword-specific roles**: Roles like "Loop", "Conditional", or "Module" are excluded.
///   These are surface-level distinctions. In the Deep Structure, they are all [`UniversalElementRole::Container`]s
///   or [`UniversalElementRole::Statement`]s.
/// - **Semantic Relationships**: Roles like "Inheritance", "Implementation", or "Dependency"
///   are excluded. Semantic graph analysis is a better fit for these than
///   syntactic tree roles.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(
    feature = "serde",
    derive(Serialize, Deserialize),
    serde(rename_all = "snake_case")
)]
#[non_exhaustive]
pub enum UniversalElementRole {
    /// The top-level root of the syntax tree, covering the entire document or source file.
    Root,

    /// A high-level structural container that establishes a scope or logical grouping.
    Container,

    /// A node covering the entire declaration or definition of a symbol.
    ///
    /// The role marks the "whole" entity that defines something in the code, which
    /// is what symbol trees and navigation outlines are built from.
    ///
    /// # Examples
    /// - **Rust**: The entire `Fn` declaration block, `Struct` item, or `Enum`.
    /// - **Markdown**: `Heading` or `LinkDefinition`.
    /// - **SQL**: The whole `CREATE TABLE` or `CREATE PROCEDURE` statement.
    /// - **ASM**: A `Proc` (procedure) block or a multi-line data definition.
    /// - **YAML**: A schema-defined object or a complex configuration block.
    Definition,

    /// A node that performs the act of binding a name to an entity.
    ///
    /// Where `Definition` covers the entire construct, `Binding` targets only the
    /// part (usually the identifier) that introduces the name.
    ///
    /// # Examples
    /// - **Rust**: The identifier node in a `let` pattern or function name.
    /// - **Markdown**: `LinkLabel` in a reference link definition.
    /// - **SQL**: The `Table` name identifier in `CREATE TABLE`.
    /// - **ASM**: A `Label` node (e.g., `main:`).
    /// - **YAML**: The `Key` in a key-value mapping.
    Binding,

    /// A node that refers to a name or entity defined elsewhere.
    ///
    /// # Examples
    /// - **Rust**: `PathExpr` (variable usage) or `MethodCall`.
    /// - **Markdown**: `LinkReference` or `FootnoteReference`.
    /// - **SQL**: `ColumnName` in a `SELECT` clause or `TableName` in `FROM`.
    /// - **ASM**: A `Label` reference in a jump (e.g., `JMP main`).
    /// - **YAML**: An `Alias` anchor (e.g., `*anchor_name`).
    Reference,

    /// A node holding a type signature, constraint, or type reference.
    ///
    /// Separating type information from general logic or values is essential for
    /// type checking and intelligent completion.
    ///
    /// # Examples
    /// - **Rust**: `TypePath` (e.g., `: i32`), `GenericArgument`, or `WhereClause`.
    /// - **SQL**: `DataType` (e.g., `VARCHAR(255)` or `INT`).
    /// - **ASM**: Size specifiers (e.g., `DWORD`, `PTR`).
    /// - **TypeScript**: `TypeAnnotation` or `InterfaceDeclaration`.
    Typing,

    /// A structured comment or documentation node attached to another element.
    ///
    /// In contrast to raw `Comment` tokens, these are syntax nodes that may have
    /// an internal structure of their own (like Markdown or tagged parameters).
    ///
    /// # Examples
    /// - **Rust**: `DocComment` (e.g., `/// ...`).
    /// - **Java**: `Javadoc` blocks.
    /// - **Python**: `Docstring` literals.
    Documentation,

    /// A high-level annotation, decorator, or macro that supplies extra semantic info.
    ///
    /// # Metadata vs Attribute
    /// - **Metadata**: Usually a language-level extension that "decorates" an element
    ///   from the outside, often affecting compilation or runtime behavior (e.g., Rust attributes).
    /// - **Attribute**: Usually a built-in, structural property that is part of the
    ///   element's native definition (e.g., HTML attributes).
    ///
    /// # Examples
    /// - **Rust**: `Attribute` (e.g., `#[derive(...)]`) or `MacroCall`.
    /// - **Markdown**: `Frontmatter` (YAML/TOML header).
    /// - **Java/TS**: `@Decorator` or `@Annotation`.
    /// - **Python**: `@decorator` syntax.
    Metadata,

    /// A specific property, flag, or attribute-value pair.
    ///
    /// Where `Metadata` decorates an element with external logic, `Attribute`
    /// stands for intrinsic properties defined by the language's schema or structure.
    ///
    /// # Examples
    /// - **HTML/XML**: An `Attribute` (e.g., `id="main"`).
    /// - **Markdown**: `LinkTitle` or `ImageAlt` text.
    /// - **YAML**: A specific configuration property.
    /// - **ASM**: Segment attributes (e.g., `READONLY`, `EXECUTE`).
    Attribute,

    /// The key part of an attribute, property, or configuration entry.
    ///
    /// The role is kept separate because:
    /// - It is not a **Reference** (it does not refer to an external symbol).
    /// - It is not a traditional **Binding** (it does not define a symbol in a global or lexical scope).
    /// - It is not a **Keyword** (it is typically a user-defined or schema-defined identifier).
    ///
    /// # Examples
    /// - **HTML**: The `id` in `id="main"`.
    /// - **Markdown**: `AttributeName` (in Pandoc-style `{ #id .class };`).
    /// - **YAML**: The key in a property mapping.
    /// - **TOML**: The key in a table entry.
    AttributeKey,

    /// A node supplying additional details or secondary information for another element.
    ///
    /// # Examples
    /// - **Rust**: `GenericParameter` list, `FunctionParameter` list.
    /// - **SQL**: `Constraint` details.
    Detail,

    /// A node holding the name of an element, typically used in declarations.
    ///
    /// # Examples
    /// - **Rust**: The name identifier in a function or struct definition.
    /// - **HTML**: The tag name in an element.
    Name,

    /// A discrete syntactic unit inside a container: a single logical entry or
    /// instruction.
    ///
    /// In programming languages this typically corresponds to a **Statement**, and
    /// in assembly to a standalone instruction. In markup it could be a list item
    /// or a table row.
    ///
    /// # Examples
    /// - **Rust**: A `Stmt` inside a block.
    /// - **Markdown**: `ListItem` or `TableCell`.
    /// - **SQL**: A standalone `Statement` or a `Clause` (like `WHERE`).
    /// - **ASM**: A single `Instruction` (e.g., `NOP`).
    Statement,

    /// A node standing for a computed result or a complex logical operation.
    ///
    /// In contrast to a simple `Value` (an atomic literal), an `Expression`
    /// involves operators or logic that must be evaluated.
    ///
    /// # Examples
    /// - **Rust**: `BinaryExpr`, `UnaryExpr`, or `RangeExpr`.
    /// - **SQL**: `BinaryOp` in a `WHERE` clause.
    /// - **Python**: `ListComprehension` or `Lambda`.
    Expression,

    /// A node that invokes a function, method, or macro.
    ///
    /// The role marks the active execution of a named entity with optional arguments.
    ///
    /// # Examples
    /// - **Rust**: `CallExpr`, `MethodCallExpr`, or `MacroInvocation`.
    /// - **SQL**: `FunctionCall` (e.g., `COUNT(*)`).
    /// - **Excel**: A formula call.
    Call,

    /// A node holding an **atomic** data value or a primitive constant.
    ///
    /// The role is reserved for atomic values such as numbers, strings, or booleans.
    /// It **does not** cover composite structures like arrays `[]` or objects `{}`,
    /// which should be categorized as [`UniversalElementRole::Container`].
    ///
    /// # Examples
    /// - **Rust**: `Literal` (strings, numbers, booleans).
    /// - **Markdown**: `InlineCode`, `Emphasis`, or `Strong`.
    /// - **SQL**: `Literal` values.
    /// - **JSON/YAML**: Atomic `Scalar` values (strings, integers, nulls).
    Value,

    /// A node hosting content in a different language, or a raw fragment that
    /// requires a separate parsing pass (Language Injection).
    ///
    /// # Examples
    /// - **HTML**: A `<script>` or `<style>` block containing JS/CSS.
    /// - **Markdown**: `CodeBlock` (host for other languages).
    /// - **Rust/Java**: A string literal containing SQL (if marked for injection).
    /// - **PHP**: Raw HTML fragments outside of `<?php ... ?>` tags.
    Embedded,

    /// A node created specifically to represent a syntax error or recovery point
    /// in the source code.
    Error,

    /// The element has no specific structural role assigned or recognized.
    None,
}
748
749impl ElementRole for UniversalElementRole {
750    fn universal(&self) -> UniversalElementRole {
751        *self
752    }
753
754    fn name(&self) -> &str {
755        match *self {
756            UniversalElementRole::Container => "meta.block",
757            UniversalElementRole::Statement => "meta.statement",
758            UniversalElementRole::Binding => "variable.other.declaration",
759            UniversalElementRole::Reference => "variable.other.usage",
760            UniversalElementRole::Call => "entity.name.function.call",
761            UniversalElementRole::Expression => "meta.expression",
762            UniversalElementRole::Value => "constant",
763            UniversalElementRole::Definition => "entity.name.function",
764            UniversalElementRole::Typing => "entity.name.type",
765            UniversalElementRole::Metadata => "meta.preprocessor",
766            UniversalElementRole::Attribute => "entity.other.attribute-name",
767            UniversalElementRole::AttributeKey => "entity.other.attribute-name.key",
768            UniversalElementRole::Detail => "meta.detail",
769            UniversalElementRole::Name => "entity.name",
770            UniversalElementRole::Embedded => "meta.embedded",
771            UniversalElementRole::Documentation => "comment.block.documentation",
772            UniversalElementRole::Root => "source",
773            UniversalElementRole::Error => "invalid",
774            UniversalElementRole::None => "none",
775        }
776    }
777}