oak_core/language/
mod.rs

1use serde::{Deserialize, Serialize};
2use std::{fmt::Debug, hash::Hash};
3
4/// Represents the broad category a language belongs to.
5#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
6pub enum LanguageCategory {
7    /// General-purpose programming languages (e.g., Rust, C, Java).
8    Programming,
9    /// Markup and document languages (e.g., Markdown, HTML, Typst).
10    Markup,
11    /// Configuration and data serialization languages (e.g., YAML, JSON, TOML).
12    Config,
13    /// Domain-specific languages or specialized notation (e.g., SQL, Regex, Math).
14    Dsl,
15    /// Other or unclassified.
16    Other,
17}
18
19/// Language definition trait that coordinates all language-related types and behaviors.
20///
21/// This trait serves as the foundation for defining programming languages within the
22/// incremental parsing system. It acts as a marker trait that ties together various
23/// language-specific components like lexers, parsers, and rebuilders.
24///
25/// # Overview
26///
27/// The Language trait is the central abstraction that enables the parsing framework
28/// to be language-agnostic while still providing language-specific functionality.
29/// Each language implementation must define its own types for tokens, elements,
30/// and the root structure of the parsed tree.
31///
32/// # Design Philosophy
33///
34/// The trait follows a compositional design where:
35/// - `TokenType` defines the atomic units of the language (tokens)
36/// - `ElementType` defines the composite structures (nodes)
37/// - `TypedRoot` defines the top-level structure of the parsed document
38///
39/// This separation allows for maximum flexibility while maintaining type safety
40/// and performance characteristics required for incremental parsing.
41///
42/// # Examples
43///
44/// ```rust
45/// # use oak_core::{Language, TokenType, ElementType, UniversalTokenRole, UniversalElementRole};
46/// // Define a simple language
47/// #[derive(Clone)]
48/// struct MyLanguage;
49///
50/// impl Language for MyLanguage {
51///     const NAME: &'static str = "my-language";
52///     type TokenType = MyToken;
53///     type ElementType = MyElement;
54///     type TypedRoot = ();
55/// }
56///
57/// // With corresponding type definitions
58/// #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
59/// enum MyToken {
60///     Identifier,
61///     EndOfStream,
62/// }
63///
64/// impl TokenType for MyToken {
65///     const END_OF_STREAM: Self = MyToken::EndOfStream;
66///     type Role = UniversalTokenRole;
67///     fn role(&self) -> Self::Role { UniversalTokenRole::None }
68/// }
69///
70/// #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
71/// enum MyElement {}
72///
73/// impl ElementType for MyElement {
74///     type Role = UniversalElementRole;
75///     fn role(&self) -> Self::Role { UniversalElementRole::None }
76/// }
77/// ```
78pub trait Language: Send + Sync + 'static {
79    /// The name of the language (e.g., "rust", "sql").
80    const NAME: &'static str;
81
82    /// The category of the language.
83    const CATEGORY: LanguageCategory = LanguageCategory::Programming;
84
85    /// The token type used to represent different token and node types in the language.
86    ///
87    /// This associated type defines how different syntactic elements (tokens, nodes) are
88    /// categorized and identified within the language. It must implement `Copy` and `Eq`
89    /// to ensure efficient handling in the parsing system.
90    ///
91    /// # Requirements
92    ///
93    /// The token type must:
94    /// - Implement the `TokenType` trait
95    /// - Be copyable to enable efficient passing
96    /// - Support equality comparison for token matching
97    /// - Be sendable across thread boundaries
98    ///
99    /// # Examples
100    ///
101    /// ```
102    /// #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
103    /// enum RustSyntaxKind {
104    ///     LetKeyword,
105    ///     Identifier,
106    ///     Number,
107    ///     // ... other token kinds
108    /// }
109    /// ```
110    type TokenType: TokenType;
111
112    /// The element type used to represent composite structures in the parsed tree.
113    ///
114    /// While tokens represent the atomic units of the language, elements represent
115    /// the composite structures formed by combining tokens according to grammar rules.
116    /// This includes expressions, statements, declarations, and other syntactic constructs.
117    ///
118    /// # Requirements
119    ///
120    /// The element type must:
121    /// - Implement the `ElementType` trait
122    /// - Be copyable for efficient handling
123    /// - Support equality comparison
124    /// - Be sendable across thread boundaries
125    type ElementType: ElementType;
126
127    /// The root type for the parsed tree that represents the top-level structure of the language.
128    ///
129    /// This associated type defines the structure of the root node in the parsed tree,
130    /// which typically contains the entire parsed source code organized according to the
131    /// language's grammar rules. The root type serves as the entry point for traversing
132    /// and manipulating the parsed representation.
133    ///
134    /// # Design Considerations
135    ///
136    /// The root type should:
137    /// - Contain references to all top-level language constructs
138    /// - Provide efficient access to the parsed content
139    /// - Support incremental updates when the source changes
140    ///
141    /// # Examples
142    ///
143    /// ```ignore
144    /// struct RustRoot {
145    ///     items: Vec<RustItem>,
146    /// }
147    ///
148    /// struct RustRoot {
149    ///     modules: Vec<Module>,
150    ///     imports: Vec<Import>,
151    ///     declarations: Vec<Declaration>,
152    /// }
153    /// ```
154    type TypedRoot;
155}
156
157/// Token type definitions for tokens in the parsing system.
158///
159/// This module provides the [`TokenType`] trait which serves as the foundation
160/// for defining different types of tokens in the parsing system.
161/// It enables categorization of token elements and provides methods for
162/// identifying their roles in the language grammar.
163///
164/// # Universal Grammar Philosophy
165///
166/// The role mechanism in Oak is inspired by the concept of "Universal Grammar".
167/// While every language has its own unique "Surface Structure" (its specific token kinds),
168/// most share a common "Deep Structure" (syntactic roles).
169///
170/// By mapping language-specific kinds to [`UniversalTokenRole`], we enable generic tools
171/// like highlighters and formatters to work across 100+ languages without deep
172/// knowledge of each one's specific grammar.
173///
174/// # Implementation Guidelines
175///
176/// When implementing this trait for a specific language:
177/// - Use an enum with discriminant values for efficient matching
178/// - Ensure all variants are Copy and Eq for performance
179/// - Include an END_OF_STREAM variant to signal input termination
180/// - Define a `Role` associated type and implement the `role()` method to provide
181///   syntactic context.
182///
183/// # Examples
184///
185/// ```ignore
186/// #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
187/// enum SimpleToken {
188///     Identifier,
189///     Number,
190///     Plus,
191///     EndOfStream,
192/// }
193///
194/// impl TokenType for SimpleToken {
195///     const END_OF_STREAM: Self = SimpleToken::EndOfStream;
196///     type Role = UniversalTokenRole; // Or a custom Role type
197///
198///     fn role(&self) -> Self::Role {
199///         match self {
200///             SimpleToken::Identifier => UniversalTokenRole::Name,
201///             SimpleToken::Number => UniversalTokenRole::Literal,
202///             SimpleToken::Plus => UniversalTokenRole::Operator,
203///             _ => UniversalTokenRole::None,
204///         }
205///     }
206///
207///     // ... other methods
208/// }
209/// ```
210pub trait TokenType: Copy + Eq + Hash + Send + Sync + 'static + std::fmt::Debug {
211    /// The associated role type for this token kind.
212    type Role: TokenRole;
213
214    /// A constant representing the end of the input stream.
215    ///
216    /// This special token type is used to signal that there are no more tokens
217    /// to process in the input. It's essential for parsers to recognize when
218    /// they've reached the end of the source code.
219    ///
220    /// # Implementation Notes
221    ///
222    /// This should be a specific variant of your token enum that represents
223    /// the end-of-stream condition. It's used throughout the parsing framework
224    /// to handle boundary conditions and termination logic.
225    const END_OF_STREAM: Self;
226
227    /// Returns the general syntactic role of this token.
228    ///
229    /// This provides a language-agnostic way for tools to understand the purpose
230    /// of a token (e.g., is it a name, a literal, or a keyword) across diverse
231    /// languages like SQL, ASM, YAML, or Rust.
232    fn role(&self) -> Self::Role;
233
234    /// Returns true if this token matches the specified language-specific role.
235    fn is_role(&self, role: Self::Role) -> bool {
236        self.role() == role
237    }
238
239    /// Returns true if this token matches the specified universal role.
240    fn is_universal(&self, role: UniversalTokenRole) -> bool {
241        self.role().universal() == role
242    }
243
244    /// Returns true if this token represents a comment.
245    ///
246    /// # Default Implementation
247    ///
248    /// Based on [`UniversalTokenRole::Comment`].
249    fn is_comment(&self) -> bool {
250        self.is_universal(UniversalTokenRole::Comment)
251    }
252
253    /// Returns true if this token represents whitespace.
254    ///
255    /// # Default Implementation
256    ///
257    /// Based on [`UniversalTokenRole::Whitespace`].
258    fn is_whitespace(&self) -> bool {
259        self.is_universal(UniversalTokenRole::Whitespace)
260    }
261
262    /// Returns true if this token represents an error condition.
263    ///
264    /// # Default Implementation
265    ///
266    /// Based on [`UniversalTokenRole::Error`].
267    fn is_error(&self) -> bool {
268        self.is_universal(UniversalTokenRole::Error)
269    }
270
271    /// Returns true if this token represents trivia (whitespace, comments, etc.).
272    ///
273    /// Trivia tokens are typically ignored during parsing but preserved for
274    /// formatting and tooling purposes. They don't contribute to the syntactic
275    /// structure of the language but are important for maintaining the original
276    /// source code formatting.
277    ///
278    /// # Default Implementation
279    ///
280    /// The default implementation considers a token as trivia if it is either
281    /// whitespace or a comment. Language implementations can override this
282    /// method if they have additional trivia categories.
283    ///
284    /// # Examples
285    ///
286    /// ```ignore
287    /// // Skip over trivia tokens during parsing
288    /// while current_token.is_ignored() {
289    ///     advance_to_next_token();
290    /// }
291    /// ```
292    fn is_ignored(&self) -> bool {
293        self.is_whitespace() || self.is_comment()
294    }
295
296    /// Returns true if this token represents the end of the input stream.
297    ///
298    /// This method provides a convenient way to check if a token is the
299    /// special END_OF_STREAM token without directly comparing with the constant.
300    ///
301    /// # Examples
302    ///
303    /// ```ignore
304    /// // Loop until we reach the end of the input
305    /// while !current_token.is_end_of_stream() {
306    ///     process_token(current_token);
307    ///     current_token = next_token();
308    /// }
309    /// ```
310    fn is_end_of_stream(&self) -> bool {
311        *self == Self::END_OF_STREAM
312    }
313}
314
315/// A trait for types that can represent a token's syntactic role.
316pub trait TokenRole: Copy + Eq + Send {
317    /// Maps this role to a universal, language-agnostic role.
318    fn universal(&self) -> UniversalTokenRole;
319
320    /// Returns a specific name for this role, used for granular highlighting.
321    ///
322    /// For universal roles, this should return the standard scope name (e.g., "keyword").
323    /// For language-specific roles, it can return more specific names (e.g., "keyword.control").
324    fn name(&self) -> &str;
325}
326
327/// Represents the general syntactic role of a token across diverse languages.
328///
329/// # Universal Grammar
330///
331/// This mechanism is inspired by Noam Chomsky's Universal Grammar theory.
332/// It posits that while the "Surface Structure" (specific token kinds) of languages
333/// may vary wildly, they share a common "Deep Structure" (syntactic roles).
334///
335/// In the Oak framework:
336/// - **Surface Structure**: Refers to specific token kinds defined by a language (e.g., Rust's `PubKeyword`).
337/// - **Deep Structure**: Refers to the universal roles defined in this enum (e.g., [`UniversalTokenRole::Keyword`]).
338///
339/// By mapping to these roles, generic tools can identify names, literals, or operators
340/// across 100+ languages without needing to learn the specifics of each grammar.
341#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
342pub enum UniversalTokenRole {
343    /// Language reserved words or built-in commands (e.g., 'SELECT', 'let', 'MOV').
344    Keyword,
345    /// Identifiers, labels, keys, tags, or any name-like token.
346    Name,
347    /// Literal values like strings, numbers, booleans, or nulls.
348    Literal,
349    /// An escape sequence or a special character representation within a literal.
350    Escape,
351    /// Mathematical, logical, or structural operators (e.g., '+', '=>', 'LIKE').
352    Operator,
353    /// Structural characters like brackets, commas, semicolons.
354    Punctuation,
355    /// Developer annotations or documentation.
356    Comment,
357    /// Formatting characters like spaces or tabs.
358    Whitespace,
359    /// Malformed or unrecognized content.
360    Error,
361    /// No specific role assigned.
362    None,
363    /// End of stream marker.
364    Eof,
365}
366
367impl TokenRole for UniversalTokenRole {
368    fn universal(&self) -> UniversalTokenRole {
369        *self
370    }
371
372    fn name(&self) -> &str {
373        match *self {
374            UniversalTokenRole::Keyword => "keyword",
375            UniversalTokenRole::Name => "variable.other",
376            UniversalTokenRole::Literal => "constant",
377            UniversalTokenRole::Escape => "constant.character.escape",
378            UniversalTokenRole::Operator => "keyword.operator",
379            UniversalTokenRole::Punctuation => "punctuation",
380            UniversalTokenRole::Comment => "comment",
381            UniversalTokenRole::Whitespace => "punctuation.whitespace",
382            UniversalTokenRole::Error => "invalid",
383            UniversalTokenRole::None => "none",
384            UniversalTokenRole::Eof => "punctuation.eof",
385        }
386    }
387}
388
389/// Element type definitions for nodes in the parsed tree.
390///
391/// While tokens represent the atomic units of a language, elements represent the
392/// composite structures formed by combining tokens according to grammar rules.
393/// This includes expressions, statements, declarations, and other syntactic constructs.
394///
395/// # Universal Grammar Philosophy
396///
397/// Just like tokens, syntax tree elements are mapped from their "Surface Structure"
398/// (language-specific nodes) to a "Deep Structure" via [`UniversalElementRole`].
399///
400/// This allows structural analysis tools (like symbol outline extractors) to
401/// identify [`UniversalElementRole::Binding`] (definitions) or [`UniversalElementRole::Container`]
402/// (scopes/blocks) uniformly across different language families.
403///
404/// # Implementation Guidelines
405///
406/// When implementing this trait for a specific language:
407/// - Use an enum with discriminant values for efficient matching
408/// - Include a Root variant to identify the top-level element
409/// - Include an Error variant for malformed constructs
410/// - Define a `Role` associated type and implement the `role()` method.
411///
412/// # Examples
413///
414/// ```ignore
415/// #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
416/// enum MyElement {
417///     Root,
418///     FunctionDeclaration,
419///     Block,
420///     Error,
421/// }
422///
423/// impl ElementType for MyElement {
424///     type Role = UniversalElementRole;
425///
426///     fn role(&self) -> Self::Role {
427///         match self {
428///             MyElement::Root => UniversalElementRole::Root,
429///             MyElement::FunctionDeclaration => UniversalElementRole::Binding,
430///             MyElement::Block => UniversalElementRole::Container,
431///             MyElement::Error => UniversalElementRole::Error,
432///         }
433///     }
434///
435///     fn is_root(&self) -> bool {
436///         matches!(self, MyElement::Root)
437///     }
438///
439///     fn is_error(&self) -> bool {
440///         matches!(self, MyElement::Error)
441///     }
442/// }
443/// ```
444pub trait ElementType: Copy + Eq + Hash + Send + Sync + 'static + std::fmt::Debug {
445    /// The associated role type for this element kind.
446    type Role: ElementRole;
447
448    /// Returns the general syntactic role of this element.
449    ///
450    /// This helps external tools understand the structural purpose of a node
451    /// (e.g., is it a container, a binding, or a value) without deep language knowledge.
452    fn role(&self) -> Self::Role;
453
454    /// Returns true if this element matches the specified language-specific role.
455    fn is_role(&self, role: Self::Role) -> bool {
456        self.role() == role
457    }
458
459    /// Returns true if this element matches the specified universal role.
460    fn is_universal(&self, role: UniversalElementRole) -> bool {
461        self.role().universal() == role
462    }
463
464    /// Returns true if this element represents the root of the parsed tree.
465    ///
466    /// # Default Implementation
467    ///
468    /// Based on [`UniversalElementRole::Root`].
469    fn is_root(&self) -> bool {
470        self.is_universal(UniversalElementRole::Root)
471    }
472
473    /// Returns true if this element represents an error condition.
474    ///
475    /// # Default Implementation
476    ///
477    /// Based on [`UniversalElementRole::Error`].
478    fn is_error(&self) -> bool {
479        self.is_universal(UniversalElementRole::Error)
480    }
481}
482
483/// A trait for types that can represent an element's structural role.
484pub trait ElementRole: Copy + Eq + Send {
485    /// Maps this role to a universal, language-agnostic role.
486    fn universal(&self) -> UniversalElementRole;
487
488    /// Returns a specific name for this role, used for granular highlighting.
489    fn name(&self) -> &str;
490}
491
492/// Represents the general structural role of a syntax tree element.
493///
494/// # Universal Grammar
495///
496/// This mechanism is inspired by Noam Chomsky's Universal Grammar theory, applied
497/// here to the structural hierarchy of syntax trees. It posits that while the
498/// "Surface Structure" (the specific production rules of a grammar) varies across
499/// languages, they share a common "Deep Structure" (structural intent).
500///
501/// In the Oak framework, syntax tree elements are categorized by their role:
502/// - **Surface Structure**: Refers to specific node kinds defined by a language
503///   (e.g., Rust's `FnDeclaration`, SQL's `SelectStatement`, or YAML's `Mapping`).
504/// - **Deep Structure**: Refers to the universal structural patterns defined in this enum.
505///
506/// By mapping to these roles, we can perform sophisticated analysis across diverse
507/// language families:
508/// - **Containers & Statements**: Identify hierarchical scopes and their constituents
509///   (e.g., a SQL table is a container, its clauses are statements).
510/// - **Bindings & References**: Identify the flow of information and identifiers
511///   (e.g., an ASM label is a binding, a jump instruction is a reference).
512/// - **Values**: Identify the atomic data payload or expression results.
513///
514/// # Design Philosophy: The 99% Rule
515///
516/// This enum is designed to provide a "sufficiently complete" abstraction for common tool
517/// requirements (Highlighting, Outline, Navigation, and Refactoring) while maintaining
518/// language-agnostic simplicity.
519///
520/// ### 1. Structural Identity (The "What")
521/// Roles describe a node's primary structural responsibility in the tree, not its
522/// domain-specific semantic meaning. For example:
523/// - A "Class" or "Function" is structurally a [`Definition`] and often a [`Container`].
524/// - An "Import" is structurally a [`Statement`] that contains a [`Reference`].
525///
526/// ### 2. Broad Categories (The "How")
527/// We categorize elements into four major structural groups:
528/// - **Flow Control & logic**: [`Statement`], [`Expression`], [`Call`], and [`Root`].
529/// - **Symbol Management**: [`Definition`], [`Binding`], and [`Reference`].
530/// - **Hierarchy & Scoping**: [`Container`].
531/// - **Metadata & Auxiliaries**: [`Typing`], [`Metadata`], [`Attribute`], [`Documentation`], etc.
532///
533/// ### 3. Intent-Based Selection
534/// When a node could fit multiple roles, choose the one that represents its **primary
535/// structural intent**.
536/// - **Example**: In Rust, an `if` expression is both an `Expression` and a `Container`.
537///   However, its primary role in the tree is as an [`Expression`] (producing a value),
538///   whereas its children (the blocks) are [`Container`]s.
539/// - **Example**: In Markdown, a "List" is a [`Container`], while each "ListItem" is a
540///   [`Statement`] within that container.
541///
542/// ### 4. Intentional Exclusions
543/// We intentionally exclude roles that can be represented by combining existing roles or
544/// that require deep semantic analysis:
545/// - **Keyword-specific roles**: Roles like "Loop", "Conditional", or "Module" are excluded.
546///   These are surface-level distinctions. In the Deep Structure, they are all [`Container`]s
547///   or [`Statement`]s.
548/// - **Semantic Relationships**: Roles like "Inheritance", "Implementation", or "Dependency"
549///   are excluded. These are better handled by semantic graph analysis rather than
550///   syntactic tree roles.
551#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
552#[non_exhaustive]
553pub enum UniversalElementRole {
554    /// The top-level root of the syntax tree, representing the entire document or source file.
555    Root,
556
557    /// A high-level structural container that defines a scope or logical grouping.
558    Container,
559
560    /// A node that represents the entire declaration or definition of a symbol.
561    ///
562    /// This role identifies the "whole" entity that defines something in the code,
563    /// which is crucial for building symbol trees and navigation outlines.
564    ///
565    /// # Examples
566    /// - **Rust**: The entire `Fn` declaration block, `Struct` item, or `Enum`.
567    /// - **Markdown**: `Heading` or `LinkDefinition`.
568    /// - **SQL**: The whole `CREATE TABLE` or `CREATE PROCEDURE` statement.
569    /// - **ASM**: A `Proc` (procedure) block or a multi-line data definition.
570    /// - **YAML**: A schema-defined object or a complex configuration block.
571    Definition,
572
573    /// A node that specifically performs the act of binding a name to an entity.
574    ///
575    /// Unlike `Definition`, which represents the entire construct, `Binding` targets
576    /// the specific part (usually the identifier) that introduces the name.
577    ///
578    /// # Examples
579    /// - **Rust**: The identifier node in a `let` pattern or function name.
580    /// - **Markdown**: `LinkLabel` in a reference link definition.
581    /// - **SQL**: The `Table` name identifier in `CREATE TABLE`.
582    /// - **ASM**: A `Label` node (e.g., `main:`).
583    /// - **YAML**: The `Key` in a key-value mapping.
584    Binding,
585
586    /// A node that refers to an existing name or entity defined elsewhere.
587    ///
588    /// # Examples
589    /// - **Rust**: `PathExpr` (variable usage) or `MethodCall`.
590    /// - **Markdown**: `LinkReference` or `FootnoteReference`.
591    /// - **SQL**: `ColumnName` in a `SELECT` clause or `TableName` in `FROM`.
592    /// - **ASM**: A `Label` reference in a jump (e.g., `JMP main`).
593    /// - **YAML**: An `Alias` anchor (e.g., `*anchor_name`).
594    Reference,
595
596    /// A node representing a type signature, constraint, or type reference.
597    ///
598    /// This role distinguishes type information from general logic or values,
599    /// which is essential for type checking and intelligent completion.
600    ///
601    /// # Examples
602    /// - **Rust**: `TypePath` (e.g., `: i32`), `GenericArgument`, or `WhereClause`.
603    /// - **SQL**: `DataType` (e.g., `VARCHAR(255)` or `INT`).
604    /// - **ASM**: Size specifiers (e.g., `DWORD`, `PTR`).
605    /// - **TypeScript**: `TypeAnnotation` or `InterfaceDeclaration`.
606    Typing,
607
608    /// Structured comments or documentation nodes attached to other elements.
609    ///
610    /// Unlike raw `Comment` tokens, these are syntax nodes that may contain
611    /// their own internal structure (like Markdown or Tagged parameters).
612    ///
613    /// # Examples
614    /// - **Rust**: `DocComment` (e.g., `/// ...`).
615    /// - **Java**: `Javadoc` blocks.
616    /// - **Python**: `Docstring` literals.
617    Documentation,
618
619    /// High-level annotations, decorators, or macros that provide extra semantic info.
620    ///
621    /// # Metadata vs Attribute
622    /// - **Metadata**: Usually refers to language-level extensions that "decorate" an element
623    ///   from the outside, often affecting compilation or runtime behavior (e.g., Rust attributes).
624    /// - **Attribute**: Usually refers to built-in, structural properties that are part of the
625    ///   element's native definition (e.g., HTML attributes).
626    ///
627    /// # Examples
628    /// - **Rust**: `Attribute` (e.g., `#[derive(...)]`) or `MacroCall`.
629    /// - **Markdown**: `Frontmatter` (YAML/TOML header).
630    /// - **Java/TS**: `@Decorator` or `@Annotation`.
631    /// - **Python**: `@decorator` syntax.
632    Metadata,
633
634    /// A specific property, flag, or attribute-value pair.
635    ///
636    /// Unlike `Metadata`, which decorates an element with external logic, `Attribute`
637    /// represents intrinsic properties defined by the language's schema or structure.
638    ///
639    /// # Examples
640    /// - **HTML/XML**: An `Attribute` (e.g., `id="main"`).
641    /// - **Markdown**: `LinkTitle` or `ImageAlt` text.
642    /// - **YAML**: A specific configuration property.
643    /// - **ASM**: Segment attributes (e.g., `READONLY`, `EXECUTE`).
644    Attribute,
645
646    /// The key part of an attribute, property, or configuration entry.
647    ///
648    /// This role is distinct because:
649    /// - It is not a **Reference** (it doesn't refer to an external symbol).
650    /// - It is not a traditional **Binding** (it doesn't define a symbol in a global or lexical scope).
651    /// - It is not a **Keyword** (it is typically a user-defined or schema-defined identifier).
652    ///
653    /// # Examples
654    /// - **HTML**: The `id` in `id="main"`.
655    /// - **Markdown**: `AttributeName` (in Pandoc-style `{ #id .class }`).
656    /// - **YAML**: The key in a property mapping.
657    /// - **TOML**: The key in a table entry.
658    AttributeKey,
659
660    /// A node that provides additional details or secondary information for another element.
661    ///
662    /// # Examples
663    /// - **Rust**: `GenericParameter` list, `FunctionParameter` list.
664    /// - **SQL**: `Constraint` details.
665    Detail,
666
667    /// A node that represents the name of an element, typically used in declarations.
668    ///
669    /// # Examples
670    /// - **Rust**: The name identifier in a function or struct definition.
671    /// - **HTML**: The tag name in an element.
672    Name,
673
674    /// A discrete syntactic unit within a container, representing a single
675    /// logical entry or instruction.
676    ///
677    /// This typically maps to a **Statement** in programming languages, or a standalone
678    /// instruction in assembly. In markup, it could represent a list item or a table row.
679    ///
680    /// # Examples
681    /// - **Rust**: A `Stmt` inside a block.
682    /// - **Markdown**: `ListItem` or `TableCell`.
683    /// - **SQL**: A standalone `Statement` or a `Clause` (like `WHERE`).
684    /// - **ASM**: A single `Instruction` (e.g., `NOP`).
685    Statement,
686
687    /// A node representing a computed result or a complex logical operation.
688    ///
689    /// Unlike a simple `Value` (which is an atomic literal), an `Expression` involves
690    /// operators or logic that must be evaluated.
691    ///
692    /// # Examples
693    /// - **Rust**: `BinaryExpr`, `UnaryExpr`, or `RangeExpr`.
694    /// - **SQL**: `BinaryOp` in a `WHERE` clause.
695    /// - **Python**: `ListComprehension` or `Lambda`.
696    Expression,
697
698    /// A node that performs an invocation or call to a function, method, or macro.
699    ///
700    /// This role identifies the active execution of a named entity with optional arguments.
701    ///
702    /// # Examples
703    /// - **Rust**: `CallExpr`, `MethodCallExpr`, or `MacroInvocation`.
704    /// - **SQL**: `FunctionCall` (e.g., `COUNT(*)`).
705    /// - **Excel**: A formula call.
706    Call,
707
708    /// A node representing an **atomic** data value or a primitive constant.
709    ///
710    /// This role is strictly for atomic values like numbers, strings, or booleans.
711    /// It **does not** include composite structures like arrays `[]` or objects `{}`,
712    /// which should be categorized as [`UniversalElementRole::Container`].
713    ///
714    /// # Examples
715    /// - **Rust**: `Literal` (strings, numbers, booleans).
716    /// - **Markdown**: `InlineCode`, `Emphasis`, or `Strong`.
717    /// - **SQL**: `Literal` values.
718    /// - **JSON/YAML**: Atomic `Scalar` values (strings, integers, nulls).
719    Value,
720
721    /// A node that acts as a host for content in a different language or a raw
722    /// fragment requiring a separate parsing pass (Language Injection).
723    ///
724    /// # Examples
725    /// - **HTML**: A `<script>` or `<style>` block containing JS/CSS.
726    /// - **Markdown**: `CodeBlock` (host for other languages).
727    /// - **Rust/Java**: A string literal containing SQL (if marked for injection).
728    /// - **PHP**: Raw HTML fragments outside of `<?php ... ?>` tags.
729    Embedded,
730
731    /// A node specifically created to represent a syntax error or recovery point
732    /// in the source code.
733    Error,
734
735    /// No specific structural role assigned or recognized for this element.
736    None,
737}
738
739impl ElementRole for UniversalElementRole {
740    fn universal(&self) -> UniversalElementRole {
741        *self
742    }
743
744    fn name(&self) -> &str {
745        match *self {
746            UniversalElementRole::Container => "meta.block",
747            UniversalElementRole::Statement => "meta.statement",
748            UniversalElementRole::Binding => "variable.other.declaration",
749            UniversalElementRole::Reference => "variable.other.usage",
750            UniversalElementRole::Call => "entity.name.function.call",
751            UniversalElementRole::Expression => "meta.expression",
752            UniversalElementRole::Value => "constant",
753            UniversalElementRole::Definition => "entity.name.function",
754            UniversalElementRole::Typing => "entity.name.type",
755            UniversalElementRole::Metadata => "meta.preprocessor",
756            UniversalElementRole::Attribute => "entity.other.attribute-name",
757            UniversalElementRole::AttributeKey => "entity.other.attribute-name.key",
758            UniversalElementRole::Detail => "meta.detail",
759            UniversalElementRole::Name => "entity.name",
760            UniversalElementRole::Embedded => "meta.embedded",
761            UniversalElementRole::Documentation => "comment.block.documentation",
762            UniversalElementRole::Root => "source",
763            UniversalElementRole::Error => "invalid",
764            UniversalElementRole::None => "none",
765        }
766    }
767}