// oak_core/language/mod.rs

#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use std::{fmt::Debug, hash::Hash};

/// Represents the broad category a language belongs to.
///
/// This is the type of the [`Language::CATEGORY`] associated constant, which
/// lets tooling group languages without knowing anything else about them.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum LanguageCategory {
    /// General-purpose programming languages (e.g., Rust, C, Java).
    Programming,
    /// Markup and document languages (e.g., Markdown, HTML, Typst).
    Markup,
    /// Configuration and data serialization languages (e.g., YAML, JSON, TOML).
    Config,
    /// Styling languages (e.g., CSS, Sass, Less).
    StyleSheet,
    /// Domain-specific languages or specialized notation (e.g., SQL, Regex, Math).
    Dsl,
    /// Other or unclassified.
    Other,
}

23/// Language definition trait that coordinates all language-related types and behaviors.
24///
25/// This trait serves as the foundation for defining programming languages within the
26/// incremental parsing system. It acts as a marker trait that ties together various
27/// language-specific components like lexers, parsers, and rebuilders.
28///
29/// # Overview
30///
31/// The Language trait is the central abstraction that enables the parsing framework
32/// to be language-agnostic while still providing language-specific functionality.
33/// Each language implementation must define its own types for tokens, elements,
34/// and the root structure of the parsed tree.
35///
36/// # Design Philosophy
37///
38/// The trait follows a compositional design where:
39/// - `TokenType` defines the atomic units of the language (tokens)
40/// - `ElementType` defines the composite structures (nodes)
41/// - `TypedRoot` defines the top-level structure of the parsed document
42///
43/// This separation allows for maximum flexibility while maintaining type safety
44/// and performance characteristics required for incremental parsing.
45///
46/// # Examples
47///
48/// ```rust
49/// # use oak_core::{Language, TokenType, ElementType, UniversalTokenRole, UniversalElementRole};
50/// // Define a simple language
51/// #[derive(Clone)]
52/// struct MyLanguage;
53///
54/// impl Language for MyLanguage {
55///     const NAME: &'static str = "my-language";
56///     type TokenType = MyToken;
57///     type ElementType = MyElement;
58///     type TypedRoot = ();
59/// }
60///
61/// // With corresponding type definitions
62/// #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
63/// enum MyToken {
64///     Identifier,
65///     EndOfStream,
66/// }
67///
68/// impl TokenType for MyToken {
69///     const END_OF_STREAM: Self = MyToken::EndOfStream;
70///     type Role = UniversalTokenRole;
71///     fn role(&self) -> Self::Role { UniversalTokenRole::None }
72/// }
73///
74/// #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
75/// enum MyElement {}
76///
77/// impl ElementType for MyElement {
78///     type Role = UniversalElementRole;
79///     fn role(&self) -> Self::Role { UniversalElementRole::None }
80/// }
81/// ```
82pub trait Language: Send + Sync + 'static {
83    /// The name of the language (e.g., "rust", "sql").
84    const NAME: &'static str;
85
86    /// The category of the language.
87    const CATEGORY: LanguageCategory = LanguageCategory::Programming;
88
89    /// The token type used to represent different token and node types in the language.
90    ///
91    /// This associated type defines how different syntactic elements (tokens, nodes) are
92    /// categorized and identified within the language. It must implement `Copy` and `Eq`
93    /// to ensure efficient handling in the parsing system.
94    ///
95    /// # Requirements
96    ///
97    /// The token type must:
98    /// - Implement the `TokenType` trait
99    /// - Be copyable to enable efficient passing
100    /// - Support equality comparison for token matching
101    /// - Be sendable across thread boundaries
102    ///
103    /// # Examples
104    ///
105    /// ```
106    /// #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
107    /// enum RustSyntaxKind {
108    ///     LetKeyword,
109    ///     Identifier,
110    ///     Number,
111    ///     // ... other token kinds
112    /// }
113    /// ```
114    type TokenType: TokenType;
115
116    /// The element type used to represent composite structures in the parsed tree.
117    ///
118    /// While tokens represent the atomic units of the language, elements represent
119    /// the composite structures formed by combining tokens according to grammar rules.
120    /// This includes expressions, statements, declarations, and other syntactic constructs.
121    ///
122    /// # Requirements
123    ///
124    /// The element type must:
125    /// - Implement the `ElementType` trait
126    /// - Be copyable for efficient handling
127    /// - Support equality comparison
128    /// - Be sendable across thread boundaries
129    type ElementType: ElementType;
130
131    /// The root type for the parsed tree that represents the top-level structure of the language.
132    ///
133    /// This associated type defines the structure of the root node in the parsed tree,
134    /// which typically contains the entire parsed source code organized according to the
135    /// language's grammar rules. The root type serves as the entry point for traversing
136    /// and manipulating the parsed representation.
137    ///
138    /// # Design Considerations
139    ///
140    /// The root type should:
141    /// - Contain references to all top-level language constructs
142    /// - Provide efficient access to the parsed content
143    /// - Support incremental updates when the source changes
144    ///
145    /// # Examples
146    ///
147    /// ```ignore
148    /// struct RustRoot {
149    ///     items: Vec<RustItem>,
150    /// }
151    ///
152    /// struct RustRoot {
153    ///     modules: Vec<Module>,
154    ///     imports: Vec<Import>,
155    ///     declarations: Vec<Declaration>,
156    /// }
157    /// ```
158    type TypedRoot;
159}
160
161/// Token type definitions for tokens in the parsing system.
162///
163/// This module provides the [`TokenType`] trait which serves as the foundation
164/// for defining different types of tokens in the parsing system.
165/// It enables categorization of token elements and provides methods for
166/// identifying their roles in the language grammar.
167///
168/// # Universal Grammar Philosophy
169///
170/// The role mechanism in Oak is inspired by the concept of "Universal Grammar".
171/// While every language has its own unique "Surface Structure" (its specific token kinds),
172/// most share a common "Deep Structure" (syntactic roles).
173///
174/// By mapping language-specific kinds to [`UniversalTokenRole`], we enable generic tools
175/// like highlighters and formatters to work across 100+ languages without deep
176/// knowledge of each one's specific grammar.
177///
178/// # Implementation Guidelines
179///
180/// When implementing this trait for a specific language:
181/// - Use an enum with discriminant values for efficient matching
182/// - Ensure all variants are Copy and Eq for performance
183/// - Include an END_OF_STREAM variant to signal input termination
184/// - Define a `Role` associated type and implement the `role()` method to provide
185///   syntactic context.
186///
187/// # Examples
188///
189/// ```ignore
190/// #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
191/// enum SimpleToken {
192///     Identifier,
193///     Number,
194///     Plus,
195///     EndOfStream,
196/// }
197///
198/// impl TokenType for SimpleToken {
199///     const END_OF_STREAM: Self = SimpleToken::EndOfStream;
200///     type Role = UniversalTokenRole; // Or a custom Role type
201///
202///     fn role(&self) -> Self::Role {
203///         match self {
204///             SimpleToken::Identifier => UniversalTokenRole::Name,
205///             SimpleToken::Number => UniversalTokenRole::Literal,
206///             SimpleToken::Plus => UniversalTokenRole::Operator,
207///             _ => UniversalTokenRole::None,
208///         }
209///     }
210///
211///     // ... other methods
212/// }
213/// ```
214pub trait TokenType: Copy + Eq + Hash + Send + Sync + 'static + std::fmt::Debug {
215    /// The associated role type for this token kind.
216    type Role: TokenRole;
217
218    /// A constant representing the end of the input stream.
219    ///
220    /// This special token type is used to signal that there are no more tokens
221    /// to process in the input. It's essential for parsers to recognize when
222    /// they've reached the end of the source code.
223    ///
224    /// # Implementation Notes
225    ///
226    /// This should be a specific variant of your token enum that represents
227    /// the end-of-stream condition. It's used throughout the parsing framework
228    /// to handle boundary conditions and termination logic.
229    const END_OF_STREAM: Self;
230
231    /// Returns the general syntactic role of this token.
232    ///
233    /// This provides a language-agnostic way for tools to understand the purpose
234    /// of a token (e.g., is it a name, a literal, or a keyword) across diverse
235    /// languages like SQL, ASM, YAML, or Rust.
236    fn role(&self) -> Self::Role;
237
238    /// Returns true if this token matches the specified language-specific role.
239    fn is_role(&self, role: Self::Role) -> bool {
240        self.role() == role
241    }
242
243    /// Returns true if this token matches the specified universal role.
244    fn is_universal(&self, role: UniversalTokenRole) -> bool {
245        self.role().universal() == role
246    }
247
248    /// Returns true if this token represents a comment.
249    ///
250    /// # Default Implementation
251    ///
252    /// Based on [`UniversalTokenRole::Comment`].
253    fn is_comment(&self) -> bool {
254        self.is_universal(UniversalTokenRole::Comment)
255    }
256
257    /// Returns true if this token represents whitespace.
258    ///
259    /// # Default Implementation
260    ///
261    /// Based on [`UniversalTokenRole::Whitespace`].
262    fn is_whitespace(&self) -> bool {
263        self.is_universal(UniversalTokenRole::Whitespace)
264    }
265
266    /// Returns true if this token represents an error condition.
267    ///
268    /// # Default Implementation
269    ///
270    /// Based on [`UniversalTokenRole::Error`].
271    fn is_error(&self) -> bool {
272        self.is_universal(UniversalTokenRole::Error)
273    }
274
275    /// Returns true if this token represents trivia (whitespace, comments, etc.).
276    ///
277    /// Trivia tokens are typically ignored during parsing but preserved for
278    /// formatting and tooling purposes. They don't contribute to the syntactic
279    /// structure of the language but are important for maintaining the original
280    /// source code formatting.
281    ///
282    /// # Default Implementation
283    ///
284    /// The default implementation considers a token as trivia if it is either
285    /// whitespace or a comment. Language implementations can override this
286    /// method if they have additional trivia categories.
287    ///
288    /// # Examples
289    ///
290    /// ```ignore
291    /// // Skip over trivia tokens during parsing
292    /// while current_token.is_ignored() {
293    ///     advance_to_next_token();
294    /// }
295    /// ```
296    fn is_ignored(&self) -> bool {
297        self.is_whitespace() || self.is_comment()
298    }
299
300    /// Returns true if this token represents the end of the input stream.
301    ///
302    /// This method provides a convenient way to check if a token is the
303    /// special END_OF_STREAM token without directly comparing with the constant.
304    ///
305    /// # Examples
306    ///
307    /// ```ignore
308    /// // Loop until we reach the end of the input
309    /// while !current_token.is_end_of_stream() {
310    ///     process_token(current_token);
311    ///     current_token = next_token();
312    /// }
313    /// ```
314    fn is_end_of_stream(&self) -> bool {
315        *self == Self::END_OF_STREAM
316    }
317}
318
319/// A trait for types that can represent a token's syntactic role.
320pub trait TokenRole: Copy + Eq + Send {
321    /// Maps this role to a universal, language-agnostic role.
322    fn universal(&self) -> UniversalTokenRole;
323
324    /// Returns a specific name for this role, used for granular highlighting.
325    ///
326    /// For universal roles, this should return the standard scope name (e.g., "keyword").
327    /// For language-specific roles, it can return more specific names (e.g., "keyword.control").
328    fn name(&self) -> &str;
329}
330
/// Represents the general syntactic role of a token across diverse languages.
///
/// # Universal Grammar
///
/// This mechanism is inspired by Noam Chomsky's Universal Grammar theory.
/// It posits that while the "Surface Structure" (specific token kinds) of languages
/// may vary wildly, they share a common "Deep Structure" (syntactic roles).
///
/// In the Oak framework:
/// - **Surface Structure**: Refers to specific token kinds defined by a language (e.g., Rust's `PubKeyword`).
/// - **Deep Structure**: Refers to the universal roles defined in this enum (e.g., [`UniversalTokenRole::Keyword`]).
///
/// By mapping to these roles, generic tools can identify names, literals, or operators
/// across 100+ languages without needing to learn the specifics of each grammar.
///
/// # Serialization
///
/// With the `serde` feature enabled, variants are (de)serialized using
/// `snake_case` names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
pub enum UniversalTokenRole {
    /// Language reserved words or built-in commands (e.g., 'SELECT', 'let', 'MOV').
    Keyword,
    /// Identifiers, labels, keys, tags, or any name-like token.
    Name,
    /// Literal values like strings, numbers, booleans, or nulls.
    Literal,
    /// An escape sequence or a special character representation within a literal.
    Escape,
    /// Mathematical, logical, or structural operators (e.g., '+', '=>', 'LIKE').
    Operator,
    /// Structural characters like brackets, commas, semicolons.
    Punctuation,
    /// Developer annotations or documentation.
    Comment,
    /// Formatting characters like spaces or tabs.
    Whitespace,
    /// Malformed or unrecognized content.
    Error,
    /// No specific role assigned.
    None,
    /// End of stream marker.
    Eof,
}

373impl TokenRole for UniversalTokenRole {
374    fn universal(&self) -> UniversalTokenRole {
375        *self
376    }
377
378    fn name(&self) -> &str {
379        match *self {
380            UniversalTokenRole::Keyword => "keyword",
381            UniversalTokenRole::Name => "variable.other",
382            UniversalTokenRole::Literal => "constant",
383            UniversalTokenRole::Escape => "constant.character.escape",
384            UniversalTokenRole::Operator => "keyword.operator",
385            UniversalTokenRole::Punctuation => "punctuation",
386            UniversalTokenRole::Comment => "comment",
387            UniversalTokenRole::Whitespace => "punctuation.whitespace",
388            UniversalTokenRole::Error => "invalid",
389            UniversalTokenRole::None => "none",
390            UniversalTokenRole::Eof => "punctuation.eof",
391        }
392    }
393}
394
395/// Element type definitions for nodes in the parsed tree.
396///
397/// While tokens represent the atomic units of a language, elements represent the
398/// composite structures formed by combining tokens according to grammar rules.
399/// This includes expressions, statements, declarations, and other syntactic constructs.
400///
401/// # Universal Grammar Philosophy
402///
403/// Just like tokens, syntax tree elements are mapped from their "Surface Structure"
404/// (language-specific nodes) to a "Deep Structure" via [`UniversalElementRole`].
405///
406/// This allows structural analysis tools (like symbol outline extractors) to
407/// identify [`UniversalElementRole::Binding`] (definitions) or [`UniversalElementRole::Container`]
408/// (scopes/blocks) uniformly across different language families.
409///
410/// # Implementation Guidelines
411///
412/// When implementing this trait for a specific language:
413/// - Use an enum with discriminant values for efficient matching
414/// - Include a Root variant to identify the top-level element
415/// - Include an Error variant for malformed constructs
416/// - Define a `Role` associated type and implement the `role()` method.
417///
418/// # Examples
419///
420/// ```ignore
421/// #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
422/// enum MyElement {
423///     Root,
424///     FunctionDeclaration,
425///     Block,
426///     Error,
427/// }
428///
429/// impl ElementType for MyElement {
430///     type Role = UniversalElementRole;
431///
432///     fn role(&self) -> Self::Role {
433///         match self {
434///             MyElement::Root => UniversalElementRole::Root,
435///             MyElement::FunctionDeclaration => UniversalElementRole::Binding,
436///             MyElement::Block => UniversalElementRole::Container,
437///             MyElement::Error => UniversalElementRole::Error,
438///         }
439///     }
440///
441///     fn is_root(&self) -> bool {
442///         matches!(self, MyElement::Root)
443///     }
444///
445///     fn is_error(&self) -> bool {
446///         matches!(self, MyElement::Error)
447///     }
448/// }
449/// ```
450pub trait ElementType: Copy + Eq + Hash + Send + Sync + 'static + std::fmt::Debug {
451    /// The associated role type for this element kind.
452    type Role: ElementRole;
453
454    /// Returns the general syntactic role of this element.
455    ///
456    /// This helps external tools understand the structural purpose of a node
457    /// (e.g., is it a container, a binding, or a value) without deep language knowledge.
458    fn role(&self) -> Self::Role;
459
460    /// Returns true if this element matches the specified language-specific role.
461    fn is_role(&self, role: Self::Role) -> bool {
462        self.role() == role
463    }
464
465    /// Returns true if this element matches the specified universal role.
466    fn is_universal(&self, role: UniversalElementRole) -> bool {
467        self.role().universal() == role
468    }
469
470    /// Returns true if this element represents the root of the parsed tree.
471    ///
472    /// # Default Implementation
473    ///
474    /// Based on [`UniversalElementRole::Root`].
475    fn is_root(&self) -> bool {
476        self.is_universal(UniversalElementRole::Root)
477    }
478
479    /// Returns true if this element represents an error condition.
480    ///
481    /// # Default Implementation
482    ///
483    /// Based on [`UniversalElementRole::Error`].
484    fn is_error(&self) -> bool {
485        self.is_universal(UniversalElementRole::Error)
486    }
487}
488
489/// A trait for types that can represent an element's structural role.
490pub trait ElementRole: Copy + Eq + Send {
491    /// Maps this role to a universal, language-agnostic role.
492    fn universal(&self) -> UniversalElementRole;
493
494    /// Returns a specific name for this role, used for granular highlighting.
495    fn name(&self) -> &str;
496}
497
/// Represents the general structural role of a syntax tree element.
///
/// # Universal Grammar
///
/// This mechanism is inspired by Noam Chomsky's Universal Grammar theory, applied
/// here to the structural hierarchy of syntax trees. It posits that while the
/// "Surface Structure" (the specific production rules of a grammar) varies across
/// languages, they share a common "Deep Structure" (structural intent).
///
/// In the Oak framework, syntax tree elements are categorized by their role:
/// - **Surface Structure**: Refers to specific node kinds defined by a language
///   (e.g., Rust's `FnDeclaration`, SQL's `SelectStatement`, or YAML's `Mapping`).
/// - **Deep Structure**: Refers to the universal structural patterns defined in this enum.
///
/// By mapping to these roles, we can perform sophisticated analysis across diverse
/// language families:
/// - **Containers & Statements**: Identify hierarchical scopes and their constituents
///   (e.g., a SQL table is a container, its clauses are statements).
/// - **Bindings & References**: Identify the flow of information and identifiers
///   (e.g., an ASM label is a binding, a jump instruction is a reference).
/// - **Values**: Identify the atomic data payload or expression results.
///
/// # Design Philosophy: The 99% Rule
///
/// This enum is designed to provide a "sufficiently complete" abstraction for common tool
/// requirements (Highlighting, Outline, Navigation, and Refactoring) while maintaining
/// language-agnostic simplicity.
///
/// ### 1. Structural Identity (The "What")
/// Roles describe a node's primary structural responsibility in the tree, not its
/// domain-specific semantic meaning. For example:
/// - A "Class" or "Function" is structurally a [`Definition`] and often a [`Container`].
/// - An "Import" is structurally a [`Statement`] that contains a [`Reference`].
///
/// ### 2. Broad Categories (The "How")
/// We categorize elements into four major structural groups:
/// - **Flow Control & logic**: [`Statement`], [`Expression`], [`Call`], and [`Root`].
/// - **Symbol Management**: [`Definition`], [`Binding`], and [`Reference`].
/// - **Hierarchy & Scoping**: [`Container`].
/// - **Metadata & Auxiliaries**: [`Typing`], [`Metadata`], [`Attribute`], [`Documentation`], etc.
///
/// ### 3. Intent-Based Selection
/// When a node could fit multiple roles, choose the one that represents its **primary
/// structural intent**.
/// - **Example**: In Rust, an `if` expression is both an `Expression` and a `Container`.
///   However, its primary role in the tree is as an [`Expression`] (producing a value),
///   whereas its children (the blocks) are [`Container`]s.
/// - **Example**: In Markdown, a "List" is a [`Container`], while each "ListItem" is a
///   [`Statement`] within that container.
///
/// ### 4. Intentional Exclusions
/// We intentionally exclude roles that can be represented by combining existing roles or
/// that require deep semantic analysis:
/// - **Keyword-specific roles**: Roles like "Loop", "Conditional", or "Module" are excluded.
///   These are surface-level distinctions. In the Deep Structure, they are all [`Container`]s
///   or [`Statement`]s.
/// - **Semantic Relationships**: Roles like "Inheritance", "Implementation", or "Dependency"
///   are excluded. These are better handled by semantic graph analysis rather than
///   syntactic tree roles.
///
/// [`Root`]: UniversalElementRole::Root
/// [`Container`]: UniversalElementRole::Container
/// [`Definition`]: UniversalElementRole::Definition
/// [`Binding`]: UniversalElementRole::Binding
/// [`Reference`]: UniversalElementRole::Reference
/// [`Typing`]: UniversalElementRole::Typing
/// [`Documentation`]: UniversalElementRole::Documentation
/// [`Metadata`]: UniversalElementRole::Metadata
/// [`Attribute`]: UniversalElementRole::Attribute
/// [`Statement`]: UniversalElementRole::Statement
/// [`Expression`]: UniversalElementRole::Expression
/// [`Call`]: UniversalElementRole::Call
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub enum UniversalElementRole {
    /// The top-level root of the syntax tree, representing the entire document or source file.
    Root,

    /// A high-level structural container that defines a scope or logical grouping.
    Container,

    /// A node that represents the entire declaration or definition of a symbol.
    ///
    /// This role identifies the "whole" entity that defines something in the code,
    /// which is crucial for building symbol trees and navigation outlines.
    ///
    /// # Examples
    /// - **Rust**: The entire `Fn` declaration block, `Struct` item, or `Enum`.
    /// - **Markdown**: `Heading` or `LinkDefinition`.
    /// - **SQL**: The whole `CREATE TABLE` or `CREATE PROCEDURE` statement.
    /// - **ASM**: A `Proc` (procedure) block or a multi-line data definition.
    /// - **YAML**: A schema-defined object or a complex configuration block.
    Definition,

    /// A node that specifically performs the act of binding a name to an entity.
    ///
    /// Unlike `Definition`, which represents the entire construct, `Binding` targets
    /// the specific part (usually the identifier) that introduces the name.
    ///
    /// # Examples
    /// - **Rust**: The identifier node in a `let` pattern or function name.
    /// - **Markdown**: `LinkLabel` in a reference link definition.
    /// - **SQL**: The `Table` name identifier in `CREATE TABLE`.
    /// - **ASM**: A `Label` node (e.g., `main:`).
    /// - **YAML**: The `Key` in a key-value mapping.
    Binding,

    /// A node that refers to an existing name or entity defined elsewhere.
    ///
    /// # Examples
    /// - **Rust**: `PathExpr` (variable usage) or `MethodCall`.
    /// - **Markdown**: `LinkReference` or `FootnoteReference`.
    /// - **SQL**: `ColumnName` in a `SELECT` clause or `TableName` in `FROM`.
    /// - **ASM**: A `Label` reference in a jump (e.g., `JMP main`).
    /// - **YAML**: An `Alias` anchor (e.g., `*anchor_name`).
    Reference,

    /// A node representing a type signature, constraint, or type reference.
    ///
    /// This role distinguishes type information from general logic or values,
    /// which is essential for type checking and intelligent completion.
    ///
    /// # Examples
    /// - **Rust**: `TypePath` (e.g., `: i32`), `GenericArgument`, or `WhereClause`.
    /// - **SQL**: `DataType` (e.g., `VARCHAR(255)` or `INT`).
    /// - **ASM**: Size specifiers (e.g., `DWORD`, `PTR`).
    /// - **TypeScript**: `TypeAnnotation` or `InterfaceDeclaration`.
    Typing,

    /// Structured comments or documentation nodes attached to other elements.
    ///
    /// Unlike raw `Comment` tokens, these are syntax nodes that may contain
    /// their own internal structure (like Markdown or Tagged parameters).
    ///
    /// # Examples
    /// - **Rust**: `DocComment` (e.g., `/// ...`).
    /// - **Java**: `Javadoc` blocks.
    /// - **Python**: `Docstring` literals.
    Documentation,

    /// High-level annotations, decorators, or macros that provide extra semantic info.
    ///
    /// # Metadata vs Attribute
    /// - **Metadata**: Usually refers to language-level extensions that "decorate" an element
    ///   from the outside, often affecting compilation or runtime behavior (e.g., Rust attributes).
    /// - **Attribute**: Usually refers to built-in, structural properties that are part of the
    ///   element's native definition (e.g., HTML attributes).
    ///
    /// # Examples
    /// - **Rust**: `Attribute` (e.g., `#[derive(...)]`) or `MacroCall`.
    /// - **Markdown**: `Frontmatter` (YAML/TOML header).
    /// - **Java/TS**: `@Decorator` or `@Annotation`.
    /// - **Python**: `@decorator` syntax.
    Metadata,

    /// A specific property, flag, or attribute-value pair.
    ///
    /// Unlike `Metadata`, which decorates an element with external logic, `Attribute`
    /// represents intrinsic properties defined by the language's schema or structure.
    ///
    /// # Examples
    /// - **HTML/XML**: An `Attribute` (e.g., `id="main"`).
    /// - **Markdown**: `LinkTitle` or `ImageAlt` text.
    /// - **YAML**: A specific configuration property.
    /// - **ASM**: Segment attributes (e.g., `READONLY`, `EXECUTE`).
    Attribute,

    /// The key part of an attribute, property, or configuration entry.
    ///
    /// This role is distinct because:
    /// - It is not a **Reference** (it doesn't refer to an external symbol).
    /// - It is not a traditional **Binding** (it doesn't define a symbol in a global or lexical scope).
    /// - It is not a **Keyword** (it is typically a user-defined or schema-defined identifier).
    ///
    /// # Examples
    /// - **HTML**: The `id` in `id="main"`.
    /// - **Markdown**: `AttributeName` (in Pandoc-style `{ #id .class }`).
    /// - **YAML**: The key in a property mapping.
    /// - **TOML**: The key in a table entry.
    AttributeKey,

    /// A node that provides additional details or secondary information for another element.
    ///
    /// # Examples
    /// - **Rust**: `GenericParameter` list, `FunctionParameter` list.
    /// - **SQL**: `Constraint` details.
    Detail,

    /// A node that represents the name of an element, typically used in declarations.
    ///
    /// # Examples
    /// - **Rust**: The name identifier in a function or struct definition.
    /// - **HTML**: The tag name in an element.
    Name,

    /// A discrete syntactic unit within a container, representing a single
    /// logical entry or instruction.
    ///
    /// This typically maps to a **Statement** in programming languages, or a standalone
    /// instruction in assembly. In markup, it could represent a list item or a table row.
    ///
    /// # Examples
    /// - **Rust**: A `Stmt` inside a block.
    /// - **Markdown**: `ListItem` or `TableCell`.
    /// - **SQL**: A standalone `Statement` or a `Clause` (like `WHERE`).
    /// - **ASM**: A single `Instruction` (e.g., `NOP`).
    Statement,

    /// A node representing a computed result or a complex logical operation.
    ///
    /// Unlike a simple `Value` (which is an atomic literal), an `Expression` involves
    /// operators or logic that must be evaluated.
    ///
    /// # Examples
    /// - **Rust**: `BinaryExpr`, `UnaryExpr`, or `RangeExpr`.
    /// - **SQL**: `BinaryOp` in a `WHERE` clause.
    /// - **Python**: `ListComprehension` or `Lambda`.
    Expression,

    /// A node that performs an invocation or call to a function, method, or macro.
    ///
    /// This role identifies the active execution of a named entity with optional arguments.
    ///
    /// # Examples
    /// - **Rust**: `CallExpr`, `MethodCallExpr`, or `MacroInvocation`.
    /// - **SQL**: `FunctionCall` (e.g., `COUNT(*)`).
    /// - **Excel**: A formula call.
    Call,

    /// A node representing an **atomic** data value or a primitive constant.
    ///
    /// This role is strictly for atomic values like numbers, strings, or booleans.
    /// It **does not** include composite structures like arrays `[]` or objects `{}`,
    /// which should be categorized as [`UniversalElementRole::Container`].
    ///
    /// # Examples
    /// - **Rust**: `Literal` (strings, numbers, booleans).
    /// - **Markdown**: `InlineCode`, `Emphasis`, or `Strong`.
    /// - **SQL**: `Literal` values.
    /// - **JSON/YAML**: Atomic `Scalar` values (strings, integers, nulls).
    Value,

    /// A node that acts as a host for content in a different language or a raw
    /// fragment requiring a separate parsing pass (Language Injection).
    ///
    /// # Examples
    /// - **HTML**: A `<script>` or `<style>` block containing JS/CSS.
    /// - **Markdown**: `CodeBlock` (host for other languages).
    /// - **Rust/Java**: A string literal containing SQL (if marked for injection).
    /// - **PHP**: Raw HTML fragments outside of `<?php ... ?>` tags.
    Embedded,

    /// A node specifically created to represent a syntax error or recovery point
    /// in the source code.
    Error,

    /// No specific structural role assigned or recognized for this element.
    None,
}

747impl ElementRole for UniversalElementRole {
748    fn universal(&self) -> UniversalElementRole {
749        *self
750    }
751
752    fn name(&self) -> &str {
753        match *self {
754            UniversalElementRole::Container => "meta.block",
755            UniversalElementRole::Statement => "meta.statement",
756            UniversalElementRole::Binding => "variable.other.declaration",
757            UniversalElementRole::Reference => "variable.other.usage",
758            UniversalElementRole::Call => "entity.name.function.call",
759            UniversalElementRole::Expression => "meta.expression",
760            UniversalElementRole::Value => "constant",
761            UniversalElementRole::Definition => "entity.name.function",
762            UniversalElementRole::Typing => "entity.name.type",
763            UniversalElementRole::Metadata => "meta.preprocessor",
764            UniversalElementRole::Attribute => "entity.other.attribute-name",
765            UniversalElementRole::AttributeKey => "entity.other.attribute-name.key",
766            UniversalElementRole::Detail => "meta.detail",
767            UniversalElementRole::Name => "entity.name",
768            UniversalElementRole::Embedded => "meta.embedded",
769            UniversalElementRole::Documentation => "comment.block.documentation",
770            UniversalElementRole::Root => "source",
771            UniversalElementRole::Error => "invalid",
772            UniversalElementRole::None => "none",
773        }
774    }
775}