oak_core/language/mod.rs
1#[cfg(feature = "serde")]
2use serde::{Deserialize, Serialize};
3use std::{fmt::Debug, hash::Hash};
4
/// Coarse classification of the kind of language being described.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum LanguageCategory {
    /// A general-purpose programming language, such as Rust, C, or Java.
    Programming,
    /// A markup or document language, such as Markdown, HTML, or Typst.
    Markup,
    /// A configuration or data-serialization format, such as YAML, JSON, or TOML.
    Config,
    /// A styling language, such as CSS, Sass, or Less.
    StyleSheet,
    /// A domain-specific language or specialized notation, such as SQL, Regex, or Math.
    Dsl,
    /// A modeling language, such as UML, Mermaid, or PlantUML.
    Modeling,
    /// Anything else, including languages that have not been classified.
    Other,
}
24
25/// Language definition trait that coordinates all language-related types and behaviors.
26///
27/// This trait serves as the foundation for defining programming languages within the
28/// incremental parsing system. It acts as a marker trait that ties together various
29/// language-specific components like lexers, parsers, and rebuilders.
30///
31/// # Overview
32///
33/// The Language trait is the central abstraction that enables the parsing framework
34/// to be language-agnostic while still providing language-specific functionality.
35/// Each language implementation must define its own types for tokens, elements,
36/// and the root structure of the parsed tree.
37///
38/// # Design Philosophy
39///
40/// The trait follows a compositional design where:
41/// - `TokenType` defines the atomic units of the language (tokens)
42/// - `ElementType` defines the composite structures (nodes)
43/// - `TypedRoot` defines the top-level structure of the parsed document
44///
45/// This separation allows for maximum flexibility while maintaining type safety
46/// and performance characteristics required for incremental parsing.
47///
48/// # Examples
49///
50/// ```rust
51/// # use oak_core::{Language, TokenType, ElementType, UniversalTokenRole, UniversalElementRole};
52/// // Define a simple language
53/// #[derive(Clone)]
54/// struct MyLanguage;
55///
56/// impl Language for MyLanguage {
57/// const NAME: &'static str = "my-language";
58/// type TokenType = MyToken;
59/// type ElementType = MyElement;
60/// type TypedRoot = ();
61/// }
62///
63/// // With corresponding type definitions
64/// #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
65/// enum MyToken {
66/// Identifier,
67/// EndOfStream,
68/// }
69///
70/// impl TokenType for MyToken {
71/// const END_OF_STREAM: Self = MyToken::EndOfStream;
72/// type Role = UniversalTokenRole;
73/// fn role(&self) -> Self::Role { UniversalTokenRole::None }
74/// }
75///
76/// #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
77/// enum MyElement {}
78///
79/// impl ElementType for MyElement {
80/// type Role = UniversalElementRole;
81/// fn role(&self) -> Self::Role { UniversalElementRole::None }
82/// }
83/// ```
84pub trait Language: Send + Sync {
85 /// The name of the language (e.g., "rust", "sql").
86 const NAME: &'static str;
87
88 /// The category of the language.
89 const CATEGORY: LanguageCategory = LanguageCategory::Programming;
90
91 /// The token type used to represent different token and node types in the language.
92 ///
93 /// This associated type defines how different syntactic elements (tokens, nodes) are
94 /// categorized and identified within the language. It must implement `Copy` and `Eq`
95 /// to ensure efficient handling in the parsing system.
96 ///
97 /// # Requirements
98 ///
99 /// The token type must:
100 /// - Implement the `TokenType` trait
101 /// - Be copyable to enable efficient passing
102 /// - Support equality comparison for token matching
103 /// - Be sendable across thread boundaries
104 ///
105 /// # Examples
106 ///
107 /// ```
108 /// #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
109 /// enum RustSyntaxKind {
110 /// LetKeyword,
111 /// Identifier,
112 /// Number,
113 /// // ... other token kinds
114 /// }
115 /// ```
116 type TokenType: TokenType;
117
118 /// The element type used to represent composite structures in the parsed tree.
119 ///
120 /// While tokens represent the atomic units of the language, elements represent
121 /// the composite structures formed by combining tokens according to grammar rules.
122 /// This includes expressions, statements, declarations, and other syntactic constructs.
123 ///
124 /// # Requirements
125 ///
126 /// The element type must:
127 /// - Implement the `ElementType` trait
128 /// - Be copyable for efficient handling
129 /// - Support equality comparison
130 /// - Be sendable across thread boundaries
131 type ElementType: ElementType;
132
133 /// The root type for the parsed tree that represents the top-level structure of the language.
134 ///
135 /// This associated type defines the structure of the root node in the parsed tree,
136 /// which typically contains the entire parsed source code organized according to the
137 /// language's grammar rules. The root type serves as the entry point for traversing
138 /// and manipulating the parsed representation.
139 ///
140 /// # Design Considerations
141 ///
142 /// The root type should:
143 /// - Contain references to all top-level language constructs
144 /// - Provide efficient access to the parsed content
145 /// - Support incremental updates when the source changes
146 ///
147 /// # Examples
148 ///
149 /// ```ignore
150 /// struct RustRoot {
151 /// items: Vec<RustItem>,
152 /// }
153 ///
154 /// struct RustRoot {
155 /// modules: Vec<Module>,
156 /// imports: Vec<Import>,
157 /// declarations: Vec<Declaration>,
158 /// }
159 /// ```
160 type TypedRoot;
161}
162
// `TokenType` is generated through a macro so that extra supertrait bounds can be appended
// at the invocation site; the invocations in this file add the serde bounds only when the
// `serde` feature is enabled. The trait's documentation is written on the trait itself,
// inside the macro body, because docs attached to a non-exported `macro_rules!` definition
// are not rendered for the generated item.
macro_rules! define_token_type {
    ($($bound:tt)*) => {
        /// A trait for types that represent a token's kind in a specific language.
        ///
        /// This trait is the foundation for defining the different kinds of tokens in the
        /// parsing system. It categorizes tokens and provides methods for identifying
        /// their roles in the language grammar.
        ///
        /// # Universal Grammar Philosophy
        ///
        /// The role mechanism in Oak is inspired by the concept of "Universal Grammar".
        /// While every language has its own unique "Surface Structure" (its specific token
        /// kinds), most share a common "Deep Structure" (syntactic roles).
        ///
        /// By mapping language-specific kinds to [`UniversalTokenRole`], generic tools like
        /// highlighters and formatters can work across 100+ languages without deep
        /// knowledge of each one's specific grammar.
        ///
        /// # Implementation Guidelines
        ///
        /// When implementing this trait for a specific language:
        /// - Use an enum with discriminant values for efficient matching
        /// - Derive `Copy`, `Eq`, `Hash`, and `Debug`; they are supertraits of this trait
        /// - Include an end-of-stream variant and expose it through `END_OF_STREAM` to
        ///   signal input termination
        /// - Define a `Role` associated type and implement the `role()` method to provide
        ///   syntactic context
        ///
        /// # Examples
        ///
        /// ```ignore
        /// #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
        /// enum SimpleToken {
        ///     Identifier,
        ///     Number,
        ///     Plus,
        ///     EndOfStream,
        /// }
        ///
        /// impl TokenType for SimpleToken {
        ///     const END_OF_STREAM: Self = SimpleToken::EndOfStream;
        ///     type Role = UniversalTokenRole; // Or a custom Role type
        ///
        ///     fn role(&self) -> Self::Role {
        ///         match self {
        ///             SimpleToken::Identifier => UniversalTokenRole::Name,
        ///             SimpleToken::Number => UniversalTokenRole::Literal,
        ///             SimpleToken::Plus => UniversalTokenRole::Operator,
        ///             _ => UniversalTokenRole::None,
        ///         }
        ///     }
        ///
        ///     // Every other method has a default implementation.
        /// }
        /// ```
        pub trait TokenType: Copy + Eq + Hash + Send + Sync + std::fmt::Debug $($bound)* {
            /// The associated role type for this token kind.
            type Role: TokenRole;

            /// A constant representing the end of the input stream.
            const END_OF_STREAM: Self;

            /// Returns the general syntactic role of this token.
            fn role(&self) -> Self::Role;

            /// Returns true if this token matches the specified language-specific role.
            fn is_role(&self, role: Self::Role) -> bool {
                self.role() == role
            }

            /// Returns true if this token matches the specified universal role.
            ///
            /// The token's own role is first mapped through [`TokenRole::universal`].
            fn is_universal(&self, role: UniversalTokenRole) -> bool {
                self.role().universal() == role
            }

            /// Returns true if this token represents a comment.
            fn is_comment(&self) -> bool {
                self.is_universal(UniversalTokenRole::Comment)
            }

            /// Returns true if this token represents whitespace.
            fn is_whitespace(&self) -> bool {
                self.is_universal(UniversalTokenRole::Whitespace)
            }

            /// Returns true if this token represents an error condition.
            fn is_error(&self) -> bool {
                self.is_universal(UniversalTokenRole::Error)
            }

            /// Returns true if this token is trivia, i.e. whitespace or a comment.
            fn is_ignored(&self) -> bool {
                self.is_whitespace() || self.is_comment()
            }

            /// Returns true if this token represents the end of the input stream.
            fn is_end_of_stream(&self) -> bool {
                *self == Self::END_OF_STREAM
            }
        }
    };
}
266
267#[cfg(feature = "serde")]
268define_token_type!(+ Serialize + for<'de> Deserialize<'de>);
269
270#[cfg(not(feature = "serde"))]
271define_token_type!();
272
273/// A trait for types that can represent a token's syntactic role.
274pub trait TokenRole: Copy + Eq + Send {
275 /// Maps this role to a universal, language-agnostic role.
276 fn universal(&self) -> UniversalTokenRole;
277
278 /// Returns a specific name for this role, used for granular highlighting.
279 ///
280 /// For universal roles, this should return the standard scope name (e.g., "keyword").
281 /// For language-specific roles, it can return more specific names (e.g., "keyword.control").
282 fn name(&self) -> &str;
283}
284
/// The language-agnostic syntactic role a token can play, shared by diverse languages.
///
/// # Universal Grammar
///
/// The mechanism takes its inspiration from Noam Chomsky's Universal Grammar theory: the
/// "Surface Structure" of languages (their specific token kinds) may vary wildly, yet they
/// share a common "Deep Structure" (syntactic roles).
///
/// In the Oak framework:
/// - **Surface Structure**: the specific token kinds a language defines (e.g., Rust's `PubKeyword`).
/// - **Deep Structure**: the universal roles listed in this enum (e.g., [`UniversalTokenRole::Keyword`]).
///
/// Once token kinds are mapped to these roles, generic tools can recognize names, literals,
/// or operators across 100+ languages without learning the specifics of each grammar.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
pub enum UniversalTokenRole {
    /// A reserved word of the language or a built-in command (e.g., 'SELECT', 'let', 'MOV').
    Keyword,
    /// Any name-like token: identifiers, labels, keys, tags, and so on.
    Name,
    /// A literal value such as a string, number, boolean, or null.
    Literal,
    /// An escape sequence, or a special character representation, inside a literal.
    Escape,
    /// A mathematical, logical, or structural operator (e.g., '+', '=>', 'LIKE').
    Operator,
    /// A structural character such as a bracket, comma, or semicolon.
    Punctuation,
    /// A developer annotation or piece of documentation.
    Comment,
    /// A formatting character such as a space or tab.
    Whitespace,
    /// Content that is malformed or was not recognized.
    Error,
    /// The token has no specific role assigned.
    None,
    /// The marker for the end of the stream.
    Eof,
}
326
327impl TokenRole for UniversalTokenRole {
328 fn universal(&self) -> UniversalTokenRole {
329 *self
330 }
331
332 fn name(&self) -> &str {
333 match *self {
334 UniversalTokenRole::Keyword => "keyword",
335 UniversalTokenRole::Name => "variable.other",
336 UniversalTokenRole::Literal => "constant",
337 UniversalTokenRole::Escape => "constant.character.escape",
338 UniversalTokenRole::Operator => "keyword.operator",
339 UniversalTokenRole::Punctuation => "punctuation",
340 UniversalTokenRole::Comment => "comment",
341 UniversalTokenRole::Whitespace => "punctuation.whitespace",
342 UniversalTokenRole::Error => "invalid",
343 UniversalTokenRole::None => "none",
344 UniversalTokenRole::Eof => "punctuation.eof",
345 }
346 }
347}
348
// `ElementType` is generated through a macro so that extra supertrait bounds can be
// appended at the invocation site; the invocations in this file add the serde bounds only
// when the `serde` feature is enabled. The trait's documentation is written on the trait
// itself, inside the macro body, because docs attached to a non-exported `macro_rules!`
// definition are not rendered for the generated item.
macro_rules! define_element_type {
    ($($bound:tt)*) => {
        /// A trait for types that represent an element's kind in a syntax tree.
        ///
        /// While tokens represent the atomic units of a language, elements represent the
        /// composite structures formed by combining tokens according to grammar rules.
        /// This includes expressions, statements, declarations, and other syntactic
        /// constructs.
        ///
        /// # Universal Grammar Philosophy
        ///
        /// Just like tokens, syntax tree elements are mapped from their "Surface Structure"
        /// (language-specific nodes) to a "Deep Structure" via [`UniversalElementRole`].
        ///
        /// This allows structural analysis tools (like symbol outline extractors) to
        /// identify [`UniversalElementRole::Binding`] (definitions) or
        /// [`UniversalElementRole::Container`] (scopes/blocks) uniformly across different
        /// language families.
        ///
        /// # Implementation Guidelines
        ///
        /// When implementing this trait for a specific language:
        /// - Use an enum with discriminant values for efficient matching
        /// - Derive `Copy`, `Eq`, `Hash`, and `Debug`; they are supertraits of this trait
        /// - Include a Root variant to identify the top-level element
        /// - Include an Error variant for malformed constructs
        /// - Define a `Role` associated type and implement the `role()` method
        ///
        /// # Examples
        ///
        /// ```ignore
        /// #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
        /// enum MyElement {
        ///     Root,
        ///     FunctionDeclaration,
        ///     Block,
        ///     Error,
        /// }
        ///
        /// impl ElementType for MyElement {
        ///     type Role = UniversalElementRole;
        ///
        ///     fn role(&self) -> Self::Role {
        ///         match self {
        ///             MyElement::Root => UniversalElementRole::Root,
        ///             MyElement::FunctionDeclaration => UniversalElementRole::Binding,
        ///             MyElement::Block => UniversalElementRole::Container,
        ///             MyElement::Error => UniversalElementRole::Error,
        ///         }
        ///     }
        ///
        ///     // Optional: `is_root` and `is_error` already have role-based defaults,
        ///     // but they can be overridden with a direct variant check.
        ///     fn is_root(&self) -> bool {
        ///         matches!(self, MyElement::Root)
        ///     }
        ///
        ///     fn is_error(&self) -> bool {
        ///         matches!(self, MyElement::Error)
        ///     }
        /// }
        /// ```
        pub trait ElementType: Copy + Eq + Hash + Send + Sync + std::fmt::Debug $($bound)* {
            /// The associated role type for this element kind.
            type Role: ElementRole;

            /// Returns the general syntactic role of this element.
            fn role(&self) -> Self::Role;

            /// Returns true if this element matches the specified language-specific role.
            fn is_role(&self, role: Self::Role) -> bool {
                self.role() == role
            }

            /// Returns true if this element matches the specified universal role.
            ///
            /// The element's own role is first mapped through [`ElementRole::universal`].
            fn is_universal(&self, role: UniversalElementRole) -> bool {
                self.role().universal() == role
            }

            /// Returns true if this element represents the root of the parsed tree.
            fn is_root(&self) -> bool {
                self.is_universal(UniversalElementRole::Root)
            }

            /// Returns true if this element represents an error condition.
            fn is_error(&self) -> bool {
                self.is_universal(UniversalElementRole::Error)
            }
        }
    };
}
436
437#[cfg(feature = "serde")]
438define_element_type!(+ Serialize + for<'de> Deserialize<'de>);
439
440#[cfg(not(feature = "serde"))]
441define_element_type!();
442
443/// A trait for types that can represent an element's structural role.
444pub trait ElementRole: Copy + Eq + Send {
445 /// Maps this role to a universal, language-agnostic role.
446 fn universal(&self) -> UniversalElementRole;
447
448 /// Returns a specific name for this role, used for granular highlighting.
449 fn name(&self) -> &str;
450}
451
/// The language-agnostic structural role played by an element of a syntax tree.
///
/// # Universal Grammar
///
/// The idea borrows from Noam Chomsky's Universal Grammar theory and applies it to the
/// structural hierarchy of syntax trees: the "Surface Structure" (the specific production
/// rules of a grammar) differs from language to language, yet languages share a common
/// "Deep Structure" (structural intent).
///
/// Oak categorizes syntax tree elements along those two levels:
/// - **Surface Structure**: the specific node kinds a language defines
///   (e.g., Rust's `FnDeclaration`, SQL's `SelectStatement`, or YAML's `Mapping`).
/// - **Deep Structure**: the universal structural patterns listed in this enum.
///
/// Mapping nodes onto these roles enables sophisticated analysis across diverse
/// language families:
/// - **Containers & Statements**: recognize hierarchical scopes and their constituents
///   (e.g., a SQL table is a container, its clauses are statements).
/// - **Bindings & References**: recognize the flow of information and identifiers
///   (e.g., an ASM label is a binding, a jump instruction is a reference).
/// - **Values**: recognize the atomic data payload or expression results.
///
/// # Design Philosophy: The 99% Rule
///
/// The enum aims to be a "sufficiently complete" abstraction for what common tools need
/// (Highlighting, Outline, Navigation, and Refactoring) without giving up
/// language-agnostic simplicity.
///
/// ### 1. Structural Identity (The "What")
/// A role states a node's primary structural responsibility in the tree rather than its
/// domain-specific semantic meaning. For example:
/// - A "Class" or "Function" is structurally a [`UniversalElementRole::Definition`] and
///   often also a [`UniversalElementRole::Container`].
/// - An "Import" is structurally a [`UniversalElementRole::Statement`] that contains a
///   [`UniversalElementRole::Reference`].
///
/// ### 2. Broad Categories (The "How")
/// Elements fall into four major structural groups:
/// - **Flow Control & Logic**: [`UniversalElementRole::Statement`],
///   [`UniversalElementRole::Expression`], [`UniversalElementRole::Call`], and
///   [`UniversalElementRole::Root`].
/// - **Symbol Management**: [`UniversalElementRole::Definition`],
///   [`UniversalElementRole::Binding`], and [`UniversalElementRole::Reference`].
/// - **Hierarchy & Scoping**: [`UniversalElementRole::Container`].
/// - **Metadata & Auxiliaries**: [`UniversalElementRole::Typing`],
///   [`UniversalElementRole::Metadata`], [`UniversalElementRole::Attribute`],
///   [`UniversalElementRole::Documentation`], etc.
///
/// ### 3. Intent-Based Selection
/// If a node could plausibly take several roles, pick the one that expresses its **primary
/// structural intent**.
/// - **Example**: In Rust, an `if` expression is both an `Expression` and a `Container`.
///   Its primary role in the tree, however, is [`UniversalElementRole::Expression`]
///   (it produces a value), whereas its children (the blocks) are
///   [`UniversalElementRole::Container`]s.
/// - **Example**: In Markdown, a "List" is a [`UniversalElementRole::Container`], while each
///   "ListItem" is a [`UniversalElementRole::Statement`] within that container.
///
/// ### 4. Intentional Exclusions
/// Roles that can be expressed by combining existing roles, or that would need deep
/// semantic analysis, are left out on purpose:
/// - **Keyword-specific roles**: Roles like "Loop", "Conditional", or "Module" are excluded.
///   These are surface-level distinctions; in the Deep Structure they are all
///   [`UniversalElementRole::Container`]s or [`UniversalElementRole::Statement`]s.
/// - **Semantic Relationships**: Roles like "Inheritance", "Implementation", or "Dependency"
///   are excluded. Semantic graph analysis is a better fit for them than syntactic tree
///   roles.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub enum UniversalElementRole {
    /// The root at the very top of the syntax tree; it stands for the entire document or
    /// source file.
    Root,

    /// A high-level structural container that establishes a scope or a logical grouping.
    Container,

    /// A node covering the complete declaration or definition of a symbol.
    ///
    /// This role marks the "whole" entity that defines something in the code, which is
    /// crucial when building symbol trees and navigation outlines.
    ///
    /// # Examples
    /// - **Rust**: The entire `Fn` declaration block, `Struct` item, or `Enum`.
    /// - **Markdown**: `Heading` or `LinkDefinition`.
    /// - **SQL**: The whole `CREATE TABLE` or `CREATE PROCEDURE` statement.
    /// - **ASM**: A `Proc` (procedure) block or a multi-line data definition.
    /// - **YAML**: A schema-defined object or a complex configuration block.
    Definition,

    /// A node that performs the specific act of binding a name to an entity.
    ///
    /// Where `Definition` covers the entire construct, `Binding` targets only the part
    /// (usually the identifier) that introduces the name.
    ///
    /// # Examples
    /// - **Rust**: The identifier node in a `let` pattern or function name.
    /// - **Markdown**: `LinkLabel` in a reference link definition.
    /// - **SQL**: The `Table` name identifier in `CREATE TABLE`.
    /// - **ASM**: A `Label` node (e.g., `main:`).
    /// - **YAML**: The `Key` in a key-value mapping.
    Binding,

    /// A node referring to a name or entity that already exists and is defined elsewhere.
    ///
    /// # Examples
    /// - **Rust**: `PathExpr` (variable usage) or `MethodCall`.
    /// - **Markdown**: `LinkReference` or `FootnoteReference`.
    /// - **SQL**: `ColumnName` in a `SELECT` clause or `TableName` in `FROM`.
    /// - **ASM**: A `Label` reference in a jump (e.g., `JMP main`).
    /// - **YAML**: An `Alias` anchor (e.g., `*anchor_name`).
    Reference,

    /// A node that stands for a type signature, a constraint, or a type reference.
    ///
    /// The role separates type information from general logic or values, which is
    /// essential for type checking and intelligent completion.
    ///
    /// # Examples
    /// - **Rust**: `TypePath` (e.g., `: i32`), `GenericArgument`, or `WhereClause`.
    /// - **SQL**: `DataType` (e.g., `VARCHAR(255)` or `INT`).
    /// - **ASM**: Size specifiers (e.g., `DWORD`, `PTR`).
    /// - **TypeScript**: `TypeAnnotation` or `InterfaceDeclaration`.
    Typing,

    /// A structured comment or documentation node that is attached to another element.
    ///
    /// In contrast to raw `Comment` tokens, these are syntax nodes that may carry an
    /// internal structure of their own (like Markdown or Tagged parameters).
    ///
    /// # Examples
    /// - **Rust**: `DocComment` (e.g., `/// ...`).
    /// - **Java**: `Javadoc` blocks.
    /// - **Python**: `Docstring` literals.
    Documentation,

    /// A high-level annotation, decorator, or macro that supplies extra semantic info.
    ///
    /// # Metadata vs Attribute
    /// - **Metadata**: Usually a language-level extension that "decorates" an element from
    ///   the outside, often affecting compilation or runtime behavior (e.g., Rust
    ///   attributes).
    /// - **Attribute**: Usually a built-in, structural property that is part of the
    ///   element's native definition (e.g., HTML attributes).
    ///
    /// # Examples
    /// - **Rust**: `Attribute` (e.g., `#[derive(...)]`) or `MacroCall`.
    /// - **Markdown**: `Frontmatter` (YAML/TOML header).
    /// - **Java/TS**: `@Decorator` or `@Annotation`.
    /// - **Python**: `@decorator` syntax.
    Metadata,

    /// A specific property, flag, or attribute-value pair.
    ///
    /// Whereas `Metadata` decorates an element with external logic, `Attribute` stands for
    /// intrinsic properties defined by the language's schema or structure.
    ///
    /// # Examples
    /// - **HTML/XML**: An `Attribute` (e.g., `id="main"`).
    /// - **Markdown**: `LinkTitle` or `ImageAlt` text.
    /// - **YAML**: A specific configuration property.
    /// - **ASM**: Segment attributes (e.g., `READONLY`, `EXECUTE`).
    Attribute,

    /// The key portion of an attribute, property, or configuration entry.
    ///
    /// The role exists separately because such a key:
    /// - is not a **Reference** (it doesn't refer to an external symbol);
    /// - is not a traditional **Binding** (it doesn't define a symbol in a global or
    ///   lexical scope);
    /// - is not a **Keyword** (it is typically a user-defined or schema-defined identifier).
    ///
    /// # Examples
    /// - **HTML**: The `id` in `id="main"`.
    /// - **Markdown**: `AttributeName` (in Pandoc-style `{ #id .class }`).
    /// - **YAML**: The key in a property mapping.
    /// - **TOML**: The key in a table entry.
    AttributeKey,

    /// A node supplying additional details or secondary information for another element.
    ///
    /// # Examples
    /// - **Rust**: `GenericParameter` list, `FunctionParameter` list.
    /// - **SQL**: `Constraint` details.
    Detail,

    /// A node holding the name of an element, typically within a declaration.
    ///
    /// # Examples
    /// - **Rust**: The name identifier in a function or struct definition.
    /// - **HTML**: The tag name in an element.
    Name,

    /// A discrete syntactic unit inside a container that stands for a single logical entry
    /// or instruction.
    ///
    /// Typically this corresponds to a **Statement** in programming languages, or to a
    /// standalone instruction in assembly. In markup, it could be a list item or a table row.
    ///
    /// # Examples
    /// - **Rust**: A `Stmt` inside a block.
    /// - **Markdown**: `ListItem` or `TableCell`.
    /// - **SQL**: A standalone `Statement` or a `Clause` (like `WHERE`).
    /// - **ASM**: A single `Instruction` (e.g., `NOP`).
    Statement,

    /// A node standing for a computed result or a complex logical operation.
    ///
    /// A simple `Value` is an atomic literal; an `Expression`, by contrast, involves
    /// operators or logic that must be evaluated.
    ///
    /// # Examples
    /// - **Rust**: `BinaryExpr`, `UnaryExpr`, or `RangeExpr`.
    /// - **SQL**: `BinaryOp` in a `WHERE` clause.
    /// - **Python**: `ListComprehension` or `Lambda`.
    Expression,

    /// A node that invokes or calls a function, method, or macro.
    ///
    /// The role marks the active execution of a named entity with optional arguments.
    ///
    /// # Examples
    /// - **Rust**: `CallExpr`, `MethodCallExpr`, or `MacroInvocation`.
    /// - **SQL**: `FunctionCall` (e.g., `COUNT(*)`).
    /// - **Excel**: A formula call.
    Call,

    /// A node standing for an **atomic** data value or a primitive constant.
    ///
    /// The role is reserved for atomic values like numbers, strings, or booleans. It
    /// **does not** cover composite structures like arrays `[]` or objects `{}`; those
    /// should be categorized as [`UniversalElementRole::Container`].
    ///
    /// # Examples
    /// - **Rust**: `Literal` (strings, numbers, booleans).
    /// - **Markdown**: `InlineCode`, `Emphasis`, or `Strong`.
    /// - **SQL**: `Literal` values.
    /// - **JSON/YAML**: Atomic `Scalar` values (strings, integers, nulls).
    Value,

    /// A node hosting content written in a different language, or a raw fragment that
    /// needs a separate parsing pass (Language Injection).
    ///
    /// # Examples
    /// - **HTML**: A `<script>` or `<style>` block containing JS/CSS.
    /// - **Markdown**: `CodeBlock` (host for other languages).
    /// - **Rust/Java**: A string literal containing SQL (if marked for injection).
    /// - **PHP**: Raw HTML fragments outside of `<?php ... ?>` tags.
    Embedded,

    /// A node created specifically to stand for a syntax error or a recovery point in the
    /// source code.
    Error,

    /// The element has no specific structural role assigned or recognized.
    None,
}
700
701impl ElementRole for UniversalElementRole {
702 fn universal(&self) -> UniversalElementRole {
703 *self
704 }
705
706 fn name(&self) -> &str {
707 match *self {
708 UniversalElementRole::Container => "meta.block",
709 UniversalElementRole::Statement => "meta.statement",
710 UniversalElementRole::Binding => "variable.other.declaration",
711 UniversalElementRole::Reference => "variable.other.usage",
712 UniversalElementRole::Call => "entity.name.function.call",
713 UniversalElementRole::Expression => "meta.expression",
714 UniversalElementRole::Value => "constant",
715 UniversalElementRole::Definition => "entity.name.function",
716 UniversalElementRole::Typing => "entity.name.type",
717 UniversalElementRole::Metadata => "meta.preprocessor",
718 UniversalElementRole::Attribute => "entity.other.attribute-name",
719 UniversalElementRole::AttributeKey => "entity.other.attribute-name.key",
720 UniversalElementRole::Detail => "meta.detail",
721 UniversalElementRole::Name => "entity.name",
722 UniversalElementRole::Embedded => "meta.embedded",
723 UniversalElementRole::Documentation => "comment.block.documentation",
724 UniversalElementRole::Root => "source",
725 UniversalElementRole::Error => "invalid",
726 UniversalElementRole::None => "none",
727 }
728 }
729}