oak_core/language/mod.rs
1#[cfg(feature = "serde")]
2use serde::{Deserialize, Serialize};
3use std::{fmt::Debug, hash::Hash};
4
/// Broad family that a language is classified under.
///
/// Used as the type of [`Language::CATEGORY`], whose default value is
/// [`LanguageCategory::Programming`].
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum LanguageCategory {
    /// A general-purpose programming language, such as Rust, C, or Java.
    Programming,
    /// A markup or document language, such as Markdown, HTML, or Typst.
    Markup,
    /// A configuration or data-serialization format, such as YAML, JSON, or TOML.
    Config,
    /// A styling language, such as CSS, Sass, or Less.
    StyleSheet,
    /// A domain-specific language or specialized notation, such as SQL, Regex, or Math.
    Dsl,
    /// Anything that does not fit the categories above, or is not yet classified.
    Other,
}
22
23/// Language definition trait that coordinates all language-related types and behaviors.
24///
25/// This trait serves as the foundation for defining programming languages within the
26/// incremental parsing system. It acts as a marker trait that ties together various
27/// language-specific components like lexers, parsers, and rebuilders.
28///
29/// # Overview
30///
31/// The Language trait is the central abstraction that enables the parsing framework
32/// to be language-agnostic while still providing language-specific functionality.
33/// Each language implementation must define its own types for tokens, elements,
34/// and the root structure of the parsed tree.
35///
36/// # Design Philosophy
37///
38/// The trait follows a compositional design where:
39/// - `TokenType` defines the atomic units of the language (tokens)
40/// - `ElementType` defines the composite structures (nodes)
41/// - `TypedRoot` defines the top-level structure of the parsed document
42///
43/// This separation allows for maximum flexibility while maintaining type safety
44/// and performance characteristics required for incremental parsing.
45///
46/// # Examples
47///
48/// ```rust
49/// # use oak_core::{Language, TokenType, ElementType, UniversalTokenRole, UniversalElementRole};
50/// // Define a simple language
51/// #[derive(Clone)]
52/// struct MyLanguage;
53///
54/// impl Language for MyLanguage {
55/// const NAME: &'static str = "my-language";
56/// type TokenType = MyToken;
57/// type ElementType = MyElement;
58/// type TypedRoot = ();
59/// }
60///
61/// // With corresponding type definitions
62/// #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
63/// enum MyToken {
64/// Identifier,
65/// EndOfStream,
66/// }
67///
68/// impl TokenType for MyToken {
69/// const END_OF_STREAM: Self = MyToken::EndOfStream;
70/// type Role = UniversalTokenRole;
71/// fn role(&self) -> Self::Role { UniversalTokenRole::None }
72/// }
73///
74/// #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
75/// enum MyElement {}
76///
77/// impl ElementType for MyElement {
78/// type Role = UniversalElementRole;
79/// fn role(&self) -> Self::Role { UniversalElementRole::None }
80/// }
81/// ```
82pub trait Language: Send + Sync + 'static {
83 /// The name of the language (e.g., "rust", "sql").
84 const NAME: &'static str;
85
86 /// The category of the language.
87 const CATEGORY: LanguageCategory = LanguageCategory::Programming;
88
89 /// The token type used to represent different token and node types in the language.
90 ///
91 /// This associated type defines how different syntactic elements (tokens, nodes) are
92 /// categorized and identified within the language. It must implement `Copy` and `Eq`
93 /// to ensure efficient handling in the parsing system.
94 ///
95 /// # Requirements
96 ///
97 /// The token type must:
98 /// - Implement the `TokenType` trait
99 /// - Be copyable to enable efficient passing
100 /// - Support equality comparison for token matching
101 /// - Be sendable across thread boundaries
102 ///
103 /// # Examples
104 ///
105 /// ```
106 /// #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
107 /// enum RustSyntaxKind {
108 /// LetKeyword,
109 /// Identifier,
110 /// Number,
111 /// // ... other token kinds
112 /// }
113 /// ```
114 type TokenType: TokenType;
115
116 /// The element type used to represent composite structures in the parsed tree.
117 ///
118 /// While tokens represent the atomic units of the language, elements represent
119 /// the composite structures formed by combining tokens according to grammar rules.
120 /// This includes expressions, statements, declarations, and other syntactic constructs.
121 ///
122 /// # Requirements
123 ///
124 /// The element type must:
125 /// - Implement the `ElementType` trait
126 /// - Be copyable for efficient handling
127 /// - Support equality comparison
128 /// - Be sendable across thread boundaries
129 type ElementType: ElementType;
130
131 /// The root type for the parsed tree that represents the top-level structure of the language.
132 ///
133 /// This associated type defines the structure of the root node in the parsed tree,
134 /// which typically contains the entire parsed source code organized according to the
135 /// language's grammar rules. The root type serves as the entry point for traversing
136 /// and manipulating the parsed representation.
137 ///
138 /// # Design Considerations
139 ///
140 /// The root type should:
141 /// - Contain references to all top-level language constructs
142 /// - Provide efficient access to the parsed content
143 /// - Support incremental updates when the source changes
144 ///
145 /// # Examples
146 ///
147 /// ```ignore
148 /// struct RustRoot {
149 /// items: Vec<RustItem>,
150 /// }
151 ///
152 /// struct RustRoot {
153 /// modules: Vec<Module>,
154 /// imports: Vec<Import>,
155 /// declarations: Vec<Declaration>,
156 /// }
157 /// ```
158 type TypedRoot;
159}
160
161/// Token type definitions for tokens in the parsing system.
162///
163/// This module provides the [`TokenType`] trait which serves as the foundation
164/// for defining different types of tokens in the parsing system.
165/// It enables categorization of token elements and provides methods for
166/// identifying their roles in the language grammar.
167///
168/// # Universal Grammar Philosophy
169///
170/// The role mechanism in Oak is inspired by the concept of "Universal Grammar".
171/// While every language has its own unique "Surface Structure" (its specific token kinds),
172/// most share a common "Deep Structure" (syntactic roles).
173///
174/// By mapping language-specific kinds to [`UniversalTokenRole`], we enable generic tools
175/// like highlighters and formatters to work across 100+ languages without deep
176/// knowledge of each one's specific grammar.
177///
178/// # Implementation Guidelines
179///
180/// When implementing this trait for a specific language:
181/// - Use an enum with discriminant values for efficient matching
182/// - Ensure all variants are Copy and Eq for performance
183/// - Include an END_OF_STREAM variant to signal input termination
184/// - Define a `Role` associated type and implement the `role()` method to provide
185/// syntactic context.
186///
187/// # Examples
188///
189/// ```ignore
190/// #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
191/// enum SimpleToken {
192/// Identifier,
193/// Number,
194/// Plus,
195/// EndOfStream,
196/// }
197///
198/// impl TokenType for SimpleToken {
199/// const END_OF_STREAM: Self = SimpleToken::EndOfStream;
200/// type Role = UniversalTokenRole; // Or a custom Role type
201///
202/// fn role(&self) -> Self::Role {
203/// match self {
204/// SimpleToken::Identifier => UniversalTokenRole::Name,
205/// SimpleToken::Number => UniversalTokenRole::Literal,
206/// SimpleToken::Plus => UniversalTokenRole::Operator,
207/// _ => UniversalTokenRole::None,
208/// }
209/// }
210///
211/// // ... other methods
212/// }
213/// ```
214pub trait TokenType: Copy + Eq + Hash + Send + Sync + 'static + std::fmt::Debug {
215 /// The associated role type for this token kind.
216 type Role: TokenRole;
217
218 /// A constant representing the end of the input stream.
219 ///
220 /// This special token type is used to signal that there are no more tokens
221 /// to process in the input. It's essential for parsers to recognize when
222 /// they've reached the end of the source code.
223 ///
224 /// # Implementation Notes
225 ///
226 /// This should be a specific variant of your token enum that represents
227 /// the end-of-stream condition. It's used throughout the parsing framework
228 /// to handle boundary conditions and termination logic.
229 const END_OF_STREAM: Self;
230
231 /// Returns the general syntactic role of this token.
232 ///
233 /// This provides a language-agnostic way for tools to understand the purpose
234 /// of a token (e.g., is it a name, a literal, or a keyword) across diverse
235 /// languages like SQL, ASM, YAML, or Rust.
236 fn role(&self) -> Self::Role;
237
238 /// Returns true if this token matches the specified language-specific role.
239 fn is_role(&self, role: Self::Role) -> bool {
240 self.role() == role
241 }
242
243 /// Returns true if this token matches the specified universal role.
244 fn is_universal(&self, role: UniversalTokenRole) -> bool {
245 self.role().universal() == role
246 }
247
248 /// Returns true if this token represents a comment.
249 ///
250 /// # Default Implementation
251 ///
252 /// Based on [`UniversalTokenRole::Comment`].
253 fn is_comment(&self) -> bool {
254 self.is_universal(UniversalTokenRole::Comment)
255 }
256
257 /// Returns true if this token represents whitespace.
258 ///
259 /// # Default Implementation
260 ///
261 /// Based on [`UniversalTokenRole::Whitespace`].
262 fn is_whitespace(&self) -> bool {
263 self.is_universal(UniversalTokenRole::Whitespace)
264 }
265
266 /// Returns true if this token represents an error condition.
267 ///
268 /// # Default Implementation
269 ///
270 /// Based on [`UniversalTokenRole::Error`].
271 fn is_error(&self) -> bool {
272 self.is_universal(UniversalTokenRole::Error)
273 }
274
275 /// Returns true if this token represents trivia (whitespace, comments, etc.).
276 ///
277 /// Trivia tokens are typically ignored during parsing but preserved for
278 /// formatting and tooling purposes. They don't contribute to the syntactic
279 /// structure of the language but are important for maintaining the original
280 /// source code formatting.
281 ///
282 /// # Default Implementation
283 ///
284 /// The default implementation considers a token as trivia if it is either
285 /// whitespace or a comment. Language implementations can override this
286 /// method if they have additional trivia categories.
287 ///
288 /// # Examples
289 ///
290 /// ```ignore
291 /// // Skip over trivia tokens during parsing
292 /// while current_token.is_ignored() {
293 /// advance_to_next_token();
294 /// }
295 /// ```
296 fn is_ignored(&self) -> bool {
297 self.is_whitespace() || self.is_comment()
298 }
299
300 /// Returns true if this token represents the end of the input stream.
301 ///
302 /// This method provides a convenient way to check if a token is the
303 /// special END_OF_STREAM token without directly comparing with the constant.
304 ///
305 /// # Examples
306 ///
307 /// ```ignore
308 /// // Loop until we reach the end of the input
309 /// while !current_token.is_end_of_stream() {
310 /// process_token(current_token);
311 /// current_token = next_token();
312 /// }
313 /// ```
314 fn is_end_of_stream(&self) -> bool {
315 *self == Self::END_OF_STREAM
316 }
317}
318
319/// A trait for types that can represent a token's syntactic role.
320pub trait TokenRole: Copy + Eq + Send {
321 /// Maps this role to a universal, language-agnostic role.
322 fn universal(&self) -> UniversalTokenRole;
323
324 /// Returns a specific name for this role, used for granular highlighting.
325 ///
326 /// For universal roles, this should return the standard scope name (e.g., "keyword").
327 /// For language-specific roles, it can return more specific names (e.g., "keyword.control").
328 fn name(&self) -> &str;
329}
330
/// Language-agnostic syntactic role of a token.
///
/// # Universal Grammar
///
/// The mechanism is inspired by Noam Chomsky's Universal Grammar theory: the
/// "Surface Structure" of languages (their specific token kinds) may differ
/// wildly, while the "Deep Structure" (syntactic roles) is largely shared.
///
/// In the Oak framework:
/// - **Surface Structure** is the set of token kinds a language defines
///   (e.g., Rust's `PubKeyword`).
/// - **Deep Structure** is the set of universal roles in this enum
///   (e.g., [`UniversalTokenRole::Keyword`]).
///
/// Once token kinds are mapped to these roles, generic tools can recognize
/// names, literals, or operators across many languages without learning the
/// details of each grammar.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
#[cfg_attr(
    feature = "serde",
    derive(Serialize, Deserialize),
    serde(rename_all = "snake_case")
)]
pub enum UniversalTokenRole {
    /// A reserved word or built-in command of the language (e.g., 'SELECT', 'let', 'MOV').
    Keyword,
    /// An identifier, label, key, tag, or any other name-like token.
    Name,
    /// A literal value such as a string, number, boolean, or null.
    Literal,
    /// An escape sequence or special character representation inside a literal.
    Escape,
    /// A mathematical, logical, or structural operator (e.g., '+', '=>', 'LIKE').
    Operator,
    /// A structural character such as a bracket, comma, or semicolon.
    Punctuation,
    /// A developer annotation or piece of documentation.
    Comment,
    /// A formatting character such as a space or tab.
    Whitespace,
    /// Malformed or unrecognized content.
    Error,
    /// No specific role has been assigned.
    None,
    /// The end-of-stream marker.
    Eof,
}
372
373impl TokenRole for UniversalTokenRole {
374 fn universal(&self) -> UniversalTokenRole {
375 *self
376 }
377
378 fn name(&self) -> &str {
379 match *self {
380 UniversalTokenRole::Keyword => "keyword",
381 UniversalTokenRole::Name => "variable.other",
382 UniversalTokenRole::Literal => "constant",
383 UniversalTokenRole::Escape => "constant.character.escape",
384 UniversalTokenRole::Operator => "keyword.operator",
385 UniversalTokenRole::Punctuation => "punctuation",
386 UniversalTokenRole::Comment => "comment",
387 UniversalTokenRole::Whitespace => "punctuation.whitespace",
388 UniversalTokenRole::Error => "invalid",
389 UniversalTokenRole::None => "none",
390 UniversalTokenRole::Eof => "punctuation.eof",
391 }
392 }
393}
394
395/// Element type definitions for nodes in the parsed tree.
396///
397/// While tokens represent the atomic units of a language, elements represent the
398/// composite structures formed by combining tokens according to grammar rules.
399/// This includes expressions, statements, declarations, and other syntactic constructs.
400///
401/// # Universal Grammar Philosophy
402///
403/// Just like tokens, syntax tree elements are mapped from their "Surface Structure"
404/// (language-specific nodes) to a "Deep Structure" via [`UniversalElementRole`].
405///
406/// This allows structural analysis tools (like symbol outline extractors) to
407/// identify [`UniversalElementRole::Binding`] (definitions) or [`UniversalElementRole::Container`]
408/// (scopes/blocks) uniformly across different language families.
409///
410/// # Implementation Guidelines
411///
412/// When implementing this trait for a specific language:
413/// - Use an enum with discriminant values for efficient matching
414/// - Include a Root variant to identify the top-level element
415/// - Include an Error variant for malformed constructs
416/// - Define a `Role` associated type and implement the `role()` method.
417///
418/// # Examples
419///
420/// ```ignore
421/// #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
422/// enum MyElement {
423/// Root,
424/// FunctionDeclaration,
425/// Block,
426/// Error,
427/// }
428///
429/// impl ElementType for MyElement {
430/// type Role = UniversalElementRole;
431///
432/// fn role(&self) -> Self::Role {
433/// match self {
434/// MyElement::Root => UniversalElementRole::Root,
435/// MyElement::FunctionDeclaration => UniversalElementRole::Binding,
436/// MyElement::Block => UniversalElementRole::Container,
437/// MyElement::Error => UniversalElementRole::Error,
438/// }
439/// }
440///
441/// fn is_root(&self) -> bool {
442/// matches!(self, MyElement::Root)
443/// }
444///
445/// fn is_error(&self) -> bool {
446/// matches!(self, MyElement::Error)
447/// }
448/// }
449/// ```
450pub trait ElementType: Copy + Eq + Hash + Send + Sync + 'static + std::fmt::Debug {
451 /// The associated role type for this element kind.
452 type Role: ElementRole;
453
454 /// Returns the general syntactic role of this element.
455 ///
456 /// This helps external tools understand the structural purpose of a node
457 /// (e.g., is it a container, a binding, or a value) without deep language knowledge.
458 fn role(&self) -> Self::Role;
459
460 /// Returns true if this element matches the specified language-specific role.
461 fn is_role(&self, role: Self::Role) -> bool {
462 self.role() == role
463 }
464
465 /// Returns true if this element matches the specified universal role.
466 fn is_universal(&self, role: UniversalElementRole) -> bool {
467 self.role().universal() == role
468 }
469
470 /// Returns true if this element represents the root of the parsed tree.
471 ///
472 /// # Default Implementation
473 ///
474 /// Based on [`UniversalElementRole::Root`].
475 fn is_root(&self) -> bool {
476 self.is_universal(UniversalElementRole::Root)
477 }
478
479 /// Returns true if this element represents an error condition.
480 ///
481 /// # Default Implementation
482 ///
483 /// Based on [`UniversalElementRole::Error`].
484 fn is_error(&self) -> bool {
485 self.is_universal(UniversalElementRole::Error)
486 }
487}
488
489/// A trait for types that can represent an element's structural role.
490pub trait ElementRole: Copy + Eq + Send {
491 /// Maps this role to a universal, language-agnostic role.
492 fn universal(&self) -> UniversalElementRole;
493
494 /// Returns a specific name for this role, used for granular highlighting.
495 fn name(&self) -> &str;
496}
497
/// Language-agnostic structural role of a syntax tree element.
///
/// # Universal Grammar
///
/// The mechanism is inspired by Noam Chomsky's Universal Grammar theory,
/// applied to the structural hierarchy of syntax trees: the "Surface
/// Structure" (the specific production rules of a grammar) differs between
/// languages, while the "Deep Structure" (structural intent) is shared.
///
/// In the Oak framework, syntax tree elements are categorized by role:
/// - **Surface Structure** is the set of node kinds a language defines
///   (e.g., Rust's `FnDeclaration`, SQL's `SelectStatement`, or YAML's `Mapping`).
/// - **Deep Structure** is the set of universal structural patterns in this enum.
///
/// Mapping to these roles enables analysis across diverse language families:
/// - **Containers & Statements**: identify hierarchical scopes and their
///   constituents (e.g., a SQL table is a container, its clauses are statements).
/// - **Bindings & References**: identify the flow of information and identifiers
///   (e.g., an ASM label is a binding, a jump instruction is a reference).
/// - **Values**: identify the atomic data payload or expression results.
///
/// # Design Philosophy: The 99% Rule
///
/// The enum aims to be a "sufficiently complete" abstraction for common tool
/// requirements (Highlighting, Outline, Navigation, and Refactoring) while
/// staying simple and language-agnostic.
///
/// ### 1. Structural Identity (The "What")
/// A role describes a node's primary structural responsibility in the tree,
/// not its domain-specific semantic meaning. For example:
/// - A "Class" or "Function" is structurally a
///   [`UniversalElementRole::Definition`] and often a
///   [`UniversalElementRole::Container`].
/// - An "Import" is structurally a [`UniversalElementRole::Statement`] that
///   contains a [`UniversalElementRole::Reference`].
///
/// ### 2. Broad Categories (The "How")
/// Elements fall into four major structural groups:
/// - **Flow Control & logic**: `Statement`, `Expression`, `Call`, and `Root`.
/// - **Symbol Management**: `Definition`, `Binding`, and `Reference`.
/// - **Hierarchy & Scoping**: `Container`.
/// - **Metadata & Auxiliaries**: `Typing`, `Metadata`, `Attribute`,
///   `Documentation`, etc.
///
/// ### 3. Intent-Based Selection
/// When a node could fit several roles, pick the one that reflects its
/// **primary structural intent**.
/// - **Example**: In Rust, an `if` expression is both an `Expression` and a
///   `Container`. Its primary role in the tree is `Expression` (it produces a
///   value), whereas its children (the blocks) are `Container`s.
/// - **Example**: In Markdown, a "List" is a `Container`, while each
///   "ListItem" is a `Statement` within that container.
///
/// ### 4. Intentional Exclusions
/// Roles that can be expressed by combining existing roles, or that would
/// require deep semantic analysis, are deliberately left out:
/// - **Keyword-specific roles**: "Loop", "Conditional", or "Module" are
///   surface-level distinctions. In the Deep Structure they are all
///   `Container`s or `Statement`s.
/// - **Semantic Relationships**: "Inheritance", "Implementation", or
///   "Dependency" are better handled by semantic graph analysis than by
///   syntactic tree roles.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
#[cfg_attr(
    feature = "serde",
    derive(Serialize, Deserialize),
    serde(rename_all = "snake_case")
)]
#[non_exhaustive]
pub enum UniversalElementRole {
    /// The top-level root of the syntax tree: the entire document or source file.
    Root,

    /// A high-level structural container that defines a scope or logical grouping.
    Container,

    /// A node covering the entire declaration or definition of a symbol.
    ///
    /// This role marks the "whole" entity that defines something in the code,
    /// which is what symbol trees and navigation outlines are built from.
    ///
    /// # Examples
    /// - **Rust**: The entire `Fn` declaration block, `Struct` item, or `Enum`.
    /// - **Markdown**: `Heading` or `LinkDefinition`.
    /// - **SQL**: The whole `CREATE TABLE` or `CREATE PROCEDURE` statement.
    /// - **ASM**: A `Proc` (procedure) block or a multi-line data definition.
    /// - **YAML**: A schema-defined object or a complex configuration block.
    Definition,

    /// A node that performs the act of binding a name to an entity.
    ///
    /// Where `Definition` covers the entire construct, `Binding` targets the
    /// specific part (usually the identifier) that introduces the name.
    ///
    /// # Examples
    /// - **Rust**: The identifier node in a `let` pattern or function name.
    /// - **Markdown**: `LinkLabel` in a reference link definition.
    /// - **SQL**: The `Table` name identifier in `CREATE TABLE`.
    /// - **ASM**: A `Label` node (e.g., `main:`).
    /// - **YAML**: The `Key` in a key-value mapping.
    Binding,

    /// A node that refers to a name or entity defined elsewhere.
    ///
    /// # Examples
    /// - **Rust**: `PathExpr` (variable usage) or `MethodCall`.
    /// - **Markdown**: `LinkReference` or `FootnoteReference`.
    /// - **SQL**: `ColumnName` in a `SELECT` clause or `TableName` in `FROM`.
    /// - **ASM**: A `Label` reference in a jump (e.g., `JMP main`).
    /// - **YAML**: An `Alias` anchor (e.g., `*anchor_name`).
    Reference,

    /// A node representing a type signature, constraint, or type reference.
    ///
    /// Separating type information from general logic or values is essential
    /// for type checking and intelligent completion.
    ///
    /// # Examples
    /// - **Rust**: `TypePath` (e.g., `: i32`), `GenericArgument`, or `WhereClause`.
    /// - **SQL**: `DataType` (e.g., `VARCHAR(255)` or `INT`).
    /// - **ASM**: Size specifiers (e.g., `DWORD`, `PTR`).
    /// - **TypeScript**: `TypeAnnotation` or `InterfaceDeclaration`.
    Typing,

    /// A structured comment or documentation node attached to another element.
    ///
    /// Unlike raw `Comment` tokens, these are syntax nodes that may have
    /// internal structure of their own (such as Markdown or tagged parameters).
    ///
    /// # Examples
    /// - **Rust**: `DocComment` (e.g., `/// ...`).
    /// - **Java**: `Javadoc` blocks.
    /// - **Python**: `Docstring` literals.
    Documentation,

    /// A high-level annotation, decorator, or macro that provides extra semantic info.
    ///
    /// # Metadata vs Attribute
    /// - **Metadata**: usually a language-level extension that "decorates" an
    ///   element from the outside, often affecting compilation or runtime
    ///   behavior (e.g., Rust attributes).
    /// - **Attribute**: usually a built-in, structural property that is part of
    ///   the element's native definition (e.g., HTML attributes).
    ///
    /// # Examples
    /// - **Rust**: `Attribute` (e.g., `#[derive(...)]`) or `MacroCall`.
    /// - **Markdown**: `Frontmatter` (YAML/TOML header).
    /// - **Java/TS**: `@Decorator` or `@Annotation`.
    /// - **Python**: `@decorator` syntax.
    Metadata,

    /// A specific property, flag, or attribute-value pair.
    ///
    /// Where `Metadata` decorates an element with external logic, `Attribute`
    /// represents an intrinsic property defined by the language's schema or
    /// structure.
    ///
    /// # Examples
    /// - **HTML/XML**: An `Attribute` (e.g., `id="main"`).
    /// - **Markdown**: `LinkTitle` or `ImageAlt` text.
    /// - **YAML**: A specific configuration property.
    /// - **ASM**: Segment attributes (e.g., `READONLY`, `EXECUTE`).
    Attribute,

    /// The key part of an attribute, property, or configuration entry.
    ///
    /// This is a role of its own because such a key:
    /// - is not a **Reference** (it does not refer to an external symbol),
    /// - is not a traditional **Binding** (it does not define a symbol in a
    ///   global or lexical scope),
    /// - is not a **Keyword** (it is typically a user-defined or
    ///   schema-defined identifier).
    ///
    /// # Examples
    /// - **HTML**: The `id` in `id="main"`.
    /// - **Markdown**: `AttributeName` (in Pandoc-style `{ #id .class }`).
    /// - **YAML**: The key in a property mapping.
    /// - **TOML**: The key in a table entry.
    AttributeKey,

    /// A node that supplies additional details or secondary information for
    /// another element.
    ///
    /// # Examples
    /// - **Rust**: `GenericParameter` list, `FunctionParameter` list.
    /// - **SQL**: `Constraint` details.
    Detail,

    /// A node that holds the name of an element, typically in a declaration.
    ///
    /// # Examples
    /// - **Rust**: The name identifier in a function or struct definition.
    /// - **HTML**: The tag name in an element.
    Name,

    /// A discrete syntactic unit within a container: a single logical entry
    /// or instruction.
    ///
    /// This typically corresponds to a **Statement** in programming languages
    /// or a standalone instruction in assembly. In markup it could be a list
    /// item or a table row.
    ///
    /// # Examples
    /// - **Rust**: A `Stmt` inside a block.
    /// - **Markdown**: `ListItem` or `TableCell`.
    /// - **SQL**: A standalone `Statement` or a `Clause` (like `WHERE`).
    /// - **ASM**: A single `Instruction` (e.g., `NOP`).
    Statement,

    /// A node representing a computed result or a complex logical operation.
    ///
    /// Unlike a simple `Value` (an atomic literal), an `Expression` involves
    /// operators or logic that must be evaluated.
    ///
    /// # Examples
    /// - **Rust**: `BinaryExpr`, `UnaryExpr`, or `RangeExpr`.
    /// - **SQL**: `BinaryOp` in a `WHERE` clause.
    /// - **Python**: `ListComprehension` or `Lambda`.
    Expression,

    /// A node that invokes a function, method, or macro.
    ///
    /// This role marks the active execution of a named entity, with optional
    /// arguments.
    ///
    /// # Examples
    /// - **Rust**: `CallExpr`, `MethodCallExpr`, or `MacroInvocation`.
    /// - **SQL**: `FunctionCall` (e.g., `COUNT(*)`).
    /// - **Excel**: A formula call.
    Call,

    /// A node representing an **atomic** data value or primitive constant.
    ///
    /// This role is strictly for atomic values such as numbers, strings, or
    /// booleans. It **does not** cover composite structures like arrays `[]`
    /// or objects `{}`; categorize those as
    /// [`UniversalElementRole::Container`].
    ///
    /// # Examples
    /// - **Rust**: `Literal` (strings, numbers, booleans).
    /// - **Markdown**: `InlineCode`, `Emphasis`, or `Strong`.
    /// - **SQL**: `Literal` values.
    /// - **JSON/YAML**: Atomic `Scalar` values (strings, integers, nulls).
    Value,

    /// A node that hosts content in a different language, or a raw fragment
    /// that requires a separate parsing pass (Language Injection).
    ///
    /// # Examples
    /// - **HTML**: A `<script>` or `<style>` block containing JS/CSS.
    /// - **Markdown**: `CodeBlock` (host for other languages).
    /// - **Rust/Java**: A string literal containing SQL (if marked for injection).
    /// - **PHP**: Raw HTML fragments outside of `<?php ... ?>` tags.
    Embedded,

    /// A node created specifically to represent a syntax error or recovery
    /// point in the source code.
    Error,

    /// No specific structural role has been assigned or recognized for this element.
    None,
}
746
747impl ElementRole for UniversalElementRole {
748 fn universal(&self) -> UniversalElementRole {
749 *self
750 }
751
752 fn name(&self) -> &str {
753 match *self {
754 UniversalElementRole::Container => "meta.block",
755 UniversalElementRole::Statement => "meta.statement",
756 UniversalElementRole::Binding => "variable.other.declaration",
757 UniversalElementRole::Reference => "variable.other.usage",
758 UniversalElementRole::Call => "entity.name.function.call",
759 UniversalElementRole::Expression => "meta.expression",
760 UniversalElementRole::Value => "constant",
761 UniversalElementRole::Definition => "entity.name.function",
762 UniversalElementRole::Typing => "entity.name.type",
763 UniversalElementRole::Metadata => "meta.preprocessor",
764 UniversalElementRole::Attribute => "entity.other.attribute-name",
765 UniversalElementRole::AttributeKey => "entity.other.attribute-name.key",
766 UniversalElementRole::Detail => "meta.detail",
767 UniversalElementRole::Name => "entity.name",
768 UniversalElementRole::Embedded => "meta.embedded",
769 UniversalElementRole::Documentation => "comment.block.documentation",
770 UniversalElementRole::Root => "source",
771 UniversalElementRole::Error => "invalid",
772 UniversalElementRole::None => "none",
773 }
774 }
775}