oak_core/language/mod.rs
1use serde::{Deserialize, Serialize};
2use std::{fmt::Debug, hash::Hash};
3
4/// Represents the broad category a language belongs to.
5#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
6pub enum LanguageCategory {
7 /// General-purpose programming languages (e.g., Rust, C, Java).
8 Programming,
9 /// Markup and document languages (e.g., Markdown, HTML, Typst).
10 Markup,
11 /// Configuration and data serialization languages (e.g., YAML, JSON, TOML).
12 Config,
13 /// Domain-specific languages or specialized notation (e.g., SQL, Regex, Math).
14 Dsl,
15 /// Other or unclassified.
16 Other,
17}
18
19/// Language definition trait that coordinates all language-related types and behaviors.
20///
21/// This trait serves as the foundation for defining programming languages within the
22/// incremental parsing system. It acts as a marker trait that ties together various
23/// language-specific components like lexers, parsers, and rebuilders.
24///
25/// # Overview
26///
27/// The Language trait is the central abstraction that enables the parsing framework
28/// to be language-agnostic while still providing language-specific functionality.
29/// Each language implementation must define its own types for tokens, elements,
30/// and the root structure of the parsed tree.
31///
32/// # Design Philosophy
33///
34/// The trait follows a compositional design where:
35/// - `TokenType` defines the atomic units of the language (tokens)
36/// - `ElementType` defines the composite structures (nodes)
37/// - `TypedRoot` defines the top-level structure of the parsed document
38///
39/// This separation allows for maximum flexibility while maintaining type safety
40/// and performance characteristics required for incremental parsing.
41///
42/// # Examples
43///
44/// ```rust
45/// # use oak_core::{Language, TokenType, ElementType, UniversalTokenRole, UniversalElementRole};
46/// // Define a simple language
47/// #[derive(Clone)]
48/// struct MyLanguage;
49///
50/// impl Language for MyLanguage {
51/// const NAME: &'static str = "my-language";
52/// type TokenType = MyToken;
53/// type ElementType = MyElement;
54/// type TypedRoot = ();
55/// }
56///
57/// // With corresponding type definitions
58/// #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
59/// enum MyToken {
60/// Identifier,
61/// EndOfStream,
62/// }
63///
64/// impl TokenType for MyToken {
65/// const END_OF_STREAM: Self = MyToken::EndOfStream;
66/// type Role = UniversalTokenRole;
67/// fn role(&self) -> Self::Role { UniversalTokenRole::None }
68/// }
69///
70/// #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
71/// enum MyElement {}
72///
73/// impl ElementType for MyElement {
74/// type Role = UniversalElementRole;
75/// fn role(&self) -> Self::Role { UniversalElementRole::None }
76/// }
77/// ```
78pub trait Language: Send + Sync + 'static {
79 /// The name of the language (e.g., "rust", "sql").
80 const NAME: &'static str;
81
82 /// The category of the language.
83 const CATEGORY: LanguageCategory = LanguageCategory::Programming;
84
85 /// The token type used to represent different token and node types in the language.
86 ///
87 /// This associated type defines how different syntactic elements (tokens, nodes) are
88 /// categorized and identified within the language. It must implement `Copy` and `Eq`
89 /// to ensure efficient handling in the parsing system.
90 ///
91 /// # Requirements
92 ///
93 /// The token type must:
94 /// - Implement the `TokenType` trait
95 /// - Be copyable to enable efficient passing
96 /// - Support equality comparison for token matching
97 /// - Be sendable across thread boundaries
98 ///
99 /// # Examples
100 ///
101 /// ```
102 /// #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
103 /// enum RustSyntaxKind {
104 /// LetKeyword,
105 /// Identifier,
106 /// Number,
107 /// // ... other token kinds
108 /// }
109 /// ```
110 type TokenType: TokenType;
111
112 /// The element type used to represent composite structures in the parsed tree.
113 ///
114 /// While tokens represent the atomic units of the language, elements represent
115 /// the composite structures formed by combining tokens according to grammar rules.
116 /// This includes expressions, statements, declarations, and other syntactic constructs.
117 ///
118 /// # Requirements
119 ///
120 /// The element type must:
121 /// - Implement the `ElementType` trait
122 /// - Be copyable for efficient handling
123 /// - Support equality comparison
124 /// - Be sendable across thread boundaries
125 type ElementType: ElementType;
126
127 /// The root type for the parsed tree that represents the top-level structure of the language.
128 ///
129 /// This associated type defines the structure of the root node in the parsed tree,
130 /// which typically contains the entire parsed source code organized according to the
131 /// language's grammar rules. The root type serves as the entry point for traversing
132 /// and manipulating the parsed representation.
133 ///
134 /// # Design Considerations
135 ///
136 /// The root type should:
137 /// - Contain references to all top-level language constructs
138 /// - Provide efficient access to the parsed content
139 /// - Support incremental updates when the source changes
140 ///
141 /// # Examples
142 ///
143 /// ```ignore
144 /// struct RustRoot {
145 /// items: Vec<RustItem>,
146 /// }
147 ///
148 /// struct RustRoot {
149 /// modules: Vec<Module>,
150 /// imports: Vec<Import>,
151 /// declarations: Vec<Declaration>,
152 /// }
153 /// ```
154 type TypedRoot;
155}
156
157/// Token type definitions for tokens in the parsing system.
158///
159/// This module provides the [`TokenType`] trait which serves as the foundation
160/// for defining different types of tokens in the parsing system.
161/// It enables categorization of token elements and provides methods for
162/// identifying their roles in the language grammar.
163///
164/// # Universal Grammar Philosophy
165///
166/// The role mechanism in Oak is inspired by the concept of "Universal Grammar".
167/// While every language has its own unique "Surface Structure" (its specific token kinds),
168/// most share a common "Deep Structure" (syntactic roles).
169///
170/// By mapping language-specific kinds to [`UniversalTokenRole`], we enable generic tools
171/// like highlighters and formatters to work across 100+ languages without deep
172/// knowledge of each one's specific grammar.
173///
174/// # Implementation Guidelines
175///
176/// When implementing this trait for a specific language:
177/// - Use an enum with discriminant values for efficient matching
178/// - Ensure all variants are Copy and Eq for performance
179/// - Include an END_OF_STREAM variant to signal input termination
180/// - Define a `Role` associated type and implement the `role()` method to provide
181/// syntactic context.
182///
183/// # Examples
184///
185/// ```ignore
186/// #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
187/// enum SimpleToken {
188/// Identifier,
189/// Number,
190/// Plus,
191/// EndOfStream,
192/// }
193///
194/// impl TokenType for SimpleToken {
195/// const END_OF_STREAM: Self = SimpleToken::EndOfStream;
196/// type Role = UniversalTokenRole; // Or a custom Role type
197///
198/// fn role(&self) -> Self::Role {
199/// match self {
200/// SimpleToken::Identifier => UniversalTokenRole::Name,
201/// SimpleToken::Number => UniversalTokenRole::Literal,
202/// SimpleToken::Plus => UniversalTokenRole::Operator,
203/// _ => UniversalTokenRole::None,
204/// }
205/// }
206///
207/// // ... other methods
208/// }
209/// ```
210pub trait TokenType: Copy + Eq + Hash + Send + Sync + 'static + std::fmt::Debug {
211 /// The associated role type for this token kind.
212 type Role: TokenRole;
213
214 /// A constant representing the end of the input stream.
215 ///
216 /// This special token type is used to signal that there are no more tokens
217 /// to process in the input. It's essential for parsers to recognize when
218 /// they've reached the end of the source code.
219 ///
220 /// # Implementation Notes
221 ///
222 /// This should be a specific variant of your token enum that represents
223 /// the end-of-stream condition. It's used throughout the parsing framework
224 /// to handle boundary conditions and termination logic.
225 const END_OF_STREAM: Self;
226
227 /// Returns the general syntactic role of this token.
228 ///
229 /// This provides a language-agnostic way for tools to understand the purpose
230 /// of a token (e.g., is it a name, a literal, or a keyword) across diverse
231 /// languages like SQL, ASM, YAML, or Rust.
232 fn role(&self) -> Self::Role;
233
234 /// Returns true if this token matches the specified language-specific role.
235 fn is_role(&self, role: Self::Role) -> bool {
236 self.role() == role
237 }
238
239 /// Returns true if this token matches the specified universal role.
240 fn is_universal(&self, role: UniversalTokenRole) -> bool {
241 self.role().universal() == role
242 }
243
244 /// Returns true if this token represents a comment.
245 ///
246 /// # Default Implementation
247 ///
248 /// Based on [`UniversalTokenRole::Comment`].
249 fn is_comment(&self) -> bool {
250 self.is_universal(UniversalTokenRole::Comment)
251 }
252
253 /// Returns true if this token represents whitespace.
254 ///
255 /// # Default Implementation
256 ///
257 /// Based on [`UniversalTokenRole::Whitespace`].
258 fn is_whitespace(&self) -> bool {
259 self.is_universal(UniversalTokenRole::Whitespace)
260 }
261
262 /// Returns true if this token represents an error condition.
263 ///
264 /// # Default Implementation
265 ///
266 /// Based on [`UniversalTokenRole::Error`].
267 fn is_error(&self) -> bool {
268 self.is_universal(UniversalTokenRole::Error)
269 }
270
271 /// Returns true if this token represents trivia (whitespace, comments, etc.).
272 ///
273 /// Trivia tokens are typically ignored during parsing but preserved for
274 /// formatting and tooling purposes. They don't contribute to the syntactic
275 /// structure of the language but are important for maintaining the original
276 /// source code formatting.
277 ///
278 /// # Default Implementation
279 ///
280 /// The default implementation considers a token as trivia if it is either
281 /// whitespace or a comment. Language implementations can override this
282 /// method if they have additional trivia categories.
283 ///
284 /// # Examples
285 ///
286 /// ```ignore
287 /// // Skip over trivia tokens during parsing
288 /// while current_token.is_ignored() {
289 /// advance_to_next_token();
290 /// }
291 /// ```
292 fn is_ignored(&self) -> bool {
293 self.is_whitespace() || self.is_comment()
294 }
295
296 /// Returns true if this token represents the end of the input stream.
297 ///
298 /// This method provides a convenient way to check if a token is the
299 /// special END_OF_STREAM token without directly comparing with the constant.
300 ///
301 /// # Examples
302 ///
303 /// ```ignore
304 /// // Loop until we reach the end of the input
305 /// while !current_token.is_end_of_stream() {
306 /// process_token(current_token);
307 /// current_token = next_token();
308 /// }
309 /// ```
310 fn is_end_of_stream(&self) -> bool {
311 *self == Self::END_OF_STREAM
312 }
313}
314
315/// A trait for types that can represent a token's syntactic role.
316pub trait TokenRole: Copy + Eq + Send {
317 /// Maps this role to a universal, language-agnostic role.
318 fn universal(&self) -> UniversalTokenRole;
319
320 /// Returns a specific name for this role, used for granular highlighting.
321 ///
322 /// For universal roles, this should return the standard scope name (e.g., "keyword").
323 /// For language-specific roles, it can return more specific names (e.g., "keyword.control").
324 fn name(&self) -> &str;
325}
326
327/// Represents the general syntactic role of a token across diverse languages.
328///
329/// # Universal Grammar
330///
331/// This mechanism is inspired by Noam Chomsky's Universal Grammar theory.
332/// It posits that while the "Surface Structure" (specific token kinds) of languages
333/// may vary wildly, they share a common "Deep Structure" (syntactic roles).
334///
335/// In the Oak framework:
336/// - **Surface Structure**: Refers to specific token kinds defined by a language (e.g., Rust's `PubKeyword`).
337/// - **Deep Structure**: Refers to the universal roles defined in this enum (e.g., [`UniversalTokenRole::Keyword`]).
338///
339/// By mapping to these roles, generic tools can identify names, literals, or operators
340/// across 100+ languages without needing to learn the specifics of each grammar.
341#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
342pub enum UniversalTokenRole {
343 /// Language reserved words or built-in commands (e.g., 'SELECT', 'let', 'MOV').
344 Keyword,
345 /// Identifiers, labels, keys, tags, or any name-like token.
346 Name,
347 /// Literal values like strings, numbers, booleans, or nulls.
348 Literal,
349 /// An escape sequence or a special character representation within a literal.
350 Escape,
351 /// Mathematical, logical, or structural operators (e.g., '+', '=>', 'LIKE').
352 Operator,
353 /// Structural characters like brackets, commas, semicolons.
354 Punctuation,
355 /// Developer annotations or documentation.
356 Comment,
357 /// Formatting characters like spaces or tabs.
358 Whitespace,
359 /// Malformed or unrecognized content.
360 Error,
361 /// No specific role assigned.
362 None,
363 /// End of stream marker.
364 Eof,
365}
366
367impl TokenRole for UniversalTokenRole {
368 fn universal(&self) -> UniversalTokenRole {
369 *self
370 }
371
372 fn name(&self) -> &str {
373 match *self {
374 UniversalTokenRole::Keyword => "keyword",
375 UniversalTokenRole::Name => "variable.other",
376 UniversalTokenRole::Literal => "constant",
377 UniversalTokenRole::Escape => "constant.character.escape",
378 UniversalTokenRole::Operator => "keyword.operator",
379 UniversalTokenRole::Punctuation => "punctuation",
380 UniversalTokenRole::Comment => "comment",
381 UniversalTokenRole::Whitespace => "punctuation.whitespace",
382 UniversalTokenRole::Error => "invalid",
383 UniversalTokenRole::None => "none",
384 UniversalTokenRole::Eof => "punctuation.eof",
385 }
386 }
387}
388
389/// Element type definitions for nodes in the parsed tree.
390///
391/// While tokens represent the atomic units of a language, elements represent the
392/// composite structures formed by combining tokens according to grammar rules.
393/// This includes expressions, statements, declarations, and other syntactic constructs.
394///
395/// # Universal Grammar Philosophy
396///
397/// Just like tokens, syntax tree elements are mapped from their "Surface Structure"
398/// (language-specific nodes) to a "Deep Structure" via [`UniversalElementRole`].
399///
400/// This allows structural analysis tools (like symbol outline extractors) to
401/// identify [`UniversalElementRole::Binding`] (definitions) or [`UniversalElementRole::Container`]
402/// (scopes/blocks) uniformly across different language families.
403///
404/// # Implementation Guidelines
405///
406/// When implementing this trait for a specific language:
407/// - Use an enum with discriminant values for efficient matching
408/// - Include a Root variant to identify the top-level element
409/// - Include an Error variant for malformed constructs
410/// - Define a `Role` associated type and implement the `role()` method.
411///
412/// # Examples
413///
414/// ```ignore
415/// #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
416/// enum MyElement {
417/// Root,
418/// FunctionDeclaration,
419/// Block,
420/// Error,
421/// }
422///
423/// impl ElementType for MyElement {
424/// type Role = UniversalElementRole;
425///
426/// fn role(&self) -> Self::Role {
427/// match self {
428/// MyElement::Root => UniversalElementRole::Root,
429/// MyElement::FunctionDeclaration => UniversalElementRole::Binding,
430/// MyElement::Block => UniversalElementRole::Container,
431/// MyElement::Error => UniversalElementRole::Error,
432/// }
433/// }
434///
435/// fn is_root(&self) -> bool {
436/// matches!(self, MyElement::Root)
437/// }
438///
439/// fn is_error(&self) -> bool {
440/// matches!(self, MyElement::Error)
441/// }
442/// }
443/// ```
444pub trait ElementType: Copy + Eq + Hash + Send + Sync + 'static + std::fmt::Debug {
445 /// The associated role type for this element kind.
446 type Role: ElementRole;
447
448 /// Returns the general syntactic role of this element.
449 ///
450 /// This helps external tools understand the structural purpose of a node
451 /// (e.g., is it a container, a binding, or a value) without deep language knowledge.
452 fn role(&self) -> Self::Role;
453
454 /// Returns true if this element matches the specified language-specific role.
455 fn is_role(&self, role: Self::Role) -> bool {
456 self.role() == role
457 }
458
459 /// Returns true if this element matches the specified universal role.
460 fn is_universal(&self, role: UniversalElementRole) -> bool {
461 self.role().universal() == role
462 }
463
464 /// Returns true if this element represents the root of the parsed tree.
465 ///
466 /// # Default Implementation
467 ///
468 /// Based on [`UniversalElementRole::Root`].
469 fn is_root(&self) -> bool {
470 self.is_universal(UniversalElementRole::Root)
471 }
472
473 /// Returns true if this element represents an error condition.
474 ///
475 /// # Default Implementation
476 ///
477 /// Based on [`UniversalElementRole::Error`].
478 fn is_error(&self) -> bool {
479 self.is_universal(UniversalElementRole::Error)
480 }
481}
482
483/// A trait for types that can represent an element's structural role.
484pub trait ElementRole: Copy + Eq + Send {
485 /// Maps this role to a universal, language-agnostic role.
486 fn universal(&self) -> UniversalElementRole;
487
488 /// Returns a specific name for this role, used for granular highlighting.
489 fn name(&self) -> &str;
490}
491
492/// Represents the general structural role of a syntax tree element.
493///
494/// # Universal Grammar
495///
496/// This mechanism is inspired by Noam Chomsky's Universal Grammar theory, applied
497/// here to the structural hierarchy of syntax trees. It posits that while the
498/// "Surface Structure" (the specific production rules of a grammar) varies across
499/// languages, they share a common "Deep Structure" (structural intent).
500///
501/// In the Oak framework, syntax tree elements are categorized by their role:
502/// - **Surface Structure**: Refers to specific node kinds defined by a language
503/// (e.g., Rust's `FnDeclaration`, SQL's `SelectStatement`, or YAML's `Mapping`).
504/// - **Deep Structure**: Refers to the universal structural patterns defined in this enum.
505///
506/// By mapping to these roles, we can perform sophisticated analysis across diverse
507/// language families:
508/// - **Containers & Statements**: Identify hierarchical scopes and their constituents
509/// (e.g., a SQL table is a container, its clauses are statements).
510/// - **Bindings & References**: Identify the flow of information and identifiers
511/// (e.g., an ASM label is a binding, a jump instruction is a reference).
512/// - **Values**: Identify the atomic data payload or expression results.
513///
514/// # Design Philosophy: The 99% Rule
515///
516/// This enum is designed to provide a "sufficiently complete" abstraction for common tool
517/// requirements (Highlighting, Outline, Navigation, and Refactoring) while maintaining
518/// language-agnostic simplicity.
519///
520/// ### 1. Structural Identity (The "What")
521/// Roles describe a node's primary structural responsibility in the tree, not its
522/// domain-specific semantic meaning. For example:
523/// - A "Class" or "Function" is structurally a [`Definition`] and often a [`Container`].
524/// - An "Import" is structurally a [`Statement`] that contains a [`Reference`].
525///
526/// ### 2. Broad Categories (The "How")
527/// We categorize elements into four major structural groups:
528/// - **Flow Control & logic**: [`Statement`], [`Expression`], [`Call`], and [`Root`].
529/// - **Symbol Management**: [`Definition`], [`Binding`], and [`Reference`].
530/// - **Hierarchy & Scoping**: [`Container`].
531/// - **Metadata & Auxiliaries**: [`Typing`], [`Metadata`], [`Attribute`], [`Documentation`], etc.
532///
533/// ### 3. Intent-Based Selection
534/// When a node could fit multiple roles, choose the one that represents its **primary
535/// structural intent**.
536/// - **Example**: In Rust, an `if` expression is both an `Expression` and a `Container`.
537/// However, its primary role in the tree is as an [`Expression`] (producing a value),
538/// whereas its children (the blocks) are [`Container`]s.
539/// - **Example**: In Markdown, a "List" is a [`Container`], while each "ListItem" is a
540/// [`Statement`] within that container.
541///
542/// ### 4. Intentional Exclusions
543/// We intentionally exclude roles that can be represented by combining existing roles or
544/// that require deep semantic analysis:
545/// - **Keyword-specific roles**: Roles like "Loop", "Conditional", or "Module" are excluded.
546/// These are surface-level distinctions. In the Deep Structure, they are all [`Container`]s
547/// or [`Statement`]s.
548/// - **Semantic Relationships**: Roles like "Inheritance", "Implementation", or "Dependency"
549/// are excluded. These are better handled by semantic graph analysis rather than
550/// syntactic tree roles.
551#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
552#[non_exhaustive]
553pub enum UniversalElementRole {
554 /// The top-level root of the syntax tree, representing the entire document or source file.
555 Root,
556
557 /// A high-level structural container that defines a scope or logical grouping.
558 Container,
559
560 /// A node that represents the entire declaration or definition of a symbol.
561 ///
562 /// This role identifies the "whole" entity that defines something in the code,
563 /// which is crucial for building symbol trees and navigation outlines.
564 ///
565 /// # Examples
566 /// - **Rust**: The entire `Fn` declaration block, `Struct` item, or `Enum`.
567 /// - **Markdown**: `Heading` or `LinkDefinition`.
568 /// - **SQL**: The whole `CREATE TABLE` or `CREATE PROCEDURE` statement.
569 /// - **ASM**: A `Proc` (procedure) block or a multi-line data definition.
570 /// - **YAML**: A schema-defined object or a complex configuration block.
571 Definition,
572
573 /// A node that specifically performs the act of binding a name to an entity.
574 ///
575 /// Unlike `Definition`, which represents the entire construct, `Binding` targets
576 /// the specific part (usually the identifier) that introduces the name.
577 ///
578 /// # Examples
579 /// - **Rust**: The identifier node in a `let` pattern or function name.
580 /// - **Markdown**: `LinkLabel` in a reference link definition.
581 /// - **SQL**: The `Table` name identifier in `CREATE TABLE`.
582 /// - **ASM**: A `Label` node (e.g., `main:`).
583 /// - **YAML**: The `Key` in a key-value mapping.
584 Binding,
585
586 /// A node that refers to an existing name or entity defined elsewhere.
587 ///
588 /// # Examples
589 /// - **Rust**: `PathExpr` (variable usage) or `MethodCall`.
590 /// - **Markdown**: `LinkReference` or `FootnoteReference`.
591 /// - **SQL**: `ColumnName` in a `SELECT` clause or `TableName` in `FROM`.
592 /// - **ASM**: A `Label` reference in a jump (e.g., `JMP main`).
593 /// - **YAML**: An `Alias` anchor (e.g., `*anchor_name`).
594 Reference,
595
596 /// A node representing a type signature, constraint, or type reference.
597 ///
598 /// This role distinguishes type information from general logic or values,
599 /// which is essential for type checking and intelligent completion.
600 ///
601 /// # Examples
602 /// - **Rust**: `TypePath` (e.g., `: i32`), `GenericArgument`, or `WhereClause`.
603 /// - **SQL**: `DataType` (e.g., `VARCHAR(255)` or `INT`).
604 /// - **ASM**: Size specifiers (e.g., `DWORD`, `PTR`).
605 /// - **TypeScript**: `TypeAnnotation` or `InterfaceDeclaration`.
606 Typing,
607
608 /// Structured comments or documentation nodes attached to other elements.
609 ///
610 /// Unlike raw `Comment` tokens, these are syntax nodes that may contain
611 /// their own internal structure (like Markdown or Tagged parameters).
612 ///
613 /// # Examples
614 /// - **Rust**: `DocComment` (e.g., `/// ...`).
615 /// - **Java**: `Javadoc` blocks.
616 /// - **Python**: `Docstring` literals.
617 Documentation,
618
619 /// High-level annotations, decorators, or macros that provide extra semantic info.
620 ///
621 /// # Metadata vs Attribute
622 /// - **Metadata**: Usually refers to language-level extensions that "decorate" an element
623 /// from the outside, often affecting compilation or runtime behavior (e.g., Rust attributes).
624 /// - **Attribute**: Usually refers to built-in, structural properties that are part of the
625 /// element's native definition (e.g., HTML attributes).
626 ///
627 /// # Examples
628 /// - **Rust**: `Attribute` (e.g., `#[derive(...)]`) or `MacroCall`.
629 /// - **Markdown**: `Frontmatter` (YAML/TOML header).
630 /// - **Java/TS**: `@Decorator` or `@Annotation`.
631 /// - **Python**: `@decorator` syntax.
632 Metadata,
633
634 /// A specific property, flag, or attribute-value pair.
635 ///
636 /// Unlike `Metadata`, which decorates an element with external logic, `Attribute`
637 /// represents intrinsic properties defined by the language's schema or structure.
638 ///
639 /// # Examples
640 /// - **HTML/XML**: An `Attribute` (e.g., `id="main"`).
641 /// - **Markdown**: `LinkTitle` or `ImageAlt` text.
642 /// - **YAML**: A specific configuration property.
643 /// - **ASM**: Segment attributes (e.g., `READONLY`, `EXECUTE`).
644 Attribute,
645
646 /// The key part of an attribute, property, or configuration entry.
647 ///
648 /// This role is distinct because:
649 /// - It is not a **Reference** (it doesn't refer to an external symbol).
650 /// - It is not a traditional **Binding** (it doesn't define a symbol in a global or lexical scope).
651 /// - It is not a **Keyword** (it is typically a user-defined or schema-defined identifier).
652 ///
653 /// # Examples
654 /// - **HTML**: The `id` in `id="main"`.
655 /// - **Markdown**: `AttributeName` (in Pandoc-style `{ #id .class }`).
656 /// - **YAML**: The key in a property mapping.
657 /// - **TOML**: The key in a table entry.
658 AttributeKey,
659
660 /// A node that provides additional details or secondary information for another element.
661 ///
662 /// # Examples
663 /// - **Rust**: `GenericParameter` list, `FunctionParameter` list.
664 /// - **SQL**: `Constraint` details.
665 Detail,
666
667 /// A node that represents the name of an element, typically used in declarations.
668 ///
669 /// # Examples
670 /// - **Rust**: The name identifier in a function or struct definition.
671 /// - **HTML**: The tag name in an element.
672 Name,
673
674 /// A discrete syntactic unit within a container, representing a single
675 /// logical entry or instruction.
676 ///
677 /// This typically maps to a **Statement** in programming languages, or a standalone
678 /// instruction in assembly. In markup, it could represent a list item or a table row.
679 ///
680 /// # Examples
681 /// - **Rust**: A `Stmt` inside a block.
682 /// - **Markdown**: `ListItem` or `TableCell`.
683 /// - **SQL**: A standalone `Statement` or a `Clause` (like `WHERE`).
684 /// - **ASM**: A single `Instruction` (e.g., `NOP`).
685 Statement,
686
687 /// A node representing a computed result or a complex logical operation.
688 ///
689 /// Unlike a simple `Value` (which is an atomic literal), an `Expression` involves
690 /// operators or logic that must be evaluated.
691 ///
692 /// # Examples
693 /// - **Rust**: `BinaryExpr`, `UnaryExpr`, or `RangeExpr`.
694 /// - **SQL**: `BinaryOp` in a `WHERE` clause.
695 /// - **Python**: `ListComprehension` or `Lambda`.
696 Expression,
697
698 /// A node that performs an invocation or call to a function, method, or macro.
699 ///
700 /// This role identifies the active execution of a named entity with optional arguments.
701 ///
702 /// # Examples
703 /// - **Rust**: `CallExpr`, `MethodCallExpr`, or `MacroInvocation`.
704 /// - **SQL**: `FunctionCall` (e.g., `COUNT(*)`).
705 /// - **Excel**: A formula call.
706 Call,
707
708 /// A node representing an **atomic** data value or a primitive constant.
709 ///
710 /// This role is strictly for atomic values like numbers, strings, or booleans.
711 /// It **does not** include composite structures like arrays `[]` or objects `{}`,
712 /// which should be categorized as [`UniversalElementRole::Container`].
713 ///
714 /// # Examples
715 /// - **Rust**: `Literal` (strings, numbers, booleans).
716 /// - **Markdown**: `InlineCode`, `Emphasis`, or `Strong`.
717 /// - **SQL**: `Literal` values.
718 /// - **JSON/YAML**: Atomic `Scalar` values (strings, integers, nulls).
719 Value,
720
721 /// A node that acts as a host for content in a different language or a raw
722 /// fragment requiring a separate parsing pass (Language Injection).
723 ///
724 /// # Examples
725 /// - **HTML**: A `<script>` or `<style>` block containing JS/CSS.
726 /// - **Markdown**: `CodeBlock` (host for other languages).
727 /// - **Rust/Java**: A string literal containing SQL (if marked for injection).
728 /// - **PHP**: Raw HTML fragments outside of `<?php ... ?>` tags.
729 Embedded,
730
731 /// A node specifically created to represent a syntax error or recovery point
732 /// in the source code.
733 Error,
734
735 /// No specific structural role assigned or recognized for this element.
736 None,
737}
738
739impl ElementRole for UniversalElementRole {
740 fn universal(&self) -> UniversalElementRole {
741 *self
742 }
743
744 fn name(&self) -> &str {
745 match *self {
746 UniversalElementRole::Container => "meta.block",
747 UniversalElementRole::Statement => "meta.statement",
748 UniversalElementRole::Binding => "variable.other.declaration",
749 UniversalElementRole::Reference => "variable.other.usage",
750 UniversalElementRole::Call => "entity.name.function.call",
751 UniversalElementRole::Expression => "meta.expression",
752 UniversalElementRole::Value => "constant",
753 UniversalElementRole::Definition => "entity.name.function",
754 UniversalElementRole::Typing => "entity.name.type",
755 UniversalElementRole::Metadata => "meta.preprocessor",
756 UniversalElementRole::Attribute => "entity.other.attribute-name",
757 UniversalElementRole::AttributeKey => "entity.other.attribute-name.key",
758 UniversalElementRole::Detail => "meta.detail",
759 UniversalElementRole::Name => "entity.name",
760 UniversalElementRole::Embedded => "meta.embedded",
761 UniversalElementRole::Documentation => "comment.block.documentation",
762 UniversalElementRole::Root => "source",
763 UniversalElementRole::Error => "invalid",
764 UniversalElementRole::None => "none",
765 }
766 }
767}