oak_core/language/mod.rs
1#[cfg(feature = "serde")]
2use serde::{Deserialize, Serialize};
3use std::{fmt::Debug, hash::Hash};
4
/// Represents the broad category a language belongs to.
///
/// A language advertises its category through [`Language::CATEGORY`], which
/// defaults to [`LanguageCategory::Programming`] when not overridden.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum LanguageCategory {
    /// General-purpose programming languages (e.g., Rust, C, Java).
    Programming,
    /// Markup and document languages (e.g., Markdown, HTML, Typst).
    Markup,
    /// Configuration and data serialization languages (e.g., YAML, JSON, TOML).
    Config,
    /// Styling languages (e.g., CSS, Sass, Less).
    StyleSheet,
    /// Domain-specific languages or specialized notation (e.g., SQL, Regex, Math).
    Dsl,
    /// Modeling languages (e.g., UML, Mermaid, PlantUML).
    Modeling,
    /// Other or unclassified.
    Other,
}
24
25/// Language definition trait that coordinates all language-related types and behaviors.
26///
27/// This trait serves as the foundation for defining programming languages within the
28/// incremental parsing system. It acts as a marker trait that ties together various
29/// language-specific components like lexers, parsers, and rebuilders.
30///
31/// # Overview
32///
33/// The Language trait is the central abstraction that enables the parsing framework
34/// to be language-agnostic while still providing language-specific functionality.
35/// Each language implementation must define its own types for tokens, elements,
36/// and the root structure of the parsed tree.
37///
38/// # Design Philosophy
39///
40/// The trait follows a compositional design where:
41/// - `TokenType` defines the atomic units of the language (tokens)
42/// - `ElementType` defines the composite structures (nodes)
43/// - `TypedRoot` defines the top-level structure of the parsed document
44///
45/// This separation allows for maximum flexibility while maintaining type safety
46/// and performance characteristics required for incremental parsing.
47///
48/// # Examples
49///
50/// ```rust
51/// # use oak_core::{Language, TokenType, ElementType, UniversalTokenRole, UniversalElementRole};
52/// // Define a simple language
53/// #[derive(Clone)]
54/// struct MyLanguage;
55///
56/// impl Language for MyLanguage {
57/// const NAME: &'static str = "my-language";
58/// type TokenType = MyToken;
59/// type ElementType = MyElement;
60/// type TypedRoot = ();
61/// }
62///
63/// // With corresponding type definitions
64/// #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
65/// enum MyToken {
66/// Identifier,
67/// EndOfStream,
68/// }
69///
70/// impl TokenType for MyToken {
71/// const END_OF_STREAM: Self = MyToken::EndOfStream;
72/// type Role = UniversalTokenRole;
73/// fn role(&self) -> Self::Role { UniversalTokenRole::None }
74/// }
75///
76/// #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
77/// enum MyElement {}
78///
79/// impl ElementType for MyElement {
80/// type Role = UniversalElementRole;
81/// fn role(&self) -> Self::Role { UniversalElementRole::None }
82/// }
83/// ```
84pub trait Language: Send + Sync {
85 /// The name of the language (e.g., "rust", "sql").
86 const NAME: &'static str;
87
88 /// The category of the language.
89 const CATEGORY: LanguageCategory = LanguageCategory::Programming;
90
91 /// The token type used to represent different token and node types in the language.
92 ///
93 /// This associated type defines how different syntactic elements (tokens, nodes) are
94 /// categorized and identified within the language. It must implement `Copy` and `Eq`
95 /// to ensure efficient handling in the parsing system.
96 ///
97 /// # Requirements
98 ///
99 /// The token type must:
100 /// - Implement the `TokenType` trait
101 /// - Be copyable to enable efficient passing
102 /// - Support equality comparison for token matching
103 /// - Be sendable across thread boundaries
104 ///
105 /// # Examples
106 ///
107 /// ```
108 /// #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
109 /// enum RustSyntaxKind {
110 /// LetKeyword,
111 /// Identifier,
112 /// Number,
113 /// // ... other token kinds
114 /// }
115 /// ```
116 type TokenType: TokenType;
117
118 /// The element type used to represent composite structures in the parsed tree.
119 ///
120 /// While tokens represent the atomic units of the language, elements represent
121 /// the composite structures formed by combining tokens according to grammar rules.
122 /// This includes expressions, statements, declarations, and other syntactic constructs.
123 ///
124 /// # Requirements
125 ///
126 /// The element type must:
127 /// - Implement the `ElementType` trait
128 /// - Be copyable for efficient handling
129 /// - Support equality comparison
130 /// - Be sendable across thread boundaries
131 type ElementType: ElementType;
132
133 /// The root type for the parsed tree that represents the top-level structure of the language.
134 ///
135 /// This associated type defines the structure of the root node in the parsed tree,
136 /// which typically contains the entire parsed source code organized according to the
137 /// language's grammar rules. The root type serves as the entry point for traversing
138 /// and manipulating the parsed representation.
139 ///
140 /// # Design Considerations
141 ///
142 /// The root type should:
143 /// - Contain references to all top-level language constructs
144 /// - Provide efficient access to the parsed content
145 /// - Support incremental updates when the source changes
146 ///
147 /// # Examples
148 ///
149 /// ```ignore
150 /// struct RustRoot {
151 /// items: Vec<RustItem>,
152 /// }
153 ///
154 /// struct RustRoot {
155 /// modules: Vec<Module>,
156 /// imports: Vec<Import>,
157 /// declarations: Vec<Declaration>,
158 /// }
159 /// ```
160 type TypedRoot;
161}
162
163/// Token type definitions for tokens in the parsing system.
164///
165/// This module provides the [`TokenType`] trait which serves as the foundation
166/// for defining different types of tokens in the parsing system.
167/// It enables categorization of token elements and provides methods for
168/// identifying their roles in the language grammar.
169///
170/// # Universal Grammar Philosophy
171///
172/// The role mechanism in Oak is inspired by the concept of "Universal Grammar".
173/// While every language has its own unique "Surface Structure" (its specific token kinds),
174/// most share a common "Deep Structure" (syntactic roles).
175///
176/// By mapping language-specific kinds to [`UniversalTokenRole`], we enable generic tools
177/// like highlighters and formatters to work across 100+ languages without deep
178/// knowledge of each one's specific grammar.
179///
180/// # Implementation Guidelines
181///
182/// When implementing this trait for a specific language:
183/// - Use an enum with discriminant values for efficient matching
184/// - Ensure all variants are Copy and Eq for performance
185/// - Include an END_OF_STREAM variant to signal input termination
186/// - Define a `Role` associated type and implement the `role()` method to provide
187/// syntactic context.
188///
189/// # Examples
190///
191/// ```ignore
192/// #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
193/// enum SimpleToken {
194/// Identifier,
195/// Number,
196/// Plus,
197/// EndOfStream,
198/// }
199///
200/// impl TokenType for SimpleToken {
201/// const END_OF_STREAM: Self = SimpleToken::EndOfStream;
202/// type Role = UniversalTokenRole; // Or a custom Role type
203///
204/// fn role(&self) -> Self::Role {
205/// match self {
206/// SimpleToken::Identifier => UniversalTokenRole::Name,
207/// SimpleToken::Number => UniversalTokenRole::Literal,
208/// SimpleToken::Plus => UniversalTokenRole::Operator,
209/// _ => UniversalTokenRole::None,
210/// }
211/// }
212///
213/// // ... other methods
214/// }
215/// ```
216pub trait TokenType: Copy + Eq + Hash + Send + Sync + std::fmt::Debug {
217 /// The associated role type for this token kind.
218 type Role: TokenRole;
219
220 /// A constant representing the end of the input stream.
221 ///
222 /// This special token type is used to signal that there are no more tokens
223 /// to process in the input. It's essential for parsers to recognize when
224 /// they've reached the end of the source code.
225 ///
226 /// # Implementation Notes
227 ///
228 /// This should be a specific variant of your token enum that represents
229 /// the end-of-stream condition. It's used throughout the parsing framework
230 /// to handle boundary conditions and termination logic.
231 const END_OF_STREAM: Self;
232
233 /// Returns the general syntactic role of this token.
234 ///
235 /// This provides a language-agnostic way for tools to understand the purpose
236 /// of a token (e.g., is it a name, a literal, or a keyword) across diverse
237 /// languages like SQL, ASM, YAML, or Rust.
238 fn role(&self) -> Self::Role;
239
240 /// Returns true if this token matches the specified language-specific role.
241 fn is_role(&self, role: Self::Role) -> bool {
242 self.role() == role
243 }
244
245 /// Returns true if this token matches the specified universal role.
246 fn is_universal(&self, role: UniversalTokenRole) -> bool {
247 self.role().universal() == role
248 }
249
250 /// Returns true if this token represents a comment.
251 ///
252 /// # Default Implementation
253 ///
254 /// Based on [`UniversalTokenRole::Comment`].
255 fn is_comment(&self) -> bool {
256 self.is_universal(UniversalTokenRole::Comment)
257 }
258
259 /// Returns true if this token represents whitespace.
260 ///
261 /// # Default Implementation
262 ///
263 /// Based on [`UniversalTokenRole::Whitespace`].
264 fn is_whitespace(&self) -> bool {
265 self.is_universal(UniversalTokenRole::Whitespace)
266 }
267
268 /// Returns true if this token represents an error condition.
269 ///
270 /// # Default Implementation
271 ///
272 /// Based on [`UniversalTokenRole::Error`].
273 fn is_error(&self) -> bool {
274 self.is_universal(UniversalTokenRole::Error)
275 }
276
277 /// Returns true if this token represents trivia (whitespace, comments, etc.).
278 ///
279 /// Trivia tokens are typically ignored during parsing but preserved for
280 /// formatting and tooling purposes. They don't contribute to the syntactic
281 /// structure of the language but are important for maintaining the original
282 /// source code formatting.
283 ///
284 /// # Default Implementation
285 ///
286 /// The default implementation considers a token as trivia if it is either
287 /// whitespace or a comment. Language implementations can override this
288 /// method if they have additional trivia categories.
289 ///
290 /// # Examples
291 ///
292 /// ```ignore
293 /// // Skip over trivia tokens during parsing
294 /// while current_token.is_ignored() {
295 /// advance_to_next_token();
296 /// }
297 /// ```
298 fn is_ignored(&self) -> bool {
299 self.is_whitespace() || self.is_comment()
300 }
301
302 /// Returns true if this token represents the end of the input stream.
303 ///
304 /// This method provides a convenient way to check if a token is the
305 /// special END_OF_STREAM token without directly comparing with the constant.
306 ///
307 /// # Examples
308 ///
309 /// ```ignore
310 /// // Loop until we reach the end of the input
311 /// while !current_token.is_end_of_stream() {
312 /// process_token(current_token);
313 /// current_token = next_token();
314 /// }
315 /// ```
316 fn is_end_of_stream(&self) -> bool {
317 *self == Self::END_OF_STREAM
318 }
319}
320
321/// A trait for types that can represent a token's syntactic role.
322pub trait TokenRole: Copy + Eq + Send {
323 /// Maps this role to a universal, language-agnostic role.
324 fn universal(&self) -> UniversalTokenRole;
325
326 /// Returns a specific name for this role, used for granular highlighting.
327 ///
328 /// For universal roles, this should return the standard scope name (e.g., "keyword").
329 /// For language-specific roles, it can return more specific names (e.g., "keyword.control").
330 fn name(&self) -> &str;
331}
332
/// Represents the general syntactic role of a token across diverse languages.
///
/// # Universal Grammar
///
/// This mechanism is inspired by Noam Chomsky's Universal Grammar theory.
/// It posits that while the "Surface Structure" (specific token kinds) of languages
/// may vary wildly, they share a common "Deep Structure" (syntactic roles).
///
/// In the Oak framework:
/// - **Surface Structure**: Refers to specific token kinds defined by a language (e.g., Rust's `PubKeyword`).
/// - **Deep Structure**: Refers to the universal roles defined in this enum (e.g., [`UniversalTokenRole::Keyword`]).
///
/// By mapping to these roles, generic tools can identify names, literals, or operators
/// across 100+ languages without needing to learn the specifics of each grammar.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
pub enum UniversalTokenRole {
    /// Language reserved words or built-in commands (e.g., 'SELECT', 'let', 'MOV').
    Keyword,
    /// Identifiers, labels, keys, tags, or any name-like token.
    Name,
    /// Literal values like strings, numbers, booleans, or nulls.
    Literal,
    /// An escape sequence or a special character representation within a literal.
    Escape,
    /// Mathematical, logical, or structural operators (e.g., '+', '=>', 'LIKE').
    Operator,
    /// Structural characters like brackets, commas, semicolons.
    Punctuation,
    /// Developer annotations or documentation.
    Comment,
    /// Formatting characters like spaces or tabs.
    Whitespace,
    /// Malformed or unrecognized content.
    Error,
    /// No specific role assigned.
    None,
    /// End of stream marker.
    Eof,
}
374
375impl TokenRole for UniversalTokenRole {
376 fn universal(&self) -> UniversalTokenRole {
377 *self
378 }
379
380 fn name(&self) -> &str {
381 match *self {
382 UniversalTokenRole::Keyword => "keyword",
383 UniversalTokenRole::Name => "variable.other",
384 UniversalTokenRole::Literal => "constant",
385 UniversalTokenRole::Escape => "constant.character.escape",
386 UniversalTokenRole::Operator => "keyword.operator",
387 UniversalTokenRole::Punctuation => "punctuation",
388 UniversalTokenRole::Comment => "comment",
389 UniversalTokenRole::Whitespace => "punctuation.whitespace",
390 UniversalTokenRole::Error => "invalid",
391 UniversalTokenRole::None => "none",
392 UniversalTokenRole::Eof => "punctuation.eof",
393 }
394 }
395}
396
397/// Element type definitions for nodes in the parsed tree.
398///
399/// While tokens represent the atomic units of a language, elements represent the
400/// composite structures formed by combining tokens according to grammar rules.
401/// This includes expressions, statements, declarations, and other syntactic constructs.
402///
403/// # Universal Grammar Philosophy
404///
405/// Just like tokens, syntax tree elements are mapped from their "Surface Structure"
406/// (language-specific nodes) to a "Deep Structure" via [`UniversalElementRole`].
407///
408/// This allows structural analysis tools (like symbol outline extractors) to
409/// identify [`UniversalElementRole::Binding`] (definitions) or [`UniversalElementRole::Container`]
410/// (scopes/blocks) uniformly across different language families.
411///
412/// # Implementation Guidelines
413///
414/// When implementing this trait for a specific language:
415/// - Use an enum with discriminant values for efficient matching
416/// - Include a Root variant to identify the top-level element
417/// - Include an Error variant for malformed constructs
418/// - Define a `Role` associated type and implement the `role()` method.
419///
420/// # Examples
421///
422/// ```ignore
423/// #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
424/// enum MyElement {
425/// Root,
426/// FunctionDeclaration,
427/// Block,
428/// Error,
429/// }
430///
431/// impl ElementType for MyElement {
432/// type Role = UniversalElementRole;
433///
434/// fn role(&self) -> Self::Role {
435/// match self {
436/// MyElement::Root => UniversalElementRole::Root,
437/// MyElement::FunctionDeclaration => UniversalElementRole::Binding,
438/// MyElement::Block => UniversalElementRole::Container,
439/// MyElement::Error => UniversalElementRole::Error,
440/// }
441/// }
442///
443/// fn is_root(&self) -> bool {
444/// matches!(self, MyElement::Root)
445/// }
446///
447/// fn is_error(&self) -> bool {
448/// matches!(self, MyElement::Error)
449/// }
450/// }
451/// ```
452pub trait ElementType: Copy + Eq + Hash + Send + Sync + std::fmt::Debug {
453 /// The associated role type for this element kind.
454 type Role: ElementRole;
455
456 /// Returns the general syntactic role of this element.
457 ///
458 /// This helps external tools understand the structural purpose of a node
459 /// (e.g., is it a container, a binding, or a value) without deep language knowledge.
460 fn role(&self) -> Self::Role;
461
462 /// Returns true if this element matches the specified language-specific role.
463 fn is_role(&self, role: Self::Role) -> bool {
464 self.role() == role
465 }
466
467 /// Returns true if this element matches the specified universal role.
468 fn is_universal(&self, role: UniversalElementRole) -> bool {
469 self.role().universal() == role
470 }
471
472 /// Returns true if this element represents the root of the parsed tree.
473 ///
474 /// # Default Implementation
475 ///
476 /// Based on [`UniversalElementRole::Root`].
477 fn is_root(&self) -> bool {
478 self.is_universal(UniversalElementRole::Root)
479 }
480
481 /// Returns true if this element represents an error condition.
482 ///
483 /// # Default Implementation
484 ///
485 /// Based on [`UniversalElementRole::Error`].
486 fn is_error(&self) -> bool {
487 self.is_universal(UniversalElementRole::Error)
488 }
489}
490
491/// A trait for types that can represent an element's structural role.
492pub trait ElementRole: Copy + Eq + Send {
493 /// Maps this role to a universal, language-agnostic role.
494 fn universal(&self) -> UniversalElementRole;
495
496 /// Returns a specific name for this role, used for granular highlighting.
497 fn name(&self) -> &str;
498}
499
/// Represents the general structural role of a syntax tree element.
///
/// # Universal Grammar
///
/// This mechanism is inspired by Noam Chomsky's Universal Grammar theory, applied
/// here to the structural hierarchy of syntax trees. It posits that while the
/// "Surface Structure" (the specific production rules of a grammar) varies across
/// languages, they share a common "Deep Structure" (structural intent).
///
/// In the Oak framework, syntax tree elements are categorized by their role:
/// - **Surface Structure**: Refers to specific node kinds defined by a language
///   (e.g., Rust's `FnDeclaration`, SQL's `SelectStatement`, or YAML's `Mapping`).
/// - **Deep Structure**: Refers to the universal structural patterns defined in this enum.
///
/// By mapping to these roles, we can enable sophisticated analysis across diverse
/// language families:
/// - **Containers & Statements**: Identify hierarchical scopes and their constituents
///   (e.g., a SQL table is a container, its clauses are statements).
/// - **Bindings & References**: Identify the flow of information and identifiers
///   (e.g., an ASM label is a binding, a jump instruction is a reference).
/// - **Values**: Identify the atomic data payload or expression results.
///
/// # Design Philosophy: The 99% Rule
///
/// This enum is designed to provide a "sufficiently complete" abstraction for common tool
/// requirements (Highlighting, Outline, Navigation, and Refactoring) while maintaining
/// language-agnostic simplicity.
///
/// ### 1. Structural Identity (The "What")
/// Roles describe a node's primary structural responsibility in the tree, not its
/// domain-specific semantic meaning. For example:
/// - A "Class" or "Function" is structurally a [`UniversalElementRole::Definition`] and often a [`UniversalElementRole::Container`].
/// - An "Import" is structurally a [`UniversalElementRole::Statement`] that contains a [`UniversalElementRole::Reference`].
///
/// ### 2. Broad Categories (The "How")
/// We categorize elements into four major structural groups:
/// - **Flow Control & Logic**: [`UniversalElementRole::Statement`], [`UniversalElementRole::Expression`], [`UniversalElementRole::Call`], and [`UniversalElementRole::Root`].
/// - **Symbol Management**: [`UniversalElementRole::Definition`], [`UniversalElementRole::Binding`], and [`UniversalElementRole::Reference`].
/// - **Hierarchy & Scoping**: [`UniversalElementRole::Container`].
/// - **Metadata & Auxiliaries**: [`UniversalElementRole::Typing`], [`UniversalElementRole::Metadata`], [`UniversalElementRole::Attribute`], [`UniversalElementRole::Documentation`], etc.
///
/// ### 3. Intent-Based Selection
/// When a node could fit multiple roles, choose the one that represents its **primary
/// structural intent**.
/// - **Example**: In Rust, an `if` expression is both an `Expression` and a `Container`.
///   However, its primary role in the tree is as an [`UniversalElementRole::Expression`] (producing a value),
///   whereas its children (the blocks) are [`UniversalElementRole::Container`]s.
/// - **Example**: In Markdown, a "List" is a [`UniversalElementRole::Container`], while each "ListItem" is a
///   [`UniversalElementRole::Statement`] within that container.
///
/// ### 4. Intentional Exclusions
/// We intentionally exclude roles that can be represented by combining existing roles or
/// that require deep semantic analysis:
/// - **Keyword-specific roles**: Roles like "Loop", "Conditional", or "Module" are excluded.
///   These are surface-level distinctions. In the Deep Structure, they are all [`UniversalElementRole::Container`]s
///   or [`UniversalElementRole::Statement`]s.
/// - **Semantic Relationships**: Roles like "Inheritance", "Implementation", or "Dependency"
///   are excluded. These are better handled by semantic graph analysis rather than
///   syntactic tree roles.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub enum UniversalElementRole {
    /// The top-level root of the syntax tree, representing the entire document or source file.
    Root,

    /// A high-level structural container that defines a scope or logical grouping.
    Container,

    /// A node that represents the entire declaration or definition of a symbol.
    ///
    /// This role identifies the "whole" entity that defines something in the code,
    /// which is crucial for building symbol trees and navigation outlines.
    ///
    /// # Examples
    /// - **Rust**: The entire `Fn` declaration block, `Struct` item, or `Enum`.
    /// - **Markdown**: `Heading` or `LinkDefinition`.
    /// - **SQL**: The whole `CREATE TABLE` or `CREATE PROCEDURE` statement.
    /// - **ASM**: A `Proc` (procedure) block or a multi-line data definition.
    /// - **YAML**: A schema-defined object or a complex configuration block.
    Definition,

    /// A node that specifically performs the act of binding a name to an entity.
    ///
    /// Unlike `Definition`, which represents the entire construct, `Binding` targets
    /// the specific part (usually the identifier) that introduces the name.
    ///
    /// # Examples
    /// - **Rust**: The identifier node in a `let` pattern or function name.
    /// - **Markdown**: `LinkLabel` in a reference link definition.
    /// - **SQL**: The `Table` name identifier in `CREATE TABLE`.
    /// - **ASM**: A `Label` node (e.g., `main:`).
    /// - **YAML**: The `Key` in a key-value mapping.
    Binding,

    /// A node that refers to an existing name or entity defined elsewhere.
    ///
    /// # Examples
    /// - **Rust**: `PathExpr` (variable usage) or `MethodCall`.
    /// - **Markdown**: `LinkReference` or `FootnoteReference`.
    /// - **SQL**: `ColumnName` in a `SELECT` clause or `TableName` in `FROM`.
    /// - **ASM**: A `Label` reference in a jump (e.g., `JMP main`).
    /// - **YAML**: An `Alias` node referring to an anchor (e.g., `*anchor_name`).
    Reference,

    /// A node representing a type signature, constraint, or type reference.
    ///
    /// This role distinguishes type information from general logic or values,
    /// which is essential for type checking and intelligent completion.
    ///
    /// # Examples
    /// - **Rust**: `TypePath` (e.g., `: i32`), `GenericArgument`, or `WhereClause`.
    /// - **SQL**: `DataType` (e.g., `VARCHAR(255)` or `INT`).
    /// - **ASM**: Size specifiers (e.g., `DWORD`, `PTR`).
    /// - **TypeScript**: `TypeAnnotation` or `InterfaceDeclaration`.
    Typing,

    /// Structured comments or documentation nodes attached to other elements.
    ///
    /// Unlike raw `Comment` tokens, these are syntax nodes that may contain
    /// their own internal structure (like Markdown or tagged parameters).
    ///
    /// # Examples
    /// - **Rust**: `DocComment` (e.g., `/// ...`).
    /// - **Java**: `Javadoc` blocks.
    /// - **Python**: `Docstring` literals.
    Documentation,

    /// High-level annotations, decorators, or macros that provide extra semantic info.
    ///
    /// # Metadata vs Attribute
    /// - **Metadata**: Usually refers to language-level extensions that "decorate" an element
    ///   from the outside, often affecting compilation or runtime behavior (e.g., Rust attributes).
    /// - **Attribute**: Usually refers to built-in, structural properties that are part of the
    ///   element's native definition (e.g., HTML attributes).
    ///
    /// # Examples
    /// - **Rust**: `Attribute` (e.g., `#[derive(...)]`) or `MacroCall`.
    /// - **Markdown**: `Frontmatter` (YAML/TOML header).
    /// - **Java/TS**: `@Decorator` or `@Annotation`.
    /// - **Python**: `@decorator` syntax.
    Metadata,

    /// A specific property, flag, or attribute-value pair.
    ///
    /// Unlike `Metadata`, which decorates an element with external logic, `Attribute`
    /// represents intrinsic properties defined by the language's schema or structure.
    ///
    /// # Examples
    /// - **HTML/XML**: An `Attribute` (e.g., `id="main"`).
    /// - **Markdown**: `LinkTitle` or `ImageAlt` text.
    /// - **YAML**: A specific configuration property.
    /// - **ASM**: Segment attributes (e.g., `READONLY`, `EXECUTE`).
    Attribute,

    /// The key part of an attribute, property, or configuration entry.
    ///
    /// This role is distinct because:
    /// - It is not a **Reference** (it doesn't refer to an external symbol).
    /// - It is not a traditional **Binding** (it doesn't define a symbol in a global or lexical scope).
    /// - It is not a **Keyword** (it is typically a user-defined or schema-defined identifier).
    ///
    /// # Examples
    /// - **HTML**: The `id` in `id="main"`.
    /// - **Markdown**: `AttributeName` (in Pandoc-style `{ #id .class }`).
    /// - **YAML**: The key in a property mapping.
    /// - **TOML**: The key in a table entry.
    AttributeKey,

    /// A node that provides additional details or secondary information for another element.
    ///
    /// # Examples
    /// - **Rust**: `GenericParameter` list, `FunctionParameter` list.
    /// - **SQL**: `Constraint` details.
    Detail,

    /// A node that represents the name of an element, typically used in declarations.
    ///
    /// # Examples
    /// - **Rust**: The name identifier in a function or struct definition.
    /// - **HTML**: The tag name in an element.
    Name,

    /// A discrete syntactic unit within a container, representing a single
    /// logical entry or instruction.
    ///
    /// This typically maps to a **Statement** in programming languages, or a standalone
    /// instruction in assembly. In markup, it could represent a list item or a table row.
    ///
    /// # Examples
    /// - **Rust**: A `Stmt` inside a block.
    /// - **Markdown**: `ListItem` or `TableCell`.
    /// - **SQL**: A standalone `Statement` or a `Clause` (like `WHERE`).
    /// - **ASM**: A single `Instruction` (e.g., `NOP`).
    Statement,

    /// A node representing a computed result or a complex logical operation.
    ///
    /// Unlike a simple `Value` (which is an atomic literal), an `Expression` involves
    /// operators or logic that must be evaluated.
    ///
    /// # Examples
    /// - **Rust**: `BinaryExpr`, `UnaryExpr`, or `RangeExpr`.
    /// - **SQL**: `BinaryOp` in a `WHERE` clause.
    /// - **Python**: `ListComprehension` or `Lambda`.
    Expression,

    /// A node that performs an invocation or call to a function, method, or macro.
    ///
    /// This role identifies the active execution of a named entity with optional arguments.
    ///
    /// # Examples
    /// - **Rust**: `CallExpr`, `MethodCallExpr`, or `MacroInvocation`.
    /// - **SQL**: `FunctionCall` (e.g., `COUNT(*)`).
    /// - **Excel**: A formula call.
    Call,

    /// A node representing an **atomic** data value or a primitive constant.
    ///
    /// This role is strictly for atomic values like numbers, strings, or booleans.
    /// It **does not** include composite structures like arrays `[]` or objects `{}`,
    /// which should be categorized as [`UniversalElementRole::Container`].
    ///
    /// # Examples
    /// - **Rust**: `Literal` (strings, numbers, booleans).
    /// - **Markdown**: `InlineCode`, `Emphasis`, or `Strong`.
    /// - **SQL**: `Literal` values.
    /// - **JSON/YAML**: Atomic `Scalar` values (strings, integers, nulls).
    Value,

    /// A node that acts as a host for content in a different language or a raw
    /// fragment requiring a separate parsing pass (Language Injection).
    ///
    /// # Examples
    /// - **HTML**: A `<script>` or `<style>` block containing JS/CSS.
    /// - **Markdown**: `CodeBlock` (host for other languages).
    /// - **Rust/Java**: A string literal containing SQL (if marked for injection).
    /// - **PHP**: Raw HTML fragments outside of `<?php ... ?>` tags.
    Embedded,

    /// A node specifically created to represent a syntax error or recovery point
    /// in the source code.
    Error,

    /// No specific structural role assigned or recognized for this element.
    None,
}
748
749impl ElementRole for UniversalElementRole {
750 fn universal(&self) -> UniversalElementRole {
751 *self
752 }
753
754 fn name(&self) -> &str {
755 match *self {
756 UniversalElementRole::Container => "meta.block",
757 UniversalElementRole::Statement => "meta.statement",
758 UniversalElementRole::Binding => "variable.other.declaration",
759 UniversalElementRole::Reference => "variable.other.usage",
760 UniversalElementRole::Call => "entity.name.function.call",
761 UniversalElementRole::Expression => "meta.expression",
762 UniversalElementRole::Value => "constant",
763 UniversalElementRole::Definition => "entity.name.function",
764 UniversalElementRole::Typing => "entity.name.type",
765 UniversalElementRole::Metadata => "meta.preprocessor",
766 UniversalElementRole::Attribute => "entity.other.attribute-name",
767 UniversalElementRole::AttributeKey => "entity.other.attribute-name.key",
768 UniversalElementRole::Detail => "meta.detail",
769 UniversalElementRole::Name => "entity.name",
770 UniversalElementRole::Embedded => "meta.embedded",
771 UniversalElementRole::Documentation => "comment.block.documentation",
772 UniversalElementRole::Root => "source",
773 UniversalElementRole::Error => "invalid",
774 UniversalElementRole::None => "none",
775 }
776 }
777}