Skip to main content

normalize_languages/
traits.rs

1//! Core trait for language support.
2
3use tree_sitter::Node;
4
5// Re-export core types from normalize-facts-core
6pub use normalize_facts_core::{Import, Symbol, SymbolKind, Visibility};
7
/// Location of a container's body (for prepend/append editing operations).
///
/// All offsets are byte offsets into the source text the container was parsed from.
/// Derives `Clone`/`PartialEq`/`Eq` so results can be copied and compared directly
/// (e.g. in tests of `Language::analyze_container_body` implementations).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ContainerBody {
    /// Byte offset where body content starts (after opening delimiter/heading)
    pub content_start: usize,
    /// Byte offset where body content ends (before closing delimiter)
    pub content_end: usize,
    /// Indentation string for new content inserted into the body
    pub inner_indent: String,
    /// True if the body has no meaningful content (empty, only pass/braces)
    pub is_empty: bool,
}
20
/// Information about what a class/container implements or extends.
///
/// The `Default` value (`is_interface == false`, empty `implements`) describes a
/// plain container with no declared supertypes.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct ImplementsInfo {
    /// True if this is an interface/protocol/trait definition (not a concrete class).
    pub is_interface: bool,
    /// List of implemented interfaces, superclasses, or mixed-in traits.
    pub implements: Vec<String>,
}
29
/// Embedded content block (e.g., JS in Vue, CSS in HTML).
///
/// Derives `PartialEq`/`Eq` so extracted blocks can be compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EmbeddedBlock {
    /// Grammar to use for parsing (e.g., "javascript", "css")
    pub grammar: &'static str,
    /// Extracted source content
    pub content: String,
    /// 1-indexed start line in the parent file
    pub start_line: usize,
}
40
/// Capability trait: language has code symbols (functions, classes, types, etc.).
///
/// Config and data languages (CSS, HTML, JSON, TOML, XML, YAML) don't implement this.
/// All general-purpose programming languages do.
/// Access via `lang.as_symbols()` rather than `lang.has_symbols()`.
///
/// This is a marker trait: it declares no methods of its own. Because
/// `Language::as_symbols` returns `None` by default, an implementor must also
/// override that method to return `Some(self)` for the capability to be visible.
pub trait LanguageSymbols: Language {}
47
/// Capability trait: language can contain embedded blocks in another language.
///
/// Only a handful of multi-language formats implement this (Vue, HTML, Svelte).
/// Access via `lang.as_embedded()` rather than casting.
///
/// `Language::as_embedded` returns `None` by default, so an implementor must
/// override it to return `Some(self)` in addition to implementing this trait.
pub trait LanguageEmbedded: Language {
    /// Extract embedded content from a node (e.g., JS/CSS in Vue/HTML).
    ///
    /// `node` is the syntax node to inspect and `content` is the source text
    /// it belongs to.
    ///
    /// Returns None for nodes that don't contain embedded code in another language.
    fn embedded_content(&self, node: &Node, content: &str) -> Option<EmbeddedBlock>;
}
57
58// === Helper functions for common extractor patterns ===
59
60/// Create a simple symbol with standard defaults.
61///
62/// Used by languages with straightforward function/method syntax where symbols:
63/// - Have public visibility
64/// - Use first line as signature
65/// - Have no attributes or children
66/// - Don't implement interfaces
67///
68/// Languages using this: cmake, glsl, graphql, hlsl, awk, elm, fish, haskell,
69/// jq, julia, ocaml, powershell, zsh
70pub fn simple_symbol(
71    node: &tree_sitter::Node,
72    content: &str,
73    name: &str,
74    kind: SymbolKind,
75    docstring: Option<String>,
76) -> Symbol {
77    let text = &content[node.byte_range()];
78    let first_line = text.lines().next().unwrap_or(text);
79
80    Symbol {
81        name: name.to_string(),
82        kind,
83        signature: first_line.trim().to_string(),
84        docstring,
85        attributes: Vec::new(),
86        start_line: node.start_position().row + 1,
87        end_line: node.end_position().row + 1,
88        visibility: Visibility::Public,
89        children: Vec::new(),
90        is_interface_impl: false,
91        implements: Vec::new(),
92    }
93}
94
95/// Create a simple function symbol (convenience wrapper).
96pub fn simple_function_symbol(
97    node: &tree_sitter::Node,
98    content: &str,
99    name: &str,
100    docstring: Option<String>,
101) -> Symbol {
102    simple_symbol(node, content, name, SymbolKind::Function, docstring)
103}
104
105/// Unified language support trait.
106///
107/// Each language implements this trait to provide:
108/// - Node kind classification
109/// - Symbol extraction (functions, classes, types)
110/// - Import/export parsing
111/// - Complexity analysis nodes
112/// - Visibility detection
113/// - Edit support (container bodies, docstrings)
114pub trait Language: Send + Sync {
115    /// Display name for this language (e.g., "Python", "C++")
116    fn name(&self) -> &'static str;
117
118    /// File extensions this language handles (e.g., ["py", "pyi", "pyw"])
119    fn extensions(&self) -> &'static [&'static str];
120
121    /// Grammar name for arborium (e.g., "python", "rust")
122    fn grammar_name(&self) -> &'static str;
123
124    /// Capability query: returns `Some(self)` if this language has code symbols
125    /// (functions, classes, types, etc.). Returns `None` for config/data languages.
126    /// Implement `LanguageSymbols` and override this to opt in.
127    fn as_symbols(&self) -> Option<&dyn LanguageSymbols> {
128        None
129    }
130
131    // === Symbol Building ===
132
133    /// Extract the docstring for a definition node.
134    /// Called by generic extraction for every tagged symbol.
135    /// Returns None if this language has no docstring convention or the node has no docstring.
136    fn extract_docstring(&self, _node: &Node, _content: &str) -> Option<String> {
137        None
138    }
139
140    /// Extract attributes/annotations/decorators attached to a definition node.
141    /// Called by generic extraction for every tagged symbol.
142    /// Returns empty vec if this language has no attribute convention.
143    fn extract_attributes(&self, _node: &Node, _content: &str) -> Vec<String> {
144        Vec::new()
145    }
146
147    /// Extract interfaces/traits/superclasses that a container node implements/extends.
148    /// Called by generic extraction for container nodes only.
149    fn extract_implements(&self, _node: &Node, _content: &str) -> ImplementsInfo {
150        ImplementsInfo::default()
151    }
152
153    /// Build the display signature for a definition node.
154    /// Default: first line of the node's source text (trimmed).
155    /// Override for languages where first-line is incomplete (e.g. Rust, Go, Java).
156    fn build_signature(&self, node: &Node, content: &str) -> String {
157        let text = &content[node.byte_range()];
158        text.lines().next().unwrap_or(text).trim().to_string()
159    }
160
161    /// Refine the symbol kind for a tagged node.
162    /// Called after tag classification assigns an initial kind (e.g. `definition.class` → `Class`).
163    /// Languages can override this to return a more specific kind based on the node's concrete type.
164    /// Default: return `tag_kind` unchanged.
165    fn refine_kind(&self, node: &Node, _content: &str, tag_kind: SymbolKind) -> SymbolKind {
166        let _ = node;
167        tag_kind
168    }
169
170    // === Import/Export ===
171
172    /// Extract imports from an import node (may return multiple)
173    fn extract_imports(&self, _node: &Node, _content: &str) -> Vec<Import> {
174        Vec::new()
175    }
176
177    /// Format an import as source code.
178    /// If `names` is Some, only include those names (for multi-import filtering).
179    /// If `names` is None, format the complete import.
180    fn format_import(&self, _import: &Import, _names: Option<&[&str]>) -> String {
181        String::new()
182    }
183
184    // === Display/Formatting ===
185
186    /// Suffix to append to signatures for tree-sitter parsing.
187    /// Function signatures are incomplete code fragments that need closing tokens
188    /// to parse correctly (e.g., Rust `fn foo()` needs `{}`, Lua `function foo()` needs `end`).
189    /// Returns the suffix to append, or empty string if none needed.
190    fn signature_suffix(&self) -> &'static str {
191        ""
192    }
193
194    // === Visibility ===
195
196    /// Get visibility of a node.
197    ///
198    /// This is a genuine interface method (not just an impl helper): `normalize-deps`
199    /// calls it externally during export detection to decide which tagged nodes are
200    /// public. The alternative — calling `extract_function/container/type()` and
201    /// inspecting `symbol.visibility` — would be correct but unnecessarily heavy
202    /// (computes signature, docstring, etc. just to check one field).
203    fn get_visibility(&self, _node: &Node, _content: &str) -> Visibility {
204        Visibility::Public
205    }
206
207    /// Check if a symbol is a test (for filtering).
208    fn is_test_symbol(&self, _symbol: &Symbol) -> bool {
209        false
210    }
211
212    /// Glob patterns (relative, using `**` wildcards) that identify dedicated test files.
213    /// Used to build a GlobSet for fast batch matching.
214    /// Return `&[]` for languages with no dedicated test files (e.g. those using only inline tests).
215    fn test_file_globs(&self) -> &'static [&'static str] {
216        &[]
217    }
218
219    /// Capability query: returns `Some(self)` if this language can contain embedded blocks
220    /// in another language (e.g., JS in Vue, CSS in HTML). Returns `None` for most languages.
221    /// Implement `LanguageEmbedded` and override this to opt in.
222    fn as_embedded(&self) -> Option<&dyn LanguageEmbedded> {
223        None
224    }
225
226    // === Edit Support ===
227
228    /// Find the body node of a container (for prepend/append)
229    fn container_body<'a>(&self, _node: &'a Node<'a>) -> Option<Node<'a>> {
230        None
231    }
232
233    /// Detect if first child of body is a docstring
234    fn body_has_docstring(&self, _body: &Node, _content: &str) -> bool {
235        false
236    }
237
238    /// Analyze a container body node and return the editable byte range.
239    /// `body_node` is the node returned by `container_body`.
240    /// Returns None if this language doesn't support container body editing.
241    fn analyze_container_body(
242        &self,
243        _body_node: &Node,
244        _content: &str,
245        _inner_indent: &str,
246    ) -> Option<ContainerBody> {
247        None
248    }
249
250    // === Module-level documentation ===
251
252    /// Extract the module-level doc comment from raw file source.
253    ///
254    /// Called when viewing a file (not a specific symbol) to populate `ViewReport.summary`.
255    /// Returns `None` if this language has no module-doc convention or the file has none.
256    ///
257    /// Conventions by language:
258    /// - Rust: leading `//!` inner-doc comment lines
259    /// - Python: first statement is a string literal (`"""..."""`)
260    /// - Go: line comment(s) immediately before `package foo`
261    /// - JavaScript/TypeScript: leading `/** ... */` block comment at top of file
262    /// - Ruby: leading `#` comment block (ignoring `# frozen_string_literal` lines)
263    fn extract_module_doc(&self, _src: &str) -> Option<String> {
264        None
265    }
266
267    // === Helpers ===
268
269    /// Get the name of a node (typically via "name" field)
270    fn node_name<'a>(&self, node: &Node, content: &'a str) -> Option<&'a str> {
271        node.child_by_field_name("name")
272            .map(|n| &content[n.byte_range()])
273    }
274}