normalize_languages/traits.rs
//! Core traits, shared data types, and helper functions for language support.
2
3use tree_sitter::Node;
4
5// Re-export core types from normalize-facts-core
6pub use normalize_facts_core::{Import, Symbol, SymbolKind, Visibility};
7
/// Location of a container's body (for prepend/append editing operations).
///
/// Plain data: byte offsets plus the indentation to use for inserted content.
/// Derives `Clone`, `PartialEq`, and `Eq` so values can be copied and compared
/// directly (e.g. in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ContainerBody {
    /// Byte offset where body content starts (after opening delimiter/heading)
    pub content_start: usize,
    /// Byte offset where body content ends (before closing delimiter)
    pub content_end: usize,
    /// Indentation string for new content inserted into the body
    pub inner_indent: String,
    /// True if the body has no meaningful content (empty, only pass/braces)
    pub is_empty: bool,
}
20
/// Information about what a class/container implements or extends.
///
/// The `Default` value is "not an interface, implements nothing"
/// (`is_interface == false`, empty `implements`).
/// Derives `Clone`, `PartialEq`, and `Eq` so values can be copied and compared
/// directly (e.g. in tests).
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct ImplementsInfo {
    /// True if this is an interface/protocol/trait definition (not a concrete class).
    pub is_interface: bool,
    /// List of implemented interfaces, superclasses, or mixed-in traits.
    pub implements: Vec<String>,
}
29
/// Embedded content block (e.g., JS in Vue, CSS in HTML).
///
/// Derives `PartialEq` and `Eq` (in addition to `Debug` and `Clone`) so
/// extracted blocks can be compared directly (e.g. in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EmbeddedBlock {
    /// Grammar to use for parsing (e.g., "javascript", "css")
    pub grammar: &'static str,
    /// Extracted source content
    pub content: String,
    /// 1-indexed start line in the parent file
    pub start_line: usize,
}
40
/// Capability trait: language has code symbols (functions, classes, types, etc.).
///
/// This is a marker trait: it declares no methods of its own and only requires
/// [`Language`] as a supertrait.
///
/// Config and data languages (CSS, HTML, JSON, TOML, XML, YAML) don't implement this.
/// All general-purpose programming languages do.
///
/// Query the capability via `lang.as_symbols()`: [`Language::as_symbols`] returns
/// `None` by default, and implementors of this trait override it to opt in.
pub trait LanguageSymbols: Language {}
47
/// Capability trait: language can contain embedded blocks in another language.
///
/// Only a handful of multi-language formats implement this (Vue, HTML, Svelte).
///
/// Access via `lang.as_embedded()` rather than casting: [`Language::as_embedded`]
/// returns `None` by default, and implementors of this trait override it to opt in.
pub trait LanguageEmbedded: Language {
    /// Extract embedded content from a node (e.g., JS/CSS in Vue/HTML).
    ///
    /// `node` is the candidate syntax node; `content` is presumably the full source
    /// text that `node` was parsed from (confirm against callers).
    ///
    /// Returns `None` for nodes that don't contain embedded code in another language.
    fn embedded_content(&self, node: &Node, content: &str) -> Option<EmbeddedBlock>;
}
57
58// === Helper functions for common extractor patterns ===
59
60/// Create a simple symbol with standard defaults.
61///
62/// Used by languages with straightforward function/method syntax where symbols:
63/// - Have public visibility
64/// - Use first line as signature
65/// - Have no attributes or children
66/// - Don't implement interfaces
67///
68/// Languages using this: cmake, glsl, graphql, hlsl, awk, elm, fish, haskell,
69/// jq, julia, ocaml, powershell, zsh
70pub fn simple_symbol(
71 node: &tree_sitter::Node,
72 content: &str,
73 name: &str,
74 kind: SymbolKind,
75 docstring: Option<String>,
76) -> Symbol {
77 let text = &content[node.byte_range()];
78 let first_line = text.lines().next().unwrap_or(text);
79
80 Symbol {
81 name: name.to_string(),
82 kind,
83 signature: first_line.trim().to_string(),
84 docstring,
85 attributes: Vec::new(),
86 start_line: node.start_position().row + 1,
87 end_line: node.end_position().row + 1,
88 visibility: Visibility::Public,
89 children: Vec::new(),
90 is_interface_impl: false,
91 implements: Vec::new(),
92 }
93}
94
95/// Create a simple function symbol (convenience wrapper).
96pub fn simple_function_symbol(
97 node: &tree_sitter::Node,
98 content: &str,
99 name: &str,
100 docstring: Option<String>,
101) -> Symbol {
102 simple_symbol(node, content, name, SymbolKind::Function, docstring)
103}
104
105/// Unified language support trait.
106///
107/// Each language implements this trait to provide:
108/// - Node kind classification
109/// - Symbol extraction (functions, classes, types)
110/// - Import/export parsing
111/// - Complexity analysis nodes
112/// - Visibility detection
113/// - Edit support (container bodies, docstrings)
114pub trait Language: Send + Sync {
115 /// Display name for this language (e.g., "Python", "C++")
116 fn name(&self) -> &'static str;
117
118 /// File extensions this language handles (e.g., ["py", "pyi", "pyw"])
119 fn extensions(&self) -> &'static [&'static str];
120
121 /// Grammar name for arborium (e.g., "python", "rust")
122 fn grammar_name(&self) -> &'static str;
123
124 /// Capability query: returns `Some(self)` if this language has code symbols
125 /// (functions, classes, types, etc.). Returns `None` for config/data languages.
126 /// Implement `LanguageSymbols` and override this to opt in.
127 fn as_symbols(&self) -> Option<&dyn LanguageSymbols> {
128 None
129 }
130
131 // === Symbol Building ===
132
133 /// Extract the docstring for a definition node.
134 /// Called by generic extraction for every tagged symbol.
135 /// Returns None if this language has no docstring convention or the node has no docstring.
136 fn extract_docstring(&self, _node: &Node, _content: &str) -> Option<String> {
137 None
138 }
139
140 /// Extract attributes/annotations/decorators attached to a definition node.
141 /// Called by generic extraction for every tagged symbol.
142 /// Returns empty vec if this language has no attribute convention.
143 fn extract_attributes(&self, _node: &Node, _content: &str) -> Vec<String> {
144 Vec::new()
145 }
146
147 /// Extract interfaces/traits/superclasses that a container node implements/extends.
148 /// Called by generic extraction for container nodes only.
149 fn extract_implements(&self, _node: &Node, _content: &str) -> ImplementsInfo {
150 ImplementsInfo::default()
151 }
152
153 /// Build the display signature for a definition node.
154 /// Default: first line of the node's source text (trimmed).
155 /// Override for languages where first-line is incomplete (e.g. Rust, Go, Java).
156 fn build_signature(&self, node: &Node, content: &str) -> String {
157 let text = &content[node.byte_range()];
158 text.lines().next().unwrap_or(text).trim().to_string()
159 }
160
161 /// Refine the symbol kind for a tagged node.
162 /// Called after tag classification assigns an initial kind (e.g. `definition.class` → `Class`).
163 /// Languages can override this to return a more specific kind based on the node's concrete type.
164 /// Default: return `tag_kind` unchanged.
165 fn refine_kind(&self, node: &Node, _content: &str, tag_kind: SymbolKind) -> SymbolKind {
166 let _ = node;
167 tag_kind
168 }
169
170 // === Import/Export ===
171
172 /// Extract imports from an import node (may return multiple)
173 fn extract_imports(&self, _node: &Node, _content: &str) -> Vec<Import> {
174 Vec::new()
175 }
176
177 /// Format an import as source code.
178 /// If `names` is Some, only include those names (for multi-import filtering).
179 /// If `names` is None, format the complete import.
180 fn format_import(&self, _import: &Import, _names: Option<&[&str]>) -> String {
181 String::new()
182 }
183
184 // === Display/Formatting ===
185
186 /// Suffix to append to signatures for tree-sitter parsing.
187 /// Function signatures are incomplete code fragments that need closing tokens
188 /// to parse correctly (e.g., Rust `fn foo()` needs `{}`, Lua `function foo()` needs `end`).
189 /// Returns the suffix to append, or empty string if none needed.
190 fn signature_suffix(&self) -> &'static str {
191 ""
192 }
193
194 // === Visibility ===
195
196 /// Get visibility of a node.
197 ///
198 /// This is a genuine interface method (not just an impl helper): `normalize-deps`
199 /// calls it externally during export detection to decide which tagged nodes are
200 /// public. The alternative — calling `extract_function/container/type()` and
201 /// inspecting `symbol.visibility` — would be correct but unnecessarily heavy
202 /// (computes signature, docstring, etc. just to check one field).
203 fn get_visibility(&self, _node: &Node, _content: &str) -> Visibility {
204 Visibility::Public
205 }
206
207 /// Check if a symbol is a test (for filtering).
208 fn is_test_symbol(&self, _symbol: &Symbol) -> bool {
209 false
210 }
211
212 /// Glob patterns (relative, using `**` wildcards) that identify dedicated test files.
213 /// Used to build a GlobSet for fast batch matching.
214 /// Return `&[]` for languages with no dedicated test files (e.g. those using only inline tests).
215 fn test_file_globs(&self) -> &'static [&'static str] {
216 &[]
217 }
218
219 /// Capability query: returns `Some(self)` if this language can contain embedded blocks
220 /// in another language (e.g., JS in Vue, CSS in HTML). Returns `None` for most languages.
221 /// Implement `LanguageEmbedded` and override this to opt in.
222 fn as_embedded(&self) -> Option<&dyn LanguageEmbedded> {
223 None
224 }
225
226 // === Edit Support ===
227
228 /// Find the body node of a container (for prepend/append)
229 fn container_body<'a>(&self, _node: &'a Node<'a>) -> Option<Node<'a>> {
230 None
231 }
232
233 /// Detect if first child of body is a docstring
234 fn body_has_docstring(&self, _body: &Node, _content: &str) -> bool {
235 false
236 }
237
238 /// Analyze a container body node and return the editable byte range.
239 /// `body_node` is the node returned by `container_body`.
240 /// Returns None if this language doesn't support container body editing.
241 fn analyze_container_body(
242 &self,
243 _body_node: &Node,
244 _content: &str,
245 _inner_indent: &str,
246 ) -> Option<ContainerBody> {
247 None
248 }
249
250 // === Module-level documentation ===
251
252 /// Extract the module-level doc comment from raw file source.
253 ///
254 /// Called when viewing a file (not a specific symbol) to populate `ViewReport.summary`.
255 /// Returns `None` if this language has no module-doc convention or the file has none.
256 ///
257 /// Conventions by language:
258 /// - Rust: leading `//!` inner-doc comment lines
259 /// - Python: first statement is a string literal (`"""..."""`)
260 /// - Go: line comment(s) immediately before `package foo`
261 /// - JavaScript/TypeScript: leading `/** ... */` block comment at top of file
262 /// - Ruby: leading `#` comment block (ignoring `# frozen_string_literal` lines)
263 fn extract_module_doc(&self, _src: &str) -> Option<String> {
264 None
265 }
266
267 // === Helpers ===
268
269 /// Get the name of a node (typically via "name" field)
270 fn node_name<'a>(&self, node: &Node, content: &'a str) -> Option<&'a str> {
271 node.child_by_field_name("name")
272 .map(|n| &content[n.byte_range()])
273 }
274}