normalize_languages/traits.rs
1//! Core trait for language support.
2
3use std::path::{Path, PathBuf};
4use tree_sitter::Node;
5
6// Re-export core types from normalize-facts-core
7pub use normalize_facts_core::{Import, Symbol, SymbolKind, Visibility};
8
/// Module-resolution settings discovered from a workspace's manifests.
///
/// Produced by [`ModuleResolver::workspace_config`] and handed back to the
/// resolver's other methods on every call.
///
/// `Default` yields an empty root with no mappings or search roots.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ResolverConfig {
    /// Workspace root used for relative path anchoring.
    pub workspace_root: PathBuf,
    /// Language-specific path mappings (e.g. tsconfig paths, Cargo workspace members),
    /// stored as `(key, target path)` pairs.
    pub path_mappings: Vec<(String, PathBuf)>,
    /// Additional search roots (e.g. PYTHONPATH entries, Go module cache).
    pub search_roots: Vec<PathBuf>,
}
18
/// A parsed import specifier, ready to be handed to [`ModuleResolver::resolve`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ImportSpec {
    /// Raw specifier string (e.g. "std::collections::HashMap", "./utils", "numpy").
    pub raw: String,
    /// Whether this is a relative import (starts with ./ or ../).
    pub is_relative: bool,
    /// The imported names, if specified (e.g. `use foo::{bar, baz}` → ["bar", "baz"]).
    /// Empty for glob/wildcard imports.
    pub names: Vec<String>,
    /// True if this is a glob/wildcard import (e.g. `use foo::*`, `from x import *`).
    pub is_glob: bool,
}
31
/// A resolved module identifier.
///
/// Derives `Eq` + `Hash` so identifiers can be compared and used as map/set keys.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ModuleId {
    /// Canonical path naming the module, as produced by
    /// [`ModuleResolver::module_of_file`].
    pub canonical_path: String,
}
36
/// Result of resolving an import specifier to a file.
///
/// Returned by [`ModuleResolver::resolve`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Resolution {
    /// Resolved to exactly one file + exported name.
    Resolved(PathBuf, String),
    /// Multiple possible resolutions (ambiguous), each a file + exported name pair.
    Ambiguous(Vec<(PathBuf, String)>),
    /// Could not resolve.
    NotFound,
    /// This language has no module system; resolution is not applicable.
    NotApplicable,
}
48
49/// Per-language module resolver.
50///
51/// Implements the Rust/TS/Python/etc-specific logic for turning an import
52/// specifier into a resolved file path.
53pub trait ModuleResolver: Send + Sync {
54 /// Read workspace config from the given root (e.g. Cargo.toml, tsconfig.json).
55 fn workspace_config(&self, root: &Path) -> ResolverConfig;
56 /// Return the canonical module identity/ies of a file within the workspace.
57 fn module_of_file(&self, root: &Path, file: &Path, cfg: &ResolverConfig) -> Vec<ModuleId>;
58 /// Resolve an import specifier from `from_file` to a target file + name.
59 fn resolve(&self, from_file: &Path, spec: &ImportSpec, cfg: &ResolverConfig) -> Resolution;
60}
61
/// Location of a container's body (for prepend/append editing operations).
///
/// Offsets are byte offsets into the source text the body was analyzed against.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ContainerBody {
    /// Byte offset where body content starts (after opening delimiter/heading)
    pub content_start: usize,
    /// Byte offset where body content ends (before closing delimiter)
    pub content_end: usize,
    /// Indentation string for new content inserted into the body
    pub inner_indent: String,
    /// True if the body has no meaningful content (empty, only pass/braces)
    pub is_empty: bool,
}
74
/// Information about what a class/container implements or extends.
///
/// The `Default` value describes a concrete container that implements nothing.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct ImplementsInfo {
    /// True if this is an interface/protocol/trait definition (not a concrete class).
    pub is_interface: bool,
    /// List of implemented interfaces, superclasses, or mixed-in traits.
    pub implements: Vec<String>,
}
83
/// Embedded content block (e.g., JS in Vue, CSS in HTML).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EmbeddedBlock {
    /// Grammar to use for parsing (e.g., "javascript", "css")
    pub grammar: &'static str,
    /// Extracted source content
    pub content: String,
    /// 1-indexed start line in the parent file
    pub start_line: usize,
}
94
95/// Capability trait: language has code symbols (functions, classes, types, etc.).
96///
97/// Config and data languages (CSS, HTML, JSON, TOML, XML, YAML) don't implement this.
98/// All general-purpose programming languages do.
99/// Access via `lang.as_symbols()` rather than `lang.has_symbols()`.
100pub trait LanguageSymbols: Language {}
101
102/// Capability trait: language can contain embedded blocks in another language.
103///
104/// Only a handful of multi-language formats implement this (Vue, HTML, Svelte).
105/// Access via `lang.as_embedded()` rather than casting.
106pub trait LanguageEmbedded: Language {
107 /// Extract embedded content from a node (e.g., JS/CSS in Vue/HTML).
108 /// Returns None for nodes that don't contain embedded code in another language.
109 fn embedded_content(&self, node: &Node, content: &str) -> Option<EmbeddedBlock>;
110}
111
112// === Helper functions for common extractor patterns ===
113
114/// Create a simple symbol with standard defaults.
115///
116/// Used by languages with straightforward function/method syntax where symbols:
117/// - Have public visibility
118/// - Use first line as signature
119/// - Have no attributes or children
120/// - Don't implement interfaces
121///
122/// Languages using this: cmake, glsl, graphql, hlsl, awk, elm, fish, haskell,
123/// jq, julia, ocaml, powershell, zsh
124pub fn simple_symbol(
125 node: &tree_sitter::Node,
126 content: &str,
127 name: &str,
128 kind: SymbolKind,
129 docstring: Option<String>,
130) -> Symbol {
131 let text = &content[node.byte_range()];
132 let first_line = text.lines().next().unwrap_or(text);
133
134 Symbol {
135 name: name.to_string(),
136 kind,
137 signature: first_line.trim().to_string(),
138 docstring,
139 attributes: Vec::new(),
140 start_line: node.start_position().row + 1,
141 end_line: node.end_position().row + 1,
142 visibility: Visibility::Public,
143 children: Vec::new(),
144 is_interface_impl: false,
145 implements: Vec::new(),
146 }
147}
148
149/// Create a simple function symbol (convenience wrapper).
150pub fn simple_function_symbol(
151 node: &tree_sitter::Node,
152 content: &str,
153 name: &str,
154 docstring: Option<String>,
155) -> Symbol {
156 simple_symbol(node, content, name, SymbolKind::Function, docstring)
157}
158
159/// Unified language support trait.
160///
161/// Each language implements this trait to provide:
162/// - Node kind classification
163/// - Symbol extraction (functions, classes, types)
164/// - Import/export parsing
165/// - Complexity analysis nodes
166/// - Visibility detection
167/// - Edit support (container bodies, docstrings)
168pub trait Language: Send + Sync {
169 /// Display name for this language (e.g., "Python", "C++")
170 fn name(&self) -> &'static str;
171
172 /// File extensions this language handles (e.g., ["py", "pyi", "pyw"])
173 fn extensions(&self) -> &'static [&'static str];
174
175 /// Grammar name for arborium (e.g., "python", "rust")
176 fn grammar_name(&self) -> &'static str;
177
178 /// Capability query: returns `Some(self)` if this language has code symbols
179 /// (functions, classes, types, etc.). Returns `None` for config/data languages.
180 /// Implement `LanguageSymbols` and override this to opt in.
181 fn as_symbols(&self) -> Option<&dyn LanguageSymbols> {
182 None
183 }
184
185 // === Symbol Building ===
186
187 /// Extract the docstring for a definition node.
188 /// Called by generic extraction for every tagged symbol.
189 /// Returns None if this language has no docstring convention or the node has no docstring.
190 fn extract_docstring(&self, _node: &Node, _content: &str) -> Option<String> {
191 None
192 }
193
194 /// Extract attributes/annotations/decorators attached to a definition node.
195 /// Called by generic extraction for every tagged symbol.
196 /// Returns empty vec if this language has no attribute convention.
197 fn extract_attributes(&self, _node: &Node, _content: &str) -> Vec<String> {
198 Vec::new()
199 }
200
201 /// Extract interfaces/traits/superclasses that a container node implements/extends.
202 /// Called by generic extraction for container nodes only.
203 fn extract_implements(&self, _node: &Node, _content: &str) -> ImplementsInfo {
204 ImplementsInfo::default()
205 }
206
207 /// Build the display signature for a definition node.
208 /// Default: first line of the node's source text (trimmed).
209 /// Override for languages where first-line is incomplete (e.g. Rust, Go, Java).
210 fn build_signature(&self, node: &Node, content: &str) -> String {
211 let text = &content[node.byte_range()];
212 text.lines().next().unwrap_or(text).trim().to_string()
213 }
214
215 /// Refine the symbol kind for a tagged node.
216 /// Called after tag classification assigns an initial kind (e.g. `definition.class` → `Class`).
217 /// Languages can override this to return a more specific kind based on the node's concrete type.
218 /// Default: return `tag_kind` unchanged.
219 fn refine_kind(&self, node: &Node, _content: &str, tag_kind: SymbolKind) -> SymbolKind {
220 let _ = node;
221 tag_kind
222 }
223
224 // === Import/Export ===
225
226 /// Extract imports from an import node (may return multiple)
227 fn extract_imports(&self, _node: &Node, _content: &str) -> Vec<Import> {
228 Vec::new()
229 }
230
231 /// Format an import as source code.
232 /// If `names` is Some, only include those names (for multi-import filtering).
233 /// If `names` is None, format the complete import.
234 fn format_import(&self, _import: &Import, _names: Option<&[&str]>) -> String {
235 String::new()
236 }
237
238 // === Display/Formatting ===
239
240 /// Suffix to append to signatures for tree-sitter parsing.
241 /// Function signatures are incomplete code fragments that need closing tokens
242 /// to parse correctly (e.g., Rust `fn foo()` needs `{}`, Lua `function foo()` needs `end`).
243 /// Returns the suffix to append, or empty string if none needed.
244 fn signature_suffix(&self) -> &'static str {
245 ""
246 }
247
248 // === Visibility ===
249
250 /// Get visibility of a node.
251 ///
252 /// This is a genuine interface method (not just an impl helper): `normalize-deps`
253 /// calls it externally during export detection to decide which tagged nodes are
254 /// public. The alternative — calling `extract_function/container/type()` and
255 /// inspecting `symbol.visibility` — would be correct but unnecessarily heavy
256 /// (computes signature, docstring, etc. just to check one field).
257 fn get_visibility(&self, _node: &Node, _content: &str) -> Visibility {
258 Visibility::Public
259 }
260
261 /// Check if a symbol is a test (for filtering).
262 fn is_test_symbol(&self, _symbol: &Symbol) -> bool {
263 false
264 }
265
266 /// Glob patterns (relative, using `**` wildcards) that identify dedicated test files.
267 /// Used to build a GlobSet for fast batch matching.
268 /// Return `&[]` for languages with no dedicated test files (e.g. those using only inline tests).
269 fn test_file_globs(&self) -> &'static [&'static str] {
270 &[]
271 }
272
273 /// Capability query: returns `Some(self)` if this language can contain embedded blocks
274 /// in another language (e.g., JS in Vue, CSS in HTML). Returns `None` for most languages.
275 /// Implement `LanguageEmbedded` and override this to opt in.
276 fn as_embedded(&self) -> Option<&dyn LanguageEmbedded> {
277 None
278 }
279
280 // === Edit Support ===
281
282 /// Find the body node of a container (for prepend/append)
283 fn container_body<'a>(&self, _node: &'a Node<'a>) -> Option<Node<'a>> {
284 None
285 }
286
287 /// Detect if first child of body is a docstring
288 fn body_has_docstring(&self, _body: &Node, _content: &str) -> bool {
289 false
290 }
291
292 /// Analyze a container body node and return the editable byte range.
293 /// `body_node` is the node returned by `container_body`.
294 /// Returns None if this language doesn't support container body editing.
295 fn analyze_container_body(
296 &self,
297 _body_node: &Node,
298 _content: &str,
299 _inner_indent: &str,
300 ) -> Option<ContainerBody> {
301 None
302 }
303
304 // === Module-level documentation ===
305
306 /// Extract the module-level doc comment from raw file source.
307 ///
308 /// Called when viewing a file (not a specific symbol) to populate `ViewReport.summary`.
309 /// Returns `None` if this language has no module-doc convention or the file has none.
310 ///
311 /// Conventions by language:
312 /// - Rust: leading `//!` inner-doc comment lines
313 /// - Python: first statement is a string literal (`"""..."""`)
314 /// - Go: line comment(s) immediately before `package foo`
315 /// - JavaScript/TypeScript: leading `/** ... */` block comment at top of file
316 /// - Ruby: leading `#` comment block (ignoring `# frozen_string_literal` lines)
317 fn extract_module_doc(&self, _src: &str) -> Option<String> {
318 None
319 }
320
321 // === Module Resolution ===
322
323 /// Return the module resolver for this language, if it has one.
324 ///
325 /// Languages with a module system (Rust, TypeScript, Python, Go, etc.) implement
326 /// this to enable cross-file name resolution. Languages without a module system
327 /// (Bash, GLSL, etc.) return `None`.
328 fn module_resolver(&self) -> Option<&dyn ModuleResolver> {
329 None
330 }
331
332 // === Helpers ===
333
334 /// Get the name of a node (typically via "name" field)
335 fn node_name<'a>(&self, node: &Node, content: &'a str) -> Option<&'a str> {
336 node.child_by_field_name("name")
337 .map(|n| &content[n.byte_range()])
338 }
339}