Skip to main content

normalize_languages/
traits.rs

1//! Core trait for language support.
2
3use std::path::{Path, PathBuf};
4use tree_sitter::Node;
5
6// Re-export core types from normalize-facts-core
7pub use normalize_facts_core::{Import, Symbol, SymbolKind, Visibility};
8
/// Module-resolution settings gathered from a workspace's manifest files.
///
/// Derives `Debug` and `Clone` so a configuration can be logged and handed to
/// several consumers, matching the other data types in this module.
#[derive(Debug, Clone)]
pub struct ResolverConfig {
    /// Workspace root used for relative path anchoring.
    pub workspace_root: PathBuf,
    /// Language-specific path mappings (e.g. tsconfig paths, Cargo workspace members).
    pub path_mappings: Vec<(String, PathBuf)>,
    /// Additional search roots (e.g. PYTHONPATH entries, Go module cache).
    pub search_roots: Vec<PathBuf>,
}
18
/// A parsed import specifier, as handed to a module resolver.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so specs can be logged,
/// duplicated and compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ImportSpec {
    /// Raw specifier string (e.g. "std::collections::HashMap", "./utils", "numpy").
    pub raw: String,
    /// Whether this is a relative import (starts with ./ or ../).
    pub is_relative: bool,
    /// The imported names, if specified (e.g. `use foo::{bar, baz}` → ["bar", "baz"]).
    /// Empty for glob/wildcard imports.
    pub names: Vec<String>,
    /// True if this is a glob/wildcard import (e.g. `use foo::*`, `from x import *`).
    pub is_glob: bool,
}
31
/// A resolved module identifier.
///
/// Derives `Debug`, `Clone`, `PartialEq`, `Eq` and `Hash` so identifiers can be
/// logged, compared, deduplicated and used as map/set keys.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ModuleId {
    /// Canonical path naming the module.
    ///
    /// NOTE(review): the exact format is presumably defined by each language's
    /// resolver (e.g. a `::`- or `.`-separated module path) — confirm against
    /// the `ModuleResolver` implementations.
    pub canonical_path: String,
}
36
/// Result of resolving an import specifier to a file.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so outcomes can be logged and
/// asserted on directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Resolution {
    /// Resolved to exactly one file + exported name.
    Resolved(PathBuf, String),
    /// Multiple possible resolutions (ambiguous).
    Ambiguous(Vec<(PathBuf, String)>),
    /// Could not resolve.
    NotFound,
    /// This language has no module system; resolution is not applicable.
    NotApplicable,
}
48
49/// Per-language module resolver.
50///
51/// Implements the Rust/TS/Python/etc-specific logic for turning an import
52/// specifier into a resolved file path.
53pub trait ModuleResolver: Send + Sync {
54    /// Read workspace config from the given root (e.g. Cargo.toml, tsconfig.json).
55    fn workspace_config(&self, root: &Path) -> ResolverConfig;
56    /// Return the canonical module identity/ies of a file within the workspace.
57    fn module_of_file(&self, root: &Path, file: &Path, cfg: &ResolverConfig) -> Vec<ModuleId>;
58    /// Resolve an import specifier from `from_file` to a target file + name.
59    fn resolve(&self, from_file: &Path, spec: &ImportSpec, cfg: &ResolverConfig) -> Resolution;
60}
61
/// Where a container's body lives in the source, used by prepend/append edits.
#[derive(Debug)]
pub struct ContainerBody {
    /// Byte offset of the first body byte (just past the opening delimiter or heading).
    pub content_start: usize,
    /// Byte offset at which the body stops (just before the closing delimiter).
    pub content_end: usize,
    /// Indentation to prefix onto new content inserted into the body.
    pub inner_indent: String,
    /// Set when the body holds nothing meaningful (empty, or only `pass`/braces).
    pub is_empty: bool,
}
74
/// Describes what a class/container implements or extends.
///
/// The `Default` value is a non-interface with an empty `implements` list.
#[derive(Debug, Default)]
pub struct ImplementsInfo {
    /// Set when the container is itself an interface/protocol/trait definition
    /// rather than a concrete class.
    pub is_interface: bool,
    /// Names of the implemented interfaces, superclasses, or mixed-in traits.
    pub implements: Vec<String>,
}
83
/// A block of another language embedded in a host file (e.g., JS in Vue, CSS in HTML).
#[derive(Debug, Clone)]
pub struct EmbeddedBlock {
    /// Name of the grammar that should parse `content` (e.g., "javascript", "css").
    pub grammar: &'static str,
    /// The extracted source text of the block.
    pub content: String,
    /// Line in the parent file where the block starts (1-indexed).
    pub start_line: usize,
}
94
95/// Capability trait: language has code symbols (functions, classes, types, etc.).
96///
97/// Config and data languages (CSS, HTML, JSON, TOML, XML, YAML) don't implement this.
98/// All general-purpose programming languages do.
99/// Access via `lang.as_symbols()` rather than `lang.has_symbols()`.
100pub trait LanguageSymbols: Language {}
101
102/// Capability trait: language can contain embedded blocks in another language.
103///
104/// Only a handful of multi-language formats implement this (Vue, HTML, Svelte).
105/// Access via `lang.as_embedded()` rather than casting.
106pub trait LanguageEmbedded: Language {
107    /// Extract embedded content from a node (e.g., JS/CSS in Vue/HTML).
108    /// Returns None for nodes that don't contain embedded code in another language.
109    fn embedded_content(&self, node: &Node, content: &str) -> Option<EmbeddedBlock>;
110}
111
112// === Helper functions for common extractor patterns ===
113
114/// Create a simple symbol with standard defaults.
115///
116/// Used by languages with straightforward function/method syntax where symbols:
117/// - Have public visibility
118/// - Use first line as signature
119/// - Have no attributes or children
120/// - Don't implement interfaces
121///
122/// Languages using this: cmake, glsl, graphql, hlsl, awk, elm, fish, haskell,
123/// jq, julia, ocaml, powershell, zsh
124pub fn simple_symbol(
125    node: &tree_sitter::Node,
126    content: &str,
127    name: &str,
128    kind: SymbolKind,
129    docstring: Option<String>,
130) -> Symbol {
131    let text = &content[node.byte_range()];
132    let first_line = text.lines().next().unwrap_or(text);
133
134    Symbol {
135        name: name.to_string(),
136        kind,
137        signature: first_line.trim().to_string(),
138        docstring,
139        attributes: Vec::new(),
140        start_line: node.start_position().row + 1,
141        end_line: node.end_position().row + 1,
142        visibility: Visibility::Public,
143        children: Vec::new(),
144        is_interface_impl: false,
145        implements: Vec::new(),
146    }
147}
148
149/// Create a simple function symbol (convenience wrapper).
150pub fn simple_function_symbol(
151    node: &tree_sitter::Node,
152    content: &str,
153    name: &str,
154    docstring: Option<String>,
155) -> Symbol {
156    simple_symbol(node, content, name, SymbolKind::Function, docstring)
157}
158
159/// Unified language support trait.
160///
161/// Each language implements this trait to provide:
162/// - Node kind classification
163/// - Symbol extraction (functions, classes, types)
164/// - Import/export parsing
165/// - Complexity analysis nodes
166/// - Visibility detection
167/// - Edit support (container bodies, docstrings)
168pub trait Language: Send + Sync {
169    /// Display name for this language (e.g., "Python", "C++")
170    fn name(&self) -> &'static str;
171
172    /// File extensions this language handles (e.g., ["py", "pyi", "pyw"])
173    fn extensions(&self) -> &'static [&'static str];
174
175    /// Grammar name for arborium (e.g., "python", "rust")
176    fn grammar_name(&self) -> &'static str;
177
178    /// Capability query: returns `Some(self)` if this language has code symbols
179    /// (functions, classes, types, etc.). Returns `None` for config/data languages.
180    /// Implement `LanguageSymbols` and override this to opt in.
181    fn as_symbols(&self) -> Option<&dyn LanguageSymbols> {
182        None
183    }
184
185    // === Symbol Building ===
186
187    /// Extract the docstring for a definition node.
188    /// Called by generic extraction for every tagged symbol.
189    /// Returns None if this language has no docstring convention or the node has no docstring.
190    fn extract_docstring(&self, _node: &Node, _content: &str) -> Option<String> {
191        None
192    }
193
194    /// Extract attributes/annotations/decorators attached to a definition node.
195    /// Called by generic extraction for every tagged symbol.
196    /// Returns empty vec if this language has no attribute convention.
197    fn extract_attributes(&self, _node: &Node, _content: &str) -> Vec<String> {
198        Vec::new()
199    }
200
201    /// Extract interfaces/traits/superclasses that a container node implements/extends.
202    /// Called by generic extraction for container nodes only.
203    fn extract_implements(&self, _node: &Node, _content: &str) -> ImplementsInfo {
204        ImplementsInfo::default()
205    }
206
207    /// Build the display signature for a definition node.
208    /// Default: first line of the node's source text (trimmed).
209    /// Override for languages where first-line is incomplete (e.g. Rust, Go, Java).
210    fn build_signature(&self, node: &Node, content: &str) -> String {
211        let text = &content[node.byte_range()];
212        text.lines().next().unwrap_or(text).trim().to_string()
213    }
214
215    /// Refine the symbol kind for a tagged node.
216    /// Called after tag classification assigns an initial kind (e.g. `definition.class` → `Class`).
217    /// Languages can override this to return a more specific kind based on the node's concrete type.
218    /// Default: return `tag_kind` unchanged.
219    fn refine_kind(&self, node: &Node, _content: &str, tag_kind: SymbolKind) -> SymbolKind {
220        let _ = node;
221        tag_kind
222    }
223
224    // === Import/Export ===
225
226    /// Extract imports from an import node (may return multiple)
227    fn extract_imports(&self, _node: &Node, _content: &str) -> Vec<Import> {
228        Vec::new()
229    }
230
231    /// Format an import as source code.
232    /// If `names` is Some, only include those names (for multi-import filtering).
233    /// If `names` is None, format the complete import.
234    fn format_import(&self, _import: &Import, _names: Option<&[&str]>) -> String {
235        String::new()
236    }
237
238    // === Display/Formatting ===
239
240    /// Suffix to append to signatures for tree-sitter parsing.
241    /// Function signatures are incomplete code fragments that need closing tokens
242    /// to parse correctly (e.g., Rust `fn foo()` needs `{}`, Lua `function foo()` needs `end`).
243    /// Returns the suffix to append, or empty string if none needed.
244    fn signature_suffix(&self) -> &'static str {
245        ""
246    }
247
248    // === Visibility ===
249
250    /// Get visibility of a node.
251    ///
252    /// This is a genuine interface method (not just an impl helper): `normalize-deps`
253    /// calls it externally during export detection to decide which tagged nodes are
254    /// public. The alternative — calling `extract_function/container/type()` and
255    /// inspecting `symbol.visibility` — would be correct but unnecessarily heavy
256    /// (computes signature, docstring, etc. just to check one field).
257    fn get_visibility(&self, _node: &Node, _content: &str) -> Visibility {
258        Visibility::Public
259    }
260
261    /// Check if a symbol is a test (for filtering).
262    fn is_test_symbol(&self, _symbol: &Symbol) -> bool {
263        false
264    }
265
266    /// Glob patterns (relative, using `**` wildcards) that identify dedicated test files.
267    /// Used to build a GlobSet for fast batch matching.
268    /// Return `&[]` for languages with no dedicated test files (e.g. those using only inline tests).
269    fn test_file_globs(&self) -> &'static [&'static str] {
270        &[]
271    }
272
273    /// Capability query: returns `Some(self)` if this language can contain embedded blocks
274    /// in another language (e.g., JS in Vue, CSS in HTML). Returns `None` for most languages.
275    /// Implement `LanguageEmbedded` and override this to opt in.
276    fn as_embedded(&self) -> Option<&dyn LanguageEmbedded> {
277        None
278    }
279
280    // === Edit Support ===
281
282    /// Find the body node of a container (for prepend/append)
283    fn container_body<'a>(&self, _node: &'a Node<'a>) -> Option<Node<'a>> {
284        None
285    }
286
287    /// Detect if first child of body is a docstring
288    fn body_has_docstring(&self, _body: &Node, _content: &str) -> bool {
289        false
290    }
291
292    /// Analyze a container body node and return the editable byte range.
293    /// `body_node` is the node returned by `container_body`.
294    /// Returns None if this language doesn't support container body editing.
295    fn analyze_container_body(
296        &self,
297        _body_node: &Node,
298        _content: &str,
299        _inner_indent: &str,
300    ) -> Option<ContainerBody> {
301        None
302    }
303
304    // === Module-level documentation ===
305
306    /// Extract the module-level doc comment from raw file source.
307    ///
308    /// Called when viewing a file (not a specific symbol) to populate `ViewReport.summary`.
309    /// Returns `None` if this language has no module-doc convention or the file has none.
310    ///
311    /// Conventions by language:
312    /// - Rust: leading `//!` inner-doc comment lines
313    /// - Python: first statement is a string literal (`"""..."""`)
314    /// - Go: line comment(s) immediately before `package foo`
315    /// - JavaScript/TypeScript: leading `/** ... */` block comment at top of file
316    /// - Ruby: leading `#` comment block (ignoring `# frozen_string_literal` lines)
317    fn extract_module_doc(&self, _src: &str) -> Option<String> {
318        None
319    }
320
321    // === Module Resolution ===
322
323    /// Return the module resolver for this language, if it has one.
324    ///
325    /// Languages with a module system (Rust, TypeScript, Python, Go, etc.) implement
326    /// this to enable cross-file name resolution. Languages without a module system
327    /// (Bash, GLSL, etc.) return `None`.
328    fn module_resolver(&self) -> Option<&dyn ModuleResolver> {
329        None
330    }
331
332    // === Helpers ===
333
334    /// Get the name of a node (typically via "name" field)
335    fn node_name<'a>(&self, node: &Node, content: &'a str) -> Option<&'a str> {
336        node.child_by_field_name("name")
337            .map(|n| &content[n.byte_range()])
338    }
339}