Skip to main content

normalize_languages/
traits.rs

1//! Core trait for language support.
2
3use crate::external_packages::ResolvedPackage;
4use std::path::{Path, PathBuf};
5use tree_sitter::Node;
6
/// Classification of an extracted symbol.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SymbolKind {
    Function,
    Method,
    Class,
    Struct,
    Enum,
    Trait,
    Interface,
    Module,
    Type,
    Constant,
    Variable,
    Heading,
}

impl SymbolKind {
    /// Returns the lowercase label for this kind (e.g. `"function"`, `"heading"`).
    pub fn as_str(&self) -> &'static str {
        // No catch-all arm: adding a variant must force a label to be chosen here.
        match *self {
            Self::Function => "function",
            Self::Method => "method",
            Self::Class => "class",
            Self::Struct => "struct",
            Self::Enum => "enum",
            Self::Trait => "trait",
            Self::Interface => "interface",
            Self::Module => "module",
            Self::Type => "type",
            Self::Constant => "constant",
            Self::Variable => "variable",
            Self::Heading => "heading",
        }
    }
}
42
/// Visibility level of a symbol.
///
/// `Visibility::Public` is the `Default` value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum Visibility {
    #[default]
    Public,
    Private,
    Protected,
    Internal,
}
52
/// The rule a language uses to decide whether a symbol is visible.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VisibilityMechanism {
    /// An explicit export keyword marks public symbols
    /// (JS/TS: `export function foo()`).
    ExplicitExport,
    /// Access modifier keywords such as `public`, `private`, `protected`
    /// (Java, Scala, C#).
    AccessModifier,
    /// Visibility follows from the symbol's name
    /// (Go: uppercase = public, Python: underscore = private).
    NamingConvention,
    /// Visibility follows from the file kind
    /// (C/C++: symbols in headers are public, source files are private).
    HeaderBased,
    /// Every symbol is public by default (Ruby modules, Lua).
    AllPublic,
    /// The concept does not apply (data formats like JSON, YAML, TOML).
    NotApplicable,
}
69
70/// A code symbol extracted from source
71#[derive(Debug, Clone)]
72pub struct Symbol {
73    pub name: String,
74    pub kind: SymbolKind,
75    pub signature: String,
76    pub docstring: Option<String>,
77    pub attributes: Vec<String>,
78    pub start_line: usize,
79    pub end_line: usize,
80    pub visibility: Visibility,
81    pub children: Vec<Symbol>,
82    /// True if this symbol implements an interface/trait (e.g., method in `impl Trait for Type`)
83    pub is_interface_impl: bool,
84    /// Parent interfaces/classes this symbol extends or implements (for semantic matching)
85    pub implements: Vec<String>,
86}
87
/// A single import statement.
#[derive(Debug, Clone)]
pub struct Import {
    /// Module being imported from.
    pub module: String,
    /// Names imported from the module; may be empty.
    pub names: Vec<String>,
    /// Alias given to the import, if any.
    pub alias: Option<String>,
    /// Whether this is a wildcard import.
    pub is_wildcard: bool,
    /// Whether this is a relative import.
    pub is_relative: bool,
    /// Line of the import statement.
    pub line: usize,
}

impl Import {
    /// Renders the import as a short readable summary.
    ///
    /// - wildcard import: `module::*` (any listed names are ignored)
    /// - no names: `module`
    /// - one name: `module::name`
    /// - several names: `module::{a, b}`
    pub fn format_summary(&self) -> String {
        match (self.is_wildcard, self.names.as_slice()) {
            (true, _) => format!("{}::*", self.module),
            (false, []) => self.module.clone(),
            (false, [only]) => format!("{}::{}", self.module, only),
            (false, many) => format!("{}::{{{}}}", self.module, many.join(", ")),
        }
    }
}
113
114/// An export declaration
115#[derive(Debug, Clone)]
116pub struct Export {
117    pub name: String,
118    pub kind: SymbolKind,
119    pub line: usize,
120}
121
/// A block of code in another language embedded in a host file
/// (e.g., JS in Vue, CSS in HTML).
#[derive(Debug, Clone)]
pub struct EmbeddedBlock {
    /// Name of the grammar used to parse `content` (e.g., "javascript", "css").
    pub grammar: &'static str,
    /// The extracted source text.
    pub content: String,
    /// Start line within the parent file, 1-indexed.
    pub start_line: usize,
}
132
133// === Helper functions for should_skip_package_entry ===
134
/// Returns `true` when `name` begins with `.` (a dotfile or dot-directory).
///
/// An empty name is not a dotfile.
pub fn skip_dotfiles(name: &str) -> bool {
    name.chars().next() == Some('.')
}
139
/// Returns `true` when `name` ends with `.` followed by one of `extensions`.
///
/// Extensions are given without the leading dot (`"rs"`, not `".rs"`) and are
/// compared case-sensitively. Multi-part extensions such as `"tar.gz"` work
/// because the check is a plain suffix match. An empty `extensions` slice
/// never matches.
pub fn has_extension(name: &str, extensions: &[&str]) -> bool {
    extensions.iter().any(|ext| {
        // Peel the extension off the end, then require the dot right before it.
        // Same result as `name.ends_with(".{ext}")`, without building a String
        // for every candidate.
        name.strip_suffix(*ext)
            .map_or(false, |stem| stem.ends_with('.'))
    })
}
146
147// === Helper functions for common extractor patterns ===
148
149/// Create a simple symbol with standard defaults.
150///
151/// Used by languages with straightforward function/method syntax where symbols:
152/// - Have public visibility
153/// - Use first line as signature
154/// - Have no attributes or children
155/// - Don't implement interfaces
156///
157/// Languages using this: cmake, glsl, graphql, hlsl, awk, elm, fish, haskell,
158/// jq, julia, ocaml, powershell, zsh
159pub fn simple_symbol(
160    node: &tree_sitter::Node,
161    content: &str,
162    name: &str,
163    kind: SymbolKind,
164    docstring: Option<String>,
165) -> Symbol {
166    let text = &content[node.byte_range()];
167    let first_line = text.lines().next().unwrap_or(text);
168
169    Symbol {
170        name: name.to_string(),
171        kind,
172        signature: first_line.trim().to_string(),
173        docstring,
174        attributes: Vec::new(),
175        start_line: node.start_position().row + 1,
176        end_line: node.end_position().row + 1,
177        visibility: Visibility::Public,
178        children: Vec::new(),
179        is_interface_impl: false,
180        implements: Vec::new(),
181    }
182}
183
184/// Create a simple function symbol (convenience wrapper).
185pub fn simple_function_symbol(
186    node: &tree_sitter::Node,
187    content: &str,
188    name: &str,
189    docstring: Option<String>,
190) -> Symbol {
191    simple_symbol(node, content, name, SymbolKind::Function, docstring)
192}
193
194/// Unified language support trait.
195///
196/// Each language implements this trait to provide:
197/// - Node kind classification
198/// - Symbol extraction (functions, classes, types)
199/// - Import/export parsing
200/// - Complexity analysis nodes
201/// - Visibility detection
202/// - Edit support (container bodies, docstrings)
203pub trait Language: Send + Sync {
204    /// Display name for this language (e.g., "Python", "C++")
205    fn name(&self) -> &'static str;
206
207    /// File extensions this language handles (e.g., ["py", "pyi", "pyw"])
208    fn extensions(&self) -> &'static [&'static str];
209
210    /// Grammar name for arborium (e.g., "python", "rust")
211    fn grammar_name(&self) -> &'static str;
212
213    /// Whether this language has code symbols (functions, classes, etc.)
214    fn has_symbols(&self) -> bool;
215
216    // === Node Classification ===
217
218    /// Container nodes that can hold methods (class, impl, module)
219    fn container_kinds(&self) -> &'static [&'static str];
220
221    /// Function/method definition nodes
222    fn function_kinds(&self) -> &'static [&'static str];
223
224    /// Type definition nodes (struct, enum, interface, type alias)
225    fn type_kinds(&self) -> &'static [&'static str];
226
227    /// Import statement nodes
228    fn import_kinds(&self) -> &'static [&'static str];
229
230    /// AST node kinds that may contain publicly visible symbols.
231    /// For JS/TS: export_statement nodes.
232    /// For Go/Java/Python: function/class/type declaration nodes.
233    /// The extract_public_symbols() method filters by actual visibility.
234    fn public_symbol_kinds(&self) -> &'static [&'static str];
235
236    /// How this language determines symbol visibility
237    fn visibility_mechanism(&self) -> VisibilityMechanism;
238
239    // === Symbol Extraction ===
240
241    /// Extract symbol from a function/method node
242    fn extract_function(&self, node: &Node, content: &str, in_container: bool) -> Option<Symbol>;
243
244    /// Extract symbol from a container node (class, impl, module)
245    fn extract_container(&self, node: &Node, content: &str) -> Option<Symbol>;
246
247    /// Extract symbol from a type definition node
248    fn extract_type(&self, node: &Node, content: &str) -> Option<Symbol>;
249
250    /// Extract docstring/doc comment for a node
251    fn extract_docstring(&self, node: &Node, content: &str) -> Option<String>;
252
253    /// Extract attributes/decorators for a node (e.g., #[test], @Test)
254    fn extract_attributes(&self, node: &Node, content: &str) -> Vec<String>;
255
256    // === Import/Export ===
257
258    /// Extract imports from an import node (may return multiple)
259    fn extract_imports(&self, node: &Node, content: &str) -> Vec<Import>;
260
261    /// Format an import as source code.
262    /// If `names` is Some, only include those names (for multi-import filtering).
263    /// If `names` is None, format the complete import.
264    fn format_import(&self, import: &Import, names: Option<&[&str]>) -> String;
265
266    /// Extract public symbols from a node.
267    /// The node is one of the kinds from public_symbol_kinds().
268    /// For JS/TS: extracts exported names from export statements.
269    /// For Go/Java/Python: checks visibility and returns public symbols.
270    fn extract_public_symbols(&self, node: &Node, content: &str) -> Vec<Export>;
271
272    // === Scope Analysis ===
273
274    /// Nodes that create new variable scopes (for scope analysis)
275    /// Includes: loops, blocks, comprehensions, lambdas, with statements
276    /// Note: Functions and containers (from function_kinds/container_kinds) also create scopes
277    fn scope_creating_kinds(&self) -> &'static [&'static str];
278
279    // === Control Flow ===
280
281    /// Nodes that affect control flow (for CFG analysis)
282    /// Includes: if, for, while, return, break, continue, try, match
283    fn control_flow_kinds(&self) -> &'static [&'static str];
284
285    // === Complexity ===
286
287    /// Nodes that increase cyclomatic complexity
288    fn complexity_nodes(&self) -> &'static [&'static str];
289
290    /// Nodes that indicate nesting depth
291    fn nesting_nodes(&self) -> &'static [&'static str];
292
293    // === Display/Formatting ===
294
295    /// Suffix to append to signatures for tree-sitter parsing.
296    /// Function signatures are incomplete code fragments that need closing tokens
297    /// to parse correctly (e.g., Rust `fn foo()` needs `{}`, Lua `function foo()` needs `end`).
298    /// Returns the suffix to append, or empty string if none needed.
299    fn signature_suffix(&self) -> &'static str;
300
301    // === Visibility ===
302
303    /// Check if a node is public/exported
304    fn is_public(&self, node: &Node, content: &str) -> bool;
305
306    /// Get visibility of a node
307    fn get_visibility(&self, node: &Node, content: &str) -> Visibility;
308
309    /// Check if a symbol is a test (for filtering).
310    /// Each language must implement this - test conventions are language-specific.
311    fn is_test_symbol(&self, symbol: &Symbol) -> bool;
312
313    // === Embedded Languages ===
314
315    /// Extract embedded content from a node (e.g., JS/CSS in Vue/HTML).
316    /// Returns None for nodes that don't contain embedded code in another language.
317    fn embedded_content(&self, node: &Node, content: &str) -> Option<EmbeddedBlock>;
318
319    // === Edit Support ===
320
321    /// Find the body node of a container (for prepend/append)
322    fn container_body<'a>(&self, node: &'a Node<'a>) -> Option<Node<'a>>;
323
324    /// Detect if first child of body is a docstring
325    fn body_has_docstring(&self, body: &Node, content: &str) -> bool;
326
327    // === Helpers ===
328
329    /// Get the name of a node (typically via "name" field)
330    fn node_name<'a>(&self, node: &Node, content: &'a str) -> Option<&'a str>;
331
332    /// Convert a file path to a module name for this language.
333    /// Used to find "importers" - files that import a given file.
334    /// Returns None for languages without module systems or where not applicable.
335    fn file_path_to_module_name(&self, path: &Path) -> Option<String>;
336
337    /// Convert a module name to candidate file paths (inverse of file_path_to_module_name).
338    /// Returns relative paths that could contain the module.
339    /// Used for wildcard import resolution (e.g., `from foo import *`).
340    fn module_name_to_paths(&self, module: &str) -> Vec<String>;
341
342    // === Import Resolution ===
343
344    /// Language key for package index cache (e.g., "python", "go", "js").
345    fn lang_key(&self) -> &'static str;
346
347    /// Resolve a local import within the project.
348    ///
349    /// Handles project-relative imports (e.g., `from . import foo`, `crate::`,
350    /// `./module`, relative includes).
351    fn resolve_local_import(
352        &self,
353        import_name: &str,
354        current_file: &Path,
355        project_root: &Path,
356    ) -> Option<PathBuf>;
357
358    /// Resolve an external import to its source location.
359    ///
360    /// Returns the path to stdlib or installed packages.
361    fn resolve_external_import(
362        &self,
363        import_name: &str,
364        project_root: &Path,
365    ) -> Option<ResolvedPackage>;
366
367    /// Check if an import is from the standard library.
368    fn is_stdlib_import(&self, import_name: &str, project_root: &Path) -> bool;
369
370    /// Get the language/runtime version (for package index versioning).
371    fn get_version(&self, project_root: &Path) -> Option<String>;
372
373    /// Find package cache/installation directory.
374    fn find_package_cache(&self, project_root: &Path) -> Option<PathBuf>;
375
376    /// File extensions to index when caching a package.
377    fn indexable_extensions(&self) -> &'static [&'static str];
378
379    // === Package Indexing ===
380
381    /// Find standard library directory (if applicable).
382    /// Returns None for languages without a separate stdlib to index.
383    fn find_stdlib(&self, project_root: &Path) -> Option<PathBuf>;
384
385    /// Should this entry be skipped when indexing packages?
386    /// Called for each file/directory in package directories.
387    /// Use helper functions `skip_dotfiles()` and `has_extension(name, self.indexable_extensions())` for common checks.
388    fn should_skip_package_entry(&self, name: &str, is_dir: bool) -> bool;
389
390    /// Get the module/package name from a directory entry name.
391    fn package_module_name(&self, entry_name: &str) -> String;
392
393    /// Return package sources to index for this language.
394    /// Each source describes a directory containing packages.
395    fn package_sources(&self, project_root: &Path) -> Vec<PackageSource>;
396
397    /// Discover packages in a source directory.
398    /// Returns (package_name, path) pairs for all packages found.
399    /// Use provided helpers: discover_flat_packages, discover_recursive_packages, discover_npm_scoped_packages.
400    fn discover_packages(&self, source: &PackageSource) -> Vec<(String, PathBuf)>;
401
402    /// Discover packages in a flat directory (each entry is a package).
403    fn discover_flat_packages(&self, source_path: &Path) -> Vec<(String, PathBuf)> {
404        let entries = match std::fs::read_dir(source_path) {
405            Ok(e) => e,
406            Err(_) => return Vec::new(),
407        };
408
409        let mut packages = Vec::new();
410        for entry in entries.flatten() {
411            let path = entry.path();
412            let name = entry.file_name().to_string_lossy().to_string();
413
414            if self.should_skip_package_entry(&name, path.is_dir()) {
415                continue;
416            }
417
418            let module_name = self.package_module_name(&name);
419            packages.push((module_name, path));
420        }
421        packages
422    }
423
424    /// Discover packages recursively (each file with matching extension is a package).
425    fn discover_recursive_packages(
426        &self,
427        base_path: &Path,
428        current_path: &Path,
429    ) -> Vec<(String, PathBuf)> {
430        let entries = match std::fs::read_dir(current_path) {
431            Ok(e) => e,
432            Err(_) => return Vec::new(),
433        };
434
435        let mut packages = Vec::new();
436        for entry in entries.flatten() {
437            let path = entry.path();
438            let name = entry.file_name().to_string_lossy().to_string();
439            let is_dir = path.is_dir();
440
441            if self.should_skip_package_entry(&name, is_dir) {
442                continue;
443            }
444
445            if is_dir {
446                packages.extend(self.discover_recursive_packages(base_path, &path));
447            } else {
448                // Get relative path from base as module name
449                let rel_path = path
450                    .strip_prefix(base_path)
451                    .map(|p| p.to_string_lossy().to_string())
452                    .unwrap_or_else(|_| name);
453                packages.push((rel_path, path));
454            }
455        }
456        packages
457    }
458
459    /// Find the entry point file for a package path.
460    /// If path is a file, returns it directly.
461    /// If path is a directory, looks for language-specific entry points.
462    fn find_package_entry(&self, path: &Path) -> Option<PathBuf>;
463
464    /// Discover packages in npm-scoped directory (handles @scope/package).
465    fn discover_npm_scoped_packages(&self, source_path: &Path) -> Vec<(String, PathBuf)> {
466        let entries = match std::fs::read_dir(source_path) {
467            Ok(e) => e,
468            Err(_) => return Vec::new(),
469        };
470
471        let mut packages = Vec::new();
472        for entry in entries.flatten() {
473            let path = entry.path();
474            let name = entry.file_name().to_string_lossy().to_string();
475
476            if self.should_skip_package_entry(&name, path.is_dir()) {
477                continue;
478            }
479
480            if name.starts_with('@') && path.is_dir() {
481                // Scoped package - iterate contents
482                if let Ok(scoped_entries) = std::fs::read_dir(&path) {
483                    for scoped_entry in scoped_entries.flatten() {
484                        let scoped_path = scoped_entry.path();
485                        let scoped_name = scoped_entry.file_name().to_string_lossy().to_string();
486                        if self.should_skip_package_entry(&scoped_name, scoped_path.is_dir()) {
487                            continue;
488                        }
489                        let full_name = format!("{}/{}", name, scoped_name);
490                        packages.push((full_name, scoped_path));
491                    }
492                }
493            } else {
494                let module_name = self.package_module_name(&name);
495                packages.push((module_name, path));
496            }
497        }
498        packages
499    }
500}
501
502/// A source of packages to index.
503#[derive(Debug, Clone)]
504pub struct PackageSource {
505    /// Display name (e.g., "stdlib", "site-packages", "node_modules")
506    pub name: &'static str,
507    /// Path to the source directory
508    pub path: PathBuf,
509    /// How to traverse this source
510    pub kind: PackageSourceKind,
511    /// Whether packages here are version-specific (affects max_version in index)
512    pub version_specific: bool,
513}
514
/// Traversal strategy for a package source directory.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PackageSourceKind {
    /// Flat directory of packages (Python site-packages, node_modules).
    /// Every top-level entry is a package.
    Flat,
    /// Recursive directory (Go stdlib, C++ includes).
    /// Packages are identified by having indexable files.
    Recursive,
    /// NPM-style scoped packages (`@scope/package`).
    NpmScoped,
    /// Maven repository layout (group/artifact/version).
    Maven,
    /// Gradle cache layout (group/artifact/version/hash).
    Gradle,
    /// Cargo registry layout (index/crate-version).
    Cargo,
    /// Deno cache layout (needs special handling for npm vs URL deps).
    Deno,
}