normalize_languages/traits.rs
1//! Core trait for language support.
2
3use crate::external_packages::ResolvedPackage;
4use std::path::{Path, PathBuf};
5use tree_sitter::Node;
6
/// Classification of an extracted symbol.
///
/// Every variant has a lowercase textual label, available through
/// [`SymbolKind::as_str`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SymbolKind {
    Function,
    Method,
    Class,
    Struct,
    Enum,
    Trait,
    Interface,
    Module,
    Type,
    Constant,
    Variable,
    Heading,
}

impl SymbolKind {
    /// Returns the lowercase label of this kind (e.g. `"function"`, `"heading"`).
    pub fn as_str(&self) -> &'static str {
        // The match is exhaustive on purpose: adding a variant must force a
        // label to be chosen here.
        match *self {
            Self::Function => "function",
            Self::Method => "method",
            Self::Class => "class",
            Self::Struct => "struct",
            Self::Enum => "enum",
            Self::Trait => "trait",
            Self::Interface => "interface",
            Self::Module => "module",
            Self::Type => "type",
            Self::Constant => "constant",
            Self::Variable => "variable",
            Self::Heading => "heading",
        }
    }
}
42
/// Symbol visibility.
///
/// `Public` is the value produced by `Visibility::default()` (selected with
/// `#[default]`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum Visibility {
    /// Default variant; also the visibility `simple_symbol` assigns to the
    /// symbols it builds.
    #[default]
    Public,
    Private,
    Protected,
    Internal,
}
52
/// How a language determines symbol visibility.
///
/// Reported per language by `Language::visibility_mechanism`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VisibilityMechanism {
    /// Explicit export keyword (JS/TS: `export function foo()`)
    ExplicitExport,
    /// Access modifier keywords (Java, Scala, C#: `public`, `private`, `protected`)
    AccessModifier,
    /// Naming convention (Go: uppercase = public, Python: underscore = private)
    NamingConvention,
    /// Header-based (C/C++: symbols in headers are public, source files are private)
    HeaderBased,
    /// Everything is public by default (Ruby modules, Lua)
    AllPublic,
    /// Not applicable (data formats like JSON, YAML, TOML)
    NotApplicable,
}
69
/// A code symbol extracted from source.
///
/// Produced by the `Language::extract_*` methods and by the `simple_symbol`
/// helper in this file.
#[derive(Debug, Clone)]
pub struct Symbol {
    /// Symbol name.
    pub name: String,
    /// Classification of the symbol (function, class, ...).
    pub kind: SymbolKind,
    /// Signature text. `simple_symbol` stores the trimmed first line of the
    /// node's source here; other extractors may differ.
    pub signature: String,
    /// Docstring / doc comment attached to the symbol, if any.
    pub docstring: Option<String>,
    /// Attributes/decorators attached to the symbol.
    /// NOTE(review): presumably the output of `Language::extract_attributes`
    /// (e.g. `#[test]`, `@Test`) — confirm against the extractors.
    pub attributes: Vec<String>,
    /// First line of the symbol. `simple_symbol` stores the node's start
    /// row + 1 here, i.e. a 1-indexed line number.
    pub start_line: usize,
    /// Last line of the symbol. `simple_symbol` stores the node's end
    /// row + 1 here, i.e. a 1-indexed line number.
    pub end_line: usize,
    /// Visibility of the symbol.
    pub visibility: Visibility,
    /// Nested symbols.
    /// NOTE(review): presumably the members of a container symbol — confirm.
    pub children: Vec<Symbol>,
    /// True if this symbol implements an interface/trait (e.g., method in `impl Trait for Type`)
    pub is_interface_impl: bool,
    /// Parent interfaces/classes this symbol extends or implements (for semantic matching)
    pub implements: Vec<String>,
}
87
/// A single import statement extracted from source.
#[derive(Debug, Clone)]
pub struct Import {
    /// Module being imported from; always the leading part of
    /// [`Import::format_summary`]'s output.
    pub module: String,
    /// Names imported from `module`; may be empty.
    pub names: Vec<String>,
    /// Alias attached to the import, if any.
    /// NOTE(review): presumably the `as` name — confirm against the extractors.
    pub alias: Option<String>,
    /// When set, the import is summarized as `module::*`.
    pub is_wildcard: bool,
    /// NOTE(review): presumably marks project-relative imports — confirm.
    pub is_relative: bool,
    /// Line of the import statement.
    /// NOTE(review): presumably 1-indexed — confirm against the extractors.
    pub line: usize,
}

impl Import {
    /// Renders a short, human-readable summary of the import.
    ///
    /// - wildcard import: `module::*` (takes precedence over `names`)
    /// - no names: `module`
    /// - one name: `module::name`
    /// - several names: `module::{a, b, c}`
    pub fn format_summary(&self) -> String {
        if self.is_wildcard {
            return format!("{}::*", self.module);
        }

        match self.names.as_slice() {
            [] => self.module.clone(),
            [only] => format!("{}::{}", self.module, only),
            many => format!("{}::{{{}}}", self.module, many.join(", ")),
        }
    }
}
113
/// An export declaration.
///
/// Returned by `Language::extract_public_symbols`.
#[derive(Debug, Clone)]
pub struct Export {
    /// Name of the exported/public symbol.
    pub name: String,
    /// Classification of the exported symbol.
    pub kind: SymbolKind,
    /// Line of the export.
    /// NOTE(review): presumably 1-indexed — confirm against the extractors.
    pub line: usize,
}
121
/// Embedded content block (e.g., JS in Vue, CSS in HTML).
///
/// Returned by `Language::embedded_content` for nodes that contain code in
/// another language.
#[derive(Debug, Clone)]
pub struct EmbeddedBlock {
    /// Grammar to use for parsing (e.g., "javascript", "css")
    pub grammar: &'static str,
    /// Extracted source content
    pub content: String,
    /// 1-indexed start line in the parent file
    pub start_line: usize,
}
132
133// === Helper functions for should_skip_package_entry ===
134
/// Returns `true` when `name` begins with `'.'` (a dotfile or dot-directory).
///
/// An empty name is not a dotfile.
pub fn skip_dotfiles(name: &str) -> bool {
    name.chars().next() == Some('.')
}
139
/// Returns `true` when `name` ends with `.<ext>` for at least one of `extensions`.
///
/// Extensions are given without the leading dot (`"rs"`, `"tar.gz"`). The
/// comparison is case-sensitive, and the dot is required: `"rs"` does not
/// match the extension `"rs"`, while `".rs"` does. An empty `extensions`
/// slice never matches.
pub fn has_extension(name: &str, extensions: &[&str]) -> bool {
    extensions.iter().any(|ext| {
        // Strip the extension, then require the separating dot right before
        // it. This matches `name.ends_with(".{ext}")` without allocating a
        // temporary string for every candidate.
        name.strip_suffix(*ext)
            .map_or(false, |stem| stem.ends_with('.'))
    })
}
146
147// === Helper functions for common extractor patterns ===
148
149/// Create a simple symbol with standard defaults.
150///
151/// Used by languages with straightforward function/method syntax where symbols:
152/// - Have public visibility
153/// - Use first line as signature
154/// - Have no attributes or children
155/// - Don't implement interfaces
156///
157/// Languages using this: cmake, glsl, graphql, hlsl, awk, elm, fish, haskell,
158/// jq, julia, ocaml, powershell, zsh
159pub fn simple_symbol(
160 node: &tree_sitter::Node,
161 content: &str,
162 name: &str,
163 kind: SymbolKind,
164 docstring: Option<String>,
165) -> Symbol {
166 let text = &content[node.byte_range()];
167 let first_line = text.lines().next().unwrap_or(text);
168
169 Symbol {
170 name: name.to_string(),
171 kind,
172 signature: first_line.trim().to_string(),
173 docstring,
174 attributes: Vec::new(),
175 start_line: node.start_position().row + 1,
176 end_line: node.end_position().row + 1,
177 visibility: Visibility::Public,
178 children: Vec::new(),
179 is_interface_impl: false,
180 implements: Vec::new(),
181 }
182}
183
184/// Create a simple function symbol (convenience wrapper).
185pub fn simple_function_symbol(
186 node: &tree_sitter::Node,
187 content: &str,
188 name: &str,
189 docstring: Option<String>,
190) -> Symbol {
191 simple_symbol(node, content, name, SymbolKind::Function, docstring)
192}
193
194/// Unified language support trait.
195///
196/// Each language implements this trait to provide:
197/// - Node kind classification
198/// - Symbol extraction (functions, classes, types)
199/// - Import/export parsing
200/// - Complexity analysis nodes
201/// - Visibility detection
202/// - Edit support (container bodies, docstrings)
203pub trait Language: Send + Sync {
204 /// Display name for this language (e.g., "Python", "C++")
205 fn name(&self) -> &'static str;
206
207 /// File extensions this language handles (e.g., ["py", "pyi", "pyw"])
208 fn extensions(&self) -> &'static [&'static str];
209
210 /// Grammar name for arborium (e.g., "python", "rust")
211 fn grammar_name(&self) -> &'static str;
212
213 /// Whether this language has code symbols (functions, classes, etc.)
214 fn has_symbols(&self) -> bool;
215
216 // === Node Classification ===
217
218 /// Container nodes that can hold methods (class, impl, module)
219 fn container_kinds(&self) -> &'static [&'static str];
220
221 /// Function/method definition nodes
222 fn function_kinds(&self) -> &'static [&'static str];
223
224 /// Type definition nodes (struct, enum, interface, type alias)
225 fn type_kinds(&self) -> &'static [&'static str];
226
227 /// Import statement nodes
228 fn import_kinds(&self) -> &'static [&'static str];
229
230 /// AST node kinds that may contain publicly visible symbols.
231 /// For JS/TS: export_statement nodes.
232 /// For Go/Java/Python: function/class/type declaration nodes.
233 /// The extract_public_symbols() method filters by actual visibility.
234 fn public_symbol_kinds(&self) -> &'static [&'static str];
235
236 /// How this language determines symbol visibility
237 fn visibility_mechanism(&self) -> VisibilityMechanism;
238
239 // === Symbol Extraction ===
240
241 /// Extract symbol from a function/method node
242 fn extract_function(&self, node: &Node, content: &str, in_container: bool) -> Option<Symbol>;
243
244 /// Extract symbol from a container node (class, impl, module)
245 fn extract_container(&self, node: &Node, content: &str) -> Option<Symbol>;
246
247 /// Extract symbol from a type definition node
248 fn extract_type(&self, node: &Node, content: &str) -> Option<Symbol>;
249
250 /// Extract docstring/doc comment for a node
251 fn extract_docstring(&self, node: &Node, content: &str) -> Option<String>;
252
253 /// Extract attributes/decorators for a node (e.g., #[test], @Test)
254 fn extract_attributes(&self, node: &Node, content: &str) -> Vec<String>;
255
256 // === Import/Export ===
257
258 /// Extract imports from an import node (may return multiple)
259 fn extract_imports(&self, node: &Node, content: &str) -> Vec<Import>;
260
261 /// Format an import as source code.
262 /// If `names` is Some, only include those names (for multi-import filtering).
263 /// If `names` is None, format the complete import.
264 fn format_import(&self, import: &Import, names: Option<&[&str]>) -> String;
265
266 /// Extract public symbols from a node.
267 /// The node is one of the kinds from public_symbol_kinds().
268 /// For JS/TS: extracts exported names from export statements.
269 /// For Go/Java/Python: checks visibility and returns public symbols.
270 fn extract_public_symbols(&self, node: &Node, content: &str) -> Vec<Export>;
271
272 // === Scope Analysis ===
273
274 /// Nodes that create new variable scopes (for scope analysis)
275 /// Includes: loops, blocks, comprehensions, lambdas, with statements
276 /// Note: Functions and containers (from function_kinds/container_kinds) also create scopes
277 fn scope_creating_kinds(&self) -> &'static [&'static str];
278
279 // === Control Flow ===
280
281 /// Nodes that affect control flow (for CFG analysis)
282 /// Includes: if, for, while, return, break, continue, try, match
283 fn control_flow_kinds(&self) -> &'static [&'static str];
284
285 // === Complexity ===
286
287 /// Nodes that increase cyclomatic complexity
288 fn complexity_nodes(&self) -> &'static [&'static str];
289
290 /// Nodes that indicate nesting depth
291 fn nesting_nodes(&self) -> &'static [&'static str];
292
293 // === Display/Formatting ===
294
295 /// Suffix to append to signatures for tree-sitter parsing.
296 /// Function signatures are incomplete code fragments that need closing tokens
297 /// to parse correctly (e.g., Rust `fn foo()` needs `{}`, Lua `function foo()` needs `end`).
298 /// Returns the suffix to append, or empty string if none needed.
299 fn signature_suffix(&self) -> &'static str;
300
301 // === Visibility ===
302
303 /// Check if a node is public/exported
304 fn is_public(&self, node: &Node, content: &str) -> bool;
305
306 /// Get visibility of a node
307 fn get_visibility(&self, node: &Node, content: &str) -> Visibility;
308
309 /// Check if a symbol is a test (for filtering).
310 /// Each language must implement this - test conventions are language-specific.
311 fn is_test_symbol(&self, symbol: &Symbol) -> bool;
312
313 // === Embedded Languages ===
314
315 /// Extract embedded content from a node (e.g., JS/CSS in Vue/HTML).
316 /// Returns None for nodes that don't contain embedded code in another language.
317 fn embedded_content(&self, node: &Node, content: &str) -> Option<EmbeddedBlock>;
318
319 // === Edit Support ===
320
321 /// Find the body node of a container (for prepend/append)
322 fn container_body<'a>(&self, node: &'a Node<'a>) -> Option<Node<'a>>;
323
324 /// Detect if first child of body is a docstring
325 fn body_has_docstring(&self, body: &Node, content: &str) -> bool;
326
327 // === Helpers ===
328
329 /// Get the name of a node (typically via "name" field)
330 fn node_name<'a>(&self, node: &Node, content: &'a str) -> Option<&'a str>;
331
332 /// Convert a file path to a module name for this language.
333 /// Used to find "importers" - files that import a given file.
334 /// Returns None for languages without module systems or where not applicable.
335 fn file_path_to_module_name(&self, path: &Path) -> Option<String>;
336
337 /// Convert a module name to candidate file paths (inverse of file_path_to_module_name).
338 /// Returns relative paths that could contain the module.
339 /// Used for wildcard import resolution (e.g., `from foo import *`).
340 fn module_name_to_paths(&self, module: &str) -> Vec<String>;
341
342 // === Import Resolution ===
343
344 /// Language key for package index cache (e.g., "python", "go", "js").
345 fn lang_key(&self) -> &'static str;
346
347 /// Resolve a local import within the project.
348 ///
349 /// Handles project-relative imports (e.g., `from . import foo`, `crate::`,
350 /// `./module`, relative includes).
351 fn resolve_local_import(
352 &self,
353 import_name: &str,
354 current_file: &Path,
355 project_root: &Path,
356 ) -> Option<PathBuf>;
357
358 /// Resolve an external import to its source location.
359 ///
360 /// Returns the path to stdlib or installed packages.
361 fn resolve_external_import(
362 &self,
363 import_name: &str,
364 project_root: &Path,
365 ) -> Option<ResolvedPackage>;
366
367 /// Check if an import is from the standard library.
368 fn is_stdlib_import(&self, import_name: &str, project_root: &Path) -> bool;
369
370 /// Get the language/runtime version (for package index versioning).
371 fn get_version(&self, project_root: &Path) -> Option<String>;
372
373 /// Find package cache/installation directory.
374 fn find_package_cache(&self, project_root: &Path) -> Option<PathBuf>;
375
376 /// File extensions to index when caching a package.
377 fn indexable_extensions(&self) -> &'static [&'static str];
378
379 // === Package Indexing ===
380
381 /// Find standard library directory (if applicable).
382 /// Returns None for languages without a separate stdlib to index.
383 fn find_stdlib(&self, project_root: &Path) -> Option<PathBuf>;
384
385 /// Should this entry be skipped when indexing packages?
386 /// Called for each file/directory in package directories.
387 /// Use helper functions `skip_dotfiles()` and `has_extension(name, self.indexable_extensions())` for common checks.
388 fn should_skip_package_entry(&self, name: &str, is_dir: bool) -> bool;
389
390 /// Get the module/package name from a directory entry name.
391 fn package_module_name(&self, entry_name: &str) -> String;
392
393 /// Return package sources to index for this language.
394 /// Each source describes a directory containing packages.
395 fn package_sources(&self, project_root: &Path) -> Vec<PackageSource>;
396
397 /// Discover packages in a source directory.
398 /// Returns (package_name, path) pairs for all packages found.
399 /// Use provided helpers: discover_flat_packages, discover_recursive_packages, discover_npm_scoped_packages.
400 fn discover_packages(&self, source: &PackageSource) -> Vec<(String, PathBuf)>;
401
402 /// Discover packages in a flat directory (each entry is a package).
403 fn discover_flat_packages(&self, source_path: &Path) -> Vec<(String, PathBuf)> {
404 let entries = match std::fs::read_dir(source_path) {
405 Ok(e) => e,
406 Err(_) => return Vec::new(),
407 };
408
409 let mut packages = Vec::new();
410 for entry in entries.flatten() {
411 let path = entry.path();
412 let name = entry.file_name().to_string_lossy().to_string();
413
414 if self.should_skip_package_entry(&name, path.is_dir()) {
415 continue;
416 }
417
418 let module_name = self.package_module_name(&name);
419 packages.push((module_name, path));
420 }
421 packages
422 }
423
424 /// Discover packages recursively (each file with matching extension is a package).
425 fn discover_recursive_packages(
426 &self,
427 base_path: &Path,
428 current_path: &Path,
429 ) -> Vec<(String, PathBuf)> {
430 let entries = match std::fs::read_dir(current_path) {
431 Ok(e) => e,
432 Err(_) => return Vec::new(),
433 };
434
435 let mut packages = Vec::new();
436 for entry in entries.flatten() {
437 let path = entry.path();
438 let name = entry.file_name().to_string_lossy().to_string();
439 let is_dir = path.is_dir();
440
441 if self.should_skip_package_entry(&name, is_dir) {
442 continue;
443 }
444
445 if is_dir {
446 packages.extend(self.discover_recursive_packages(base_path, &path));
447 } else {
448 // Get relative path from base as module name
449 let rel_path = path
450 .strip_prefix(base_path)
451 .map(|p| p.to_string_lossy().to_string())
452 .unwrap_or_else(|_| name);
453 packages.push((rel_path, path));
454 }
455 }
456 packages
457 }
458
459 /// Find the entry point file for a package path.
460 /// If path is a file, returns it directly.
461 /// If path is a directory, looks for language-specific entry points.
462 fn find_package_entry(&self, path: &Path) -> Option<PathBuf>;
463
464 /// Discover packages in npm-scoped directory (handles @scope/package).
465 fn discover_npm_scoped_packages(&self, source_path: &Path) -> Vec<(String, PathBuf)> {
466 let entries = match std::fs::read_dir(source_path) {
467 Ok(e) => e,
468 Err(_) => return Vec::new(),
469 };
470
471 let mut packages = Vec::new();
472 for entry in entries.flatten() {
473 let path = entry.path();
474 let name = entry.file_name().to_string_lossy().to_string();
475
476 if self.should_skip_package_entry(&name, path.is_dir()) {
477 continue;
478 }
479
480 if name.starts_with('@') && path.is_dir() {
481 // Scoped package - iterate contents
482 if let Ok(scoped_entries) = std::fs::read_dir(&path) {
483 for scoped_entry in scoped_entries.flatten() {
484 let scoped_path = scoped_entry.path();
485 let scoped_name = scoped_entry.file_name().to_string_lossy().to_string();
486 if self.should_skip_package_entry(&scoped_name, scoped_path.is_dir()) {
487 continue;
488 }
489 let full_name = format!("{}/{}", name, scoped_name);
490 packages.push((full_name, scoped_path));
491 }
492 }
493 } else {
494 let module_name = self.package_module_name(&name);
495 packages.push((module_name, path));
496 }
497 }
498 packages
499 }
500}
501
/// A source of packages to index.
///
/// Produced by `Language::package_sources` and passed to
/// `Language::discover_packages`.
#[derive(Debug, Clone)]
pub struct PackageSource {
    /// Display name (e.g., "stdlib", "site-packages", "node_modules")
    pub name: &'static str,
    /// Path to the source directory
    pub path: PathBuf,
    /// How to traverse this source
    pub kind: PackageSourceKind,
    /// Whether packages here are version-specific (affects max_version in index)
    pub version_specific: bool,
}
514
/// How to traverse a package source directory.
///
/// Stored in `PackageSource::kind`. The `Language` trait ships default
/// helpers named after the first three strategies (`discover_flat_packages`,
/// `discover_recursive_packages`, `discover_npm_scoped_packages`); no helper
/// for the remaining kinds is visible in this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PackageSourceKind {
    /// Flat directory of packages (Python site-packages, node_modules)
    /// Each top-level entry is a package.
    Flat,
    /// Recursive directory (Go stdlib, C++ includes)
    /// Packages are identified by having indexable files.
    Recursive,
    /// NPM-style scoped packages (@scope/package)
    NpmScoped,
    /// Maven repository structure (group/artifact/version)
    Maven,
    /// Gradle cache structure (group/artifact/version/hash)
    Gradle,
    /// Cargo registry structure (index/crate-version)
    Cargo,
    /// Deno cache structure (needs special handling for npm vs URL deps)
    Deno,
}