Skip to main content

srcwalk/index/
symbol.rs

1//! Materialized symbol index — pre-computes symbol-to-file mappings for O(1) resolution.
2//!
3//! Instead of walking the entire tree on every symbol query, `SymbolIndex::build()`
4//! parses all code files in scope using tree-sitter and stores (`symbol_name` -> locations)
5//! in a concurrent `DashMap`. Subsequent lookups are O(1) hash lookups plus a filter.
6
7use std::fs;
8use std::path::{Path, PathBuf};
9use std::sync::Arc;
10use std::time::SystemTime;
11
12use dashmap::DashMap;
13
14use crate::lang::detect_file_type;
15use crate::lang::outline::outline_language;
16use crate::lang::treesitter::{extract_definition_name, DEFINITION_KINDS};
17use crate::types::FileType;
18
19/// Maximum file size to index (500 KB). Matches the limit in symbol search.
20const MAX_FILE_SIZE: u64 = 500_000;
21
22/// Per-file extraction result: (path, mtime, extracted symbols).
23type FileSymbols = (PathBuf, SystemTime, Vec<(Arc<str>, u32, bool)>);
24
25/// A location where a symbol appears in the codebase.
26#[derive(Clone, Debug)]
27pub struct SymbolLocation {
28    pub path: PathBuf,
29    pub line: u32,
30    pub is_definition: bool,
31    pub mtime: SystemTime,
32}
33
34/// Pre-computed symbol-to-file index for O(1) lookups.
35///
36/// Uses `DashMap` for lock-free concurrent reads and writes.
37/// Keys are `Arc<str>` for memory-efficient string interning — many lookups
38/// against the same symbol names benefit from shared allocations.
39pub struct SymbolIndex {
40    /// `symbol_name` -> list of locations
41    symbols: DashMap<Arc<str>, Vec<SymbolLocation>>,
42    /// file -> mtime when last indexed
43    indexed_files: DashMap<PathBuf, SystemTime>,
44}
45
46impl Default for SymbolIndex {
47    fn default() -> Self {
48        Self::new()
49    }
50}
51
52impl SymbolIndex {
53    /// Create an empty symbol index.
54    #[must_use]
55    pub fn new() -> Self {
56        Self {
57            symbols: DashMap::new(),
58            indexed_files: DashMap::new(),
59        }
60    }
61
62    /// Build the index by walking all code files in `scope`.
63    ///
64    /// Uses `ignore::WalkBuilder` with the same directory filtering as search
65    /// (skipping `.git`, `node_modules`, `target`, etc.) and processes files
66    /// in parallel via rayon for speed.
67    pub fn build(&self, scope: &Path) {
68        use ignore::WalkBuilder;
69        use rayon::prelude::*;
70
71        // Collect file paths first, then process in parallel with rayon.
72        // We use WalkBuilder for directory filtering but rayon for parallelism
73        // because rayon gives us better work-stealing than ignore's parallel walker
74        // for CPU-bound tree-sitter parsing.
75        let files: Vec<PathBuf> = WalkBuilder::new(scope)
76            .follow_links(true)
77            .hidden(false)
78            .git_ignore(false)
79            .git_global(false)
80            .git_exclude(false)
81            .ignore(false)
82            .parents(false)
83            .filter_entry(|entry| {
84                if entry.file_type().is_some_and(|ft| ft.is_dir()) {
85                    if let Some(name) = entry.file_name().to_str() {
86                        return !crate::search::io::SKIP_DIRS.contains(&name);
87                    }
88                }
89                true
90            })
91            .build()
92            .filter_map(|entry| {
93                let entry = entry.ok()?;
94                if !entry.file_type()?.is_file() {
95                    return None;
96                }
97                let path = entry.into_path();
98                // Only index code files that have tree-sitter grammars
99                if let FileType::Code(lang) = detect_file_type(&path) {
100                    if outline_language(lang).is_some() {
101                        // Skip oversized files
102                        if let Ok(meta) = fs::metadata(&path) {
103                            if meta.len() <= MAX_FILE_SIZE {
104                                return Some(path);
105                            }
106                        }
107                    }
108                }
109                None
110            })
111            .collect();
112
113        // Process files in parallel with rayon
114        let results: Vec<FileSymbols> = files
115            .par_iter()
116            .filter_map(|path| {
117                let content = fs::read_to_string(path).ok()?;
118                let mtime = fs::metadata(path)
119                    .and_then(|m| m.modified())
120                    .unwrap_or(SystemTime::UNIX_EPOCH);
121                let symbols = extract_symbols(path, &content);
122                if symbols.is_empty() {
123                    // Still record the file as indexed even if no symbols found
124                    Some((path.clone(), mtime, Vec::new()))
125                } else {
126                    Some((path.clone(), mtime, symbols))
127                }
128            })
129            .collect();
130
131        // Insert results into the DashMaps
132        for (path, mtime, symbols) in results {
133            self.indexed_files.insert(path.clone(), mtime);
134            for (name, line, is_def) in symbols {
135                let loc = SymbolLocation {
136                    path: path.clone(),
137                    line,
138                    is_definition: is_def,
139                    mtime,
140                };
141                self.symbols.entry(name).or_default().push(loc);
142            }
143        }
144    }
145
146    /// Check if the index has been built for the given scope.
147    ///
148    /// Simple heuristic: returns true if any indexed file path starts with `scope`.
149    #[must_use]
150    pub fn is_built(&self, scope: &Path) -> bool {
151        self.indexed_files
152            .iter()
153            .any(|entry| entry.key().starts_with(scope))
154    }
155
156    /// Look up all locations of a symbol within `scope`.
157    ///
158    /// Returns matching locations filtered to paths within `scope`.
159    /// Results may include stale entries if files have changed since indexing --
160    /// callers can check `mtime` against the current file if freshness matters.
161    #[must_use]
162    pub fn lookup(&self, name: &str, scope: &Path) -> Vec<SymbolLocation> {
163        let key: Arc<str> = Arc::from(name);
164        let Some(locations) = self.symbols.get(&key) else {
165            return Vec::new();
166        };
167        locations
168            .iter()
169            .filter(|loc| loc.path.starts_with(scope))
170            .cloned()
171            .collect()
172    }
173
174    /// Look up only definition locations of a symbol within `scope`.
175    ///
176    /// Same as `lookup` but filters to `is_definition == true`.
177    #[must_use]
178    pub fn lookup_definitions(&self, name: &str, scope: &Path) -> Vec<SymbolLocation> {
179        let key: Arc<str> = Arc::from(name);
180        let Some(locations) = self.symbols.get(&key) else {
181            return Vec::new();
182        };
183        locations
184            .iter()
185            .filter(|loc| loc.is_definition && loc.path.starts_with(scope))
186            .cloned()
187            .collect()
188    }
189
190    /// Index a single file, updating the symbol maps.
191    ///
192    /// Used for incremental updates when a file changes.
193    /// Removes old entries for this file before inserting new ones.
194    pub fn index_file(&self, path: &Path, content: &str) {
195        let mtime = fs::metadata(path)
196            .and_then(|m| m.modified())
197            .unwrap_or(SystemTime::UNIX_EPOCH);
198
199        // Remove old entries for this file from all symbol lists
200        let old_mtime = self.indexed_files.get(path).map(|r| *r.value());
201        if old_mtime.is_some() {
202            self.symbols.iter_mut().for_each(|mut entry| {
203                entry.value_mut().retain(|loc| loc.path != path);
204            });
205        }
206
207        // Extract and insert new symbols
208        let symbols = extract_symbols(path, content);
209        self.indexed_files.insert(path.to_path_buf(), mtime);
210
211        for (name, line, is_def) in symbols {
212            let loc = SymbolLocation {
213                path: path.to_path_buf(),
214                line,
215                is_definition: is_def,
216                mtime,
217            };
218            self.symbols.entry(name).or_default().push(loc);
219        }
220    }
221
222    /// Number of unique symbol names in the index.
223    #[must_use]
224    pub fn symbol_count(&self) -> usize {
225        self.symbols.len()
226    }
227
228    /// Number of indexed files.
229    #[must_use]
230    pub fn file_count(&self) -> usize {
231        self.indexed_files.len()
232    }
233}
234
235/// Extract all symbol definitions from a file using tree-sitter.
236///
237/// Returns a list of `(name, line_number, is_definition)` tuples.
238/// Line numbers are 1-based (matching the convention used in search results).
239///
240/// Only extracts definitions (function, struct, trait, class, etc.) --
241/// not usages. This keeps the index focused and compact.
242fn extract_symbols(path: &Path, content: &str) -> Vec<(Arc<str>, u32, bool)> {
243    let FileType::Code(lang) = detect_file_type(path) else {
244        return Vec::new();
245    };
246
247    let Some(ts_lang) = outline_language(lang) else {
248        return Vec::new();
249    };
250
251    let mut parser = tree_sitter::Parser::new();
252    if parser.set_language(&ts_lang).is_err() {
253        return Vec::new();
254    }
255
256    let Some(tree) = parser.parse(content, None) else {
257        return Vec::new();
258    };
259
260    let lines: Vec<&str> = content.lines().collect();
261    let mut symbols = Vec::new();
262
263    walk_definitions(tree.root_node(), &lines, &mut symbols, lang, 0);
264
265    symbols
266}
267
268/// Recursively walk tree-sitter AST nodes to find all definitions.
269///
270/// Unlike `search::symbol::walk_for_definitions` which searches for a specific name,
271/// this extracts ALL definition names for index building.
272/// Depth-limited to 3 levels (matching search behavior) to avoid descending
273/// into deeply nested anonymous blocks.
274fn walk_definitions(
275    node: tree_sitter::Node,
276    lines: &[&str],
277    symbols: &mut Vec<(Arc<str>, u32, bool)>,
278    lang: crate::types::Lang,
279    depth: usize,
280) {
281    if depth > 3 {
282        return;
283    }
284
285    let kind = node.kind();
286
287    if DEFINITION_KINDS.contains(&kind) {
288        if let Some(name) = extract_definition_name(node, lines) {
289            let line = node.start_position().row as u32 + 1;
290            symbols.push((Arc::from(name.as_str()), line, true));
291        }
292
293        // For impl blocks in Rust, also index the trait name and type name
294        // so lookups for "MyTrait" find `impl MyTrait for Foo`.
295        if kind == "impl_item" {
296            if let Some(trait_name) = crate::lang::treesitter::extract_impl_trait(node, lines) {
297                let line = node.start_position().row as u32 + 1;
298                symbols.push((Arc::from(trait_name.as_str()), line, true));
299            }
300            if let Some(type_name) = crate::lang::treesitter::extract_impl_type(node, lines) {
301                let line = node.start_position().row as u32 + 1;
302                symbols.push((Arc::from(type_name.as_str()), line, true));
303            }
304        }
305
306        // For classes implementing interfaces, index the interface names too
307        if kind == "class_declaration" || kind == "class_definition" {
308            let interfaces = crate::lang::treesitter::extract_implemented_interfaces(node, lines);
309            for iface in interfaces {
310                let line = node.start_position().row as u32 + 1;
311                symbols.push((Arc::from(iface.as_str()), line, true));
312            }
313        }
314    } else if lang == crate::types::Lang::Elixir
315        && crate::lang::treesitter::is_elixir_definition(node, lines)
316    {
317        // Elixir: all definitions are `call` nodes — handle separately
318        if let Some(name) = crate::lang::treesitter::extract_elixir_definition_name(node, lines) {
319            let line = node.start_position().row as u32 + 1;
320            symbols.push((Arc::from(name.as_str()), line, true));
321        }
322    }
323
324    // Recurse into children for nested definitions (impl blocks, class bodies, modules)
325    let mut cursor = node.walk();
326    for child in node.children(&mut cursor) {
327        walk_definitions(child, lines, symbols, lang, depth + 1);
328    }
329}
330
331#[cfg(test)]
332mod tests {
333    use super::*;
334    use std::io::Write;
335
336    #[test]
337    fn test_empty_index() {
338        let index = SymbolIndex::new();
339        assert_eq!(index.symbol_count(), 0);
340        assert_eq!(index.file_count(), 0);
341        assert!(!index.is_built(Path::new("/tmp")));
342        assert!(index.lookup("foo", Path::new("/tmp")).is_empty());
343    }
344
345    #[test]
346    fn test_extract_symbols_rust() {
347        let content = r#"
348pub struct Foo {
349    bar: u32,
350}
351
352impl Foo {
353    pub fn baz(&self) -> u32 {
354        self.bar
355    }
356}
357
358trait MyTrait {
359    fn do_thing(&self);
360}
361
362impl MyTrait for Foo {
363    fn do_thing(&self) {}
364}
365"#;
366        let dir = std::env::temp_dir().join("srcwalk_test_extract_symbols");
367        let _ = fs::create_dir_all(&dir);
368        let path = dir.join("test.rs");
369        let mut f = fs::File::create(&path).unwrap();
370        f.write_all(content.as_bytes()).unwrap();
371
372        let symbols = extract_symbols(&path, content);
373        let names: Vec<&str> = symbols.iter().map(|(n, _, _)| n.as_ref()).collect();
374
375        assert!(names.contains(&"Foo"), "should find struct Foo: {names:?}");
376        assert!(names.contains(&"baz"), "should find fn baz: {names:?}");
377        assert!(
378            names.contains(&"MyTrait"),
379            "should find trait MyTrait: {names:?}"
380        );
381        assert!(
382            names.contains(&"do_thing"),
383            "should find fn do_thing: {names:?}"
384        );
385
386        // All extracted symbols should be definitions
387        assert!(symbols.iter().all(|(_, _, is_def)| *is_def));
388
389        let _ = fs::remove_file(&path);
390    }
391
392    #[test]
393    fn test_index_file() {
394        let content = "pub fn hello() {}\npub fn world() {}";
395        let dir = std::env::temp_dir().join("srcwalk_test_index_file");
396        let _ = fs::create_dir_all(&dir);
397        let path = dir.join("test.rs");
398        fs::write(&path, content).unwrap();
399
400        let index = SymbolIndex::new();
401        index.index_file(&path, content);
402
403        assert_eq!(index.file_count(), 1);
404        let results = index.lookup("hello", &dir);
405        assert_eq!(results.len(), 1);
406        assert!(results[0].is_definition);
407        assert_eq!(results[0].line, 1);
408
409        let results = index.lookup("world", &dir);
410        assert_eq!(results.len(), 1);
411        assert_eq!(results[0].line, 2);
412
413        // Test incremental update
414        let new_content = "pub fn hello() {}\npub fn updated() {}";
415        fs::write(&path, new_content).unwrap();
416        index.index_file(&path, new_content);
417
418        // "world" should be gone, "updated" should be present
419        assert!(index.lookup("world", &dir).is_empty());
420        assert_eq!(index.lookup("updated", &dir).len(), 1);
421
422        let _ = fs::remove_file(&path);
423    }
424
425    #[test]
426    fn test_lookup_definitions_filter() {
427        let content = "pub fn target() {}";
428        let dir = std::env::temp_dir().join("srcwalk_test_lookup_defs");
429        let _ = fs::create_dir_all(&dir);
430        let path = dir.join("test.rs");
431        fs::write(&path, content).unwrap();
432
433        let index = SymbolIndex::new();
434        index.index_file(&path, content);
435
436        let defs = index.lookup_definitions("target", &dir);
437        assert_eq!(defs.len(), 1);
438        assert!(defs[0].is_definition);
439
440        // lookup_definitions with wrong scope should return empty
441        let defs = index.lookup_definitions("target", Path::new("/nonexistent"));
442        assert!(defs.is_empty());
443
444        let _ = fs::remove_file(&path);
445    }
446
447    #[test]
448    fn test_extract_symbols_typescript() {
449        let content = r#"
450function greet(name: string): string {
451    return `Hello, ${name}!`;
452}
453
454class Greeter {
455    greeting: string;
456    constructor(message: string) {
457        this.greeting = message;
458    }
459}
460
461interface Printable {
462    print(): void;
463}
464"#;
465        let dir = std::env::temp_dir().join("srcwalk_test_extract_ts");
466        let _ = fs::create_dir_all(&dir);
467        let path = dir.join("test.ts");
468        fs::write(&path, content).unwrap();
469
470        let symbols = extract_symbols(&path, content);
471        let names: Vec<&str> = symbols.iter().map(|(n, _, _)| n.as_ref()).collect();
472
473        assert!(
474            names.contains(&"greet"),
475            "should find function greet: {names:?}"
476        );
477        assert!(
478            names.contains(&"Greeter"),
479            "should find class Greeter: {names:?}"
480        );
481        assert!(
482            names.contains(&"Printable"),
483            "should find interface Printable: {names:?}"
484        );
485
486        let _ = fs::remove_file(&path);
487    }
488
489    #[test]
490    fn test_extract_symbols_python() {
491        let content = r#"
492def hello():
493    pass
494
495class MyClass:
496    def method(self):
497        pass
498"#;
499        let dir = std::env::temp_dir().join("srcwalk_test_extract_py");
500        let _ = fs::create_dir_all(&dir);
501        let path = dir.join("test.py");
502        fs::write(&path, content).unwrap();
503
504        let symbols = extract_symbols(&path, content);
505        let names: Vec<&str> = symbols.iter().map(|(n, _, _)| n.as_ref()).collect();
506
507        assert!(names.contains(&"hello"), "should find def hello: {names:?}");
508        assert!(
509            names.contains(&"MyClass"),
510            "should find class MyClass: {names:?}"
511        );
512
513        let _ = fs::remove_file(&path);
514    }
515}