Skip to main content

perl_semantic_analyzer/analysis/
index.rs

1//! Cross-file workspace indexing for Perl symbols
2//!
3//! This module provides efficient indexing of symbols across all files in a workspace,
4//! enabling fast cross-file navigation, references, and refactoring.
5
6use crate::analysis::import_extractor::ImportExtractor;
7use crate::symbol::{SymbolKind, SymbolTable};
8use crate::{Node, NodeKind, Parser};
9use perl_semantic_facts::FileId;
10use std::collections::{HashMap, HashSet};
11use std::sync::{Arc, RwLock};
12
/// Coarse classification of a symbol used by the cross-file index in
/// Index/Navigate workflows.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SymKind {
    /// A variable, i.e. a symbol written with a `$`, `@`, or `%` sigil.
    Var,
    /// A subroutine definition (`sub foo`).
    Sub,
    /// A package declaration (`package Foo`).
    Pack,
}
23
24/// A normalized symbol key for cross-file lookups in Index/Navigate workflows.
25#[derive(Clone, Debug, Eq, PartialEq, Hash)]
26pub struct SymbolKey {
27    /// Package name containing this symbol
28    pub pkg: Arc<str>,
29    /// Bare name without sigil prefix
30    pub name: Arc<str>,
31    /// Variable sigil ($, @, or %) if applicable
32    pub sigil: Option<char>,
33    /// Kind of symbol (variable, subroutine, package)
34    pub kind: SymKind,
35}
36
37/// A symbol definition in the workspace
38#[derive(Clone, Debug)]
39pub struct SymbolDef {
40    /// The name of the symbol
41    pub name: String,
42    /// The kind of symbol (function, variable, package, etc.)
43    pub kind: SymbolKind,
44    /// The URI of the file containing this symbol
45    pub uri: String,
46    /// Start byte offset in the file
47    pub start: usize,
48    /// End byte offset in the file
49    pub end: usize,
50}
51
52/// Workspace-wide index for fast symbol lookups
53#[derive(Default)]
54pub struct WorkspaceIndex {
55    /// Index from symbol name to all its definitions
56    by_name: HashMap<String, Vec<SymbolDef>>,
57    /// Index from URI to all symbol names in that file (for fast removal)
58    by_uri: HashMap<String, HashSet<String>>,
59    /// Import/module dependencies by URI.
60    imports_by_uri: RwLock<HashMap<String, HashSet<String>>>,
61}
62
63impl WorkspaceIndex {
64    /// Create a new empty workspace index
65    pub fn new() -> Self {
66        Self::default()
67    }
68
69    /// Update the index with symbols from a document
70    pub fn update_from_document(&mut self, uri: &str, content: &str, symtab: &SymbolTable) {
71        // Remove old symbols from this file
72        self.remove_document(uri);
73
74        // Track all symbol names in this file
75        let mut names_in_file = HashSet::new();
76
77        // Add all symbols from the symbol table
78        for symbols in symtab.symbols.values() {
79            for symbol in symbols {
80                let name = symbol.name.clone();
81                names_in_file.insert(name.clone());
82
83                let def = SymbolDef {
84                    name: symbol.name.clone(),
85                    kind: symbol.kind,
86                    uri: uri.to_string(),
87                    start: symbol.location.start,
88                    end: symbol.location.end,
89                };
90
91                self.by_name.entry(name).or_default().push(def);
92            }
93        }
94
95        // Track which names are in this file
96        self.by_uri.insert(uri.to_string(), names_in_file);
97
98        if !content.is_empty()
99            && let Ok(dependencies) = Self::extract_dependencies(content)
100        {
101            self.set_file_dependencies(uri, dependencies);
102        }
103    }
104
105    /// Remove all symbols from a document
106    pub fn remove_document(&mut self, uri: &str) {
107        if let Some(names) = self.by_uri.remove(uri) {
108            for name in names {
109                if let Some(defs) = self.by_name.get_mut(&name) {
110                    defs.retain(|d| d.uri != uri);
111                    if defs.is_empty() {
112                        self.by_name.remove(&name);
113                    }
114                }
115            }
116        }
117        self.remove_file_dependencies(uri);
118    }
119
120    /// Index import dependencies from raw file contents.
121    pub fn index_file_str(&self, uri: &str, content: &str) -> Result<(), String> {
122        let dependencies = Self::extract_dependencies(content)?;
123        let mut imports = self
124            .imports_by_uri
125            .write()
126            .map_err(|_| "workspace import index lock poisoned".to_string())?;
127        imports.insert(uri.to_string(), dependencies);
128        Ok(())
129    }
130
131    /// Return modules imported or required by a file.
132    pub fn file_dependencies(&self, uri: &str) -> HashSet<String> {
133        let Ok(imports) = self.imports_by_uri.read() else {
134            return HashSet::new();
135        };
136        imports.get(uri).cloned().unwrap_or_default()
137    }
138
139    fn set_file_dependencies(&self, uri: &str, dependencies: HashSet<String>) {
140        if let Ok(mut imports) = self.imports_by_uri.write() {
141            imports.insert(uri.to_string(), dependencies);
142        }
143    }
144
145    fn remove_file_dependencies(&self, uri: &str) {
146        if let Ok(mut imports) = self.imports_by_uri.write() {
147            imports.remove(uri);
148        }
149    }
150
151    fn extract_dependencies(content: &str) -> Result<HashSet<String>, String> {
152        let mut parser = Parser::new(content);
153        let ast = parser.parse().map_err(|err| format!("Parse error: {err}"))?;
154        let mut dependencies: HashSet<String> = ImportExtractor::extract(&ast, FileId(0))
155            .into_iter()
156            .filter_map(|spec| {
157                if spec.module.is_empty() || matches!(spec.module.as_str(), "parent" | "base") {
158                    None
159                } else {
160                    Some(spec.module)
161                }
162            })
163            .collect();
164
165        Self::collect_parent_dependencies(&ast, &mut dependencies);
166        Ok(dependencies)
167    }
168
169    fn collect_parent_dependencies(node: &Node, dependencies: &mut HashSet<String>) {
170        if let NodeKind::Use { module, args, .. } = &node.kind
171            && matches!(module.as_str(), "parent" | "base")
172        {
173            for name in Self::parent_names_from_args(args) {
174                dependencies.insert(name);
175            }
176        }
177
178        for child in node.children() {
179            Self::collect_parent_dependencies(child, dependencies);
180        }
181    }
182
183    fn parent_names_from_args(args: &[String]) -> Vec<String> {
184        args.iter()
185            .flat_map(|arg| Self::expand_parent_arg(arg))
186            .filter(|name| !name.starts_with('-'))
187            .collect()
188    }
189
190    fn expand_parent_arg(arg: &str) -> Vec<String> {
191        let trimmed = arg.trim();
192        if trimmed.is_empty() {
193            return Vec::new();
194        }
195
196        if let Some(content) = Self::parse_qw_content(trimmed) {
197            return content.split_whitespace().map(str::to_string).collect();
198        }
199
200        let unquoted = trimmed.trim_matches('\'').trim_matches('"').trim();
201        if unquoted.is_empty() { Vec::new() } else { vec![unquoted.to_string()] }
202    }
203
204    fn parse_qw_content(arg: &str) -> Option<&str> {
205        let rest = arg.strip_prefix("qw")?;
206        let mut chars = rest.chars();
207        let open = chars.next()?;
208        let close = match open {
209            '(' => ')',
210            '{' => '}',
211            '[' => ']',
212            '<' => '>',
213            delimiter => delimiter,
214        };
215        let start = open.len_utf8();
216        let end = rest.rfind(close)?;
217        (end >= start).then_some(&rest[start..end])
218    }
219
220    /// Find all definitions of a symbol by name
221    pub fn find_defs(&self, name: &str) -> &[SymbolDef] {
222        static EMPTY: Vec<SymbolDef> = Vec::new();
223        self.by_name.get(name).map(|v| v.as_slice()).unwrap_or(&EMPTY[..])
224    }
225
226    /// Find all references to a symbol (simplified version)
227    /// In a full implementation, this would analyze usage sites
228    pub fn find_refs(&self, name: &str) -> Vec<SymbolDef> {
229        // For now, return all definitions as references
230        // A full implementation would scan all files for usage sites
231        self.find_defs(name).to_vec()
232    }
233
234    /// Get all symbols in the workspace matching a query
235    pub fn search_symbols(&self, query: &str) -> Vec<SymbolDef> {
236        let query_lower = query.to_lowercase();
237        let mut results = Vec::new();
238
239        for (name, defs) in &self.by_name {
240            if name.to_lowercase().contains(&query_lower) {
241                results.extend(defs.clone());
242            }
243        }
244
245        results
246    }
247
248    /// Get the total number of indexed symbols
249    pub fn symbol_count(&self) -> usize {
250        self.by_name.values().map(|v| v.len()).sum()
251    }
252
253    /// Get the number of indexed files
254    pub fn file_count(&self) -> usize {
255        self.by_uri.len()
256    }
257}
258
#[cfg(test)]
mod tests {
    use super::*;
    use crate::SourceLocation;
    use crate::symbol::Symbol;

    /// URI under which the fixture document is indexed.
    const TEST_URI: &str = "file:///test.pl";

    /// Build a symbol table that holds a single `test_func` subroutine.
    fn single_sub_table() -> SymbolTable {
        let mut symtab = SymbolTable::new();
        let symbol = Symbol {
            name: "test_func".to_string(),
            qualified_name: "main::test_func".to_string(),
            kind: SymbolKind::Subroutine,
            location: SourceLocation { start: 0, end: 10 },
            scope_id: 0,
            declaration: Some("sub".to_string()),
            documentation: None,
            attributes: Vec::new(),
        };
        symtab.symbols.entry("test_func".to_string()).or_default().push(symbol);
        symtab
    }

    #[test]
    fn test_workspace_index() {
        let mut index = WorkspaceIndex::new();
        let symtab = single_sub_table();

        // Index the document; empty content means no dependency extraction.
        index.update_from_document(TEST_URI, "", &symtab);

        // The definition is discoverable by name.
        let defs = index.find_defs("test_func");
        assert_eq!(defs.len(), 1);
        assert_eq!(defs[0].name, "test_func");
        assert_eq!(defs[0].uri, "file:///test.pl");

        // Removing the document removes its definitions.
        index.remove_document(TEST_URI);
        assert_eq!(index.find_defs("test_func").len(), 0);
    }
}