project_rag/relations/repomap/
reference_finder.rs

1//! Reference finding via identifier matching.
2//!
3//! This module finds references to symbols by searching for identifier occurrences
4//! that match known symbol names from the definition index.
5
6use std::collections::HashMap;
7
8use anyhow::Result;
9use chrono::Utc;
10use regex::Regex;
11
12use crate::indexer::FileInfo;
13use crate::relations::types::{Definition, Reference, ReferenceKind};
14
15/// Finds references to symbols using text-based identifier matching.
16pub struct ReferenceFinder {
17    /// Regex for identifying valid identifier characters
18    identifier_regex: Regex,
19}
20
21impl ReferenceFinder {
22    /// Create a new reference finder
23    pub fn new() -> Self {
24        // Match word boundaries around identifiers
25        Self {
26            identifier_regex: Regex::new(r"\b[a-zA-Z_][a-zA-Z0-9_]*\b").unwrap(),
27        }
28    }
29
30    /// Find all references to known symbols in a file
31    pub fn find_references(
32        &self,
33        file_info: &FileInfo,
34        symbol_index: &HashMap<String, Vec<Definition>>,
35    ) -> Result<Vec<Reference>> {
36        let mut references = Vec::new();
37
38        // Skip if no symbols to look for
39        if symbol_index.is_empty() {
40            return Ok(references);
41        }
42
43        // Process each line
44        for (line_num, line) in file_info.content.lines().enumerate() {
45            let line_number = line_num + 1; // 1-based
46
47            // Find all identifier occurrences in this line
48            for mat in self.identifier_regex.find_iter(line) {
49                let name = mat.as_str();
50
51                // Check if this identifier matches a known symbol
52                if let Some(definitions) = symbol_index.get(name) {
53                    // Skip if this is likely a definition site in the same file
54                    if self.is_definition_site(definitions, &file_info.relative_path, line_number) {
55                        continue;
56                    }
57
58                    // Determine reference kind based on context
59                    let reference_kind = self.determine_reference_kind(line, mat.start(), name);
60
61                    // Get the best matching definition
62                    // For now, just use the first one (could be improved with scope analysis)
63                    if let Some(def) = definitions.first() {
64                        references.push(Reference {
65                            file_path: file_info.relative_path.clone(),
66                            root_path: Some(file_info.root_path.clone()),
67                            project: file_info.project.clone(),
68                            start_line: line_number,
69                            end_line: line_number,
70                            start_col: mat.start(),
71                            end_col: mat.end(),
72                            target_symbol_id: def.to_storage_id(),
73                            reference_kind,
74                            indexed_at: Utc::now().timestamp(),
75                        });
76                    }
77                }
78            }
79        }
80
81        Ok(references)
82    }
83
84    /// Check if a line is likely a definition site
85    fn is_definition_site(
86        &self,
87        definitions: &[Definition],
88        file_path: &str,
89        line_number: usize,
90    ) -> bool {
91        definitions.iter().any(|def| {
92            def.file_path() == file_path
93                && line_number >= def.start_line()
94                && line_number <= def.end_line
95        })
96    }
97
98    /// Determine the kind of reference based on context
99    fn determine_reference_kind(
100        &self,
101        line: &str,
102        position: usize,
103        name: &str,
104    ) -> ReferenceKind {
105        // Get text before the identifier
106        let before = &line[..position];
107
108        // Get text after the identifier (skip past the name itself)
109        let after_end = position + name.len();
110        let after_name = if after_end <= line.len() {
111            &line[after_end..]
112        } else {
113            ""
114        };
115
116        let lower_line = line.to_lowercase();
117
118        // Check for import patterns (highest priority)
119        if lower_line.contains("import ")
120            || lower_line.contains("from ")
121            || lower_line.contains("require(")
122            || lower_line.contains("use ")
123        {
124            return ReferenceKind::Import;
125        }
126
127        // Check for instantiation (before function call, since `new Foo()` looks like a call)
128        if before.contains("new ") {
129            return ReferenceKind::Instantiation;
130        }
131
132        // Check for inheritance patterns
133        if before.contains("extends") || before.contains("implements") {
134            return ReferenceKind::Inheritance;
135        }
136
137        // Check for function/method call pattern (identifier followed by parenthesis)
138        if after_name.trim_start().starts_with('(') {
139            return ReferenceKind::Call;
140        }
141
142        // Check for assignment (write)
143        if after_name.trim_start().starts_with('=')
144            && !after_name.trim_start().starts_with("==")
145            && !after_name.trim_start().starts_with("=>")
146        {
147            return ReferenceKind::Write;
148        }
149
150        // Check for type reference patterns
151        if before.contains(':') || before.contains("->") || before.contains('<') {
152            return ReferenceKind::TypeReference;
153        }
154
155        // Default to read
156        ReferenceKind::Read
157    }
158}
159
160impl Default for ReferenceFinder {
161    fn default() -> Self {
162        Self::new()
163    }
164}
165
#[cfg(test)]
mod tests {
    use super::*;
    use crate::relations::types::{SymbolId, SymbolKind, Visibility};
    use std::path::PathBuf;

    /// Build an in-memory `FileInfo` whose path and relative path are both `path`.
    fn make_file_info(content: &str, path: &str) -> FileInfo {
        FileInfo {
            path: PathBuf::from(path),
            relative_path: path.to_string(),
            root_path: "/test".to_string(),
            project: None,
            extension: Some("rs".to_string()),
            language: Some("Rust".to_string()),
            content: content.to_string(),
            hash: "test_hash".to_string(),
        }
    }

    /// Build a public function definition spanning `start_line..=start_line + 5`.
    fn make_definition(name: &str, file_path: &str, start_line: usize) -> Definition {
        Definition {
            symbol_id: SymbolId::new(file_path, name, SymbolKind::Function, start_line, 0),
            root_path: Some("/test".to_string()),
            project: None,
            end_line: start_line + 5,
            end_col: 0,
            signature: format!("fn {}()", name),
            doc_comment: None,
            visibility: Visibility::Public,
            parent_id: None,
            indexed_at: 0,
        }
    }

    /// Build a symbol index that knows exactly one definition of `name`.
    fn single_symbol_index(
        name: &str,
        def_path: &str,
        start_line: usize,
    ) -> HashMap<String, Vec<Definition>> {
        let mut symbol_index = HashMap::new();
        symbol_index.insert(
            name.to_string(),
            vec![make_definition(name, def_path, start_line)],
        );
        symbol_index
    }

    #[test]
    fn test_find_function_call() {
        let source = r#"
fn main() {
    let result = greet("World");
}
"#;
        let file_info = make_file_info(source, "src/main.rs");
        let symbol_index = single_symbol_index("greet", "src/lib.rs", 1);

        let finder = ReferenceFinder::new();
        let references = finder.find_references(&file_info, &symbol_index).unwrap();

        assert_eq!(references.len(), 1);
        assert_eq!(references[0].reference_kind, ReferenceKind::Call);
    }

    #[test]
    fn test_skip_definition_site() {
        let source = r#"
fn greet(name: &str) {
    println!("Hello, {}!", name);
}
"#;
        let file_info = make_file_info(source, "src/lib.rs");
        // The definition is on line 2 of the same file.
        let symbol_index = single_symbol_index("greet", "src/lib.rs", 2);

        let finder = ReferenceFinder::new();
        let references = finder.find_references(&file_info, &symbol_index).unwrap();

        // Should not include the definition site as a reference
        assert!(references.is_empty());
    }

    #[test]
    fn test_detect_write() {
        let source = "counter = counter + 1";
        let file_info = make_file_info(source, "src/main.rs");
        let symbol_index = single_symbol_index("counter", "src/lib.rs", 1);

        let finder = ReferenceFinder::new();
        let references = finder.find_references(&file_info, &symbol_index).unwrap();

        // First occurrence is a write, second is a read
        assert_eq!(references.len(), 2);
        assert_eq!(references[0].reference_kind, ReferenceKind::Write);
        assert_eq!(references[1].reference_kind, ReferenceKind::Read);
    }

    #[test]
    fn test_detect_import() {
        let source = "from mymodule import greet";
        let file_info = make_file_info(source, "src/main.py");
        let symbol_index = single_symbol_index("greet", "src/mymodule.py", 1);

        let finder = ReferenceFinder::new();
        let references = finder.find_references(&file_info, &symbol_index).unwrap();

        assert!(!references.is_empty());
        assert!(references
            .iter()
            .any(|r| r.reference_kind == ReferenceKind::Import));
    }

    #[test]
    fn test_detect_instantiation() {
        let source = "let person = new Person()";
        let file_info = make_file_info(source, "src/main.js");
        let symbol_index = single_symbol_index("Person", "src/person.js", 1);

        let finder = ReferenceFinder::new();
        let references = finder.find_references(&file_info, &symbol_index).unwrap();

        assert!(!references.is_empty());
        assert!(references
            .iter()
            .any(|r| r.reference_kind == ReferenceKind::Instantiation));
    }

    #[test]
    fn test_empty_symbol_index() {
        let source = "fn main() { greet(); }";
        let file_info = make_file_info(source, "src/main.rs");

        let symbol_index = HashMap::new();

        let finder = ReferenceFinder::new();
        let references = finder.find_references(&file_info, &symbol_index).unwrap();

        assert!(references.is_empty());
    }

    #[test]
    fn test_multiple_references() {
        let source = r#"
fn main() {
    greet("Alice");
    greet("Bob");
    greet("Charlie");
}
"#;
        let file_info = make_file_info(source, "src/main.rs");
        let symbol_index = single_symbol_index("greet", "src/lib.rs", 1);

        let finder = ReferenceFinder::new();
        let references = finder.find_references(&file_info, &symbol_index).unwrap();

        assert_eq!(references.len(), 3);
    }
}