1use std::collections::{HashMap, HashSet};
11use std::path::Path;
12use std::sync::LazyLock;
13
14use rayon::prelude::*;
15use serde::Serialize;
16
17use crate::git::types::{FileChange, FileStatus};
18use crate::model::entity::SemanticEntity;
19use crate::parser::registry::ParserRegistry;
20
/// A directed reference edge between two entities in the graph.
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct EntityRef {
    /// Id of the entity that contains the reference.
    pub from_entity: String,
    /// Id of the entity being referenced.
    pub to_entity: String,
    /// How `from_entity` refers to `to_entity` (call, type use, or import).
    pub ref_type: RefType,
}
29
30#[derive(Debug, Clone, PartialEq, Serialize)]
32#[serde(rename_all = "lowercase")]
33pub enum RefType {
34 Calls,
36 TypeRef,
38 Imports,
40}
41
/// In-memory graph of semantic entities and the references between them.
#[derive(Debug)]
pub struct EntityGraph {
    /// All known entities, keyed by entity id.
    pub entities: HashMap<String, EntityInfo>,
    /// Every resolved reference edge.
    pub edges: Vec<EntityRef>,
    /// Reverse adjacency: target entity id -> ids of entities that reference it.
    pub dependents: HashMap<String, Vec<String>>,
    /// Forward adjacency: source entity id -> ids of entities it references.
    pub dependencies: HashMap<String, Vec<String>>,
}
54
/// Lightweight, serializable description of a single extracted entity.
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct EntityInfo {
    /// Unique entity id (the key used in `EntityGraph::entities`).
    pub id: String,
    /// Unqualified entity name, as used for symbol-table lookup.
    pub name: String,
    /// Kind of entity (e.g. "function"), as reported by the parser plugin.
    pub entity_type: String,
    /// Path of the file defining this entity, as passed to `build`.
    pub file_path: String,
    /// First line of the entity's definition.
    pub start_line: usize,
    /// Last line of the entity's definition.
    pub end_line: usize,
}
66
67impl EntityGraph {
    /// Builds a full entity graph for `file_paths` under `root`.
    ///
    /// Extraction and reference resolution run in parallel via rayon; files
    /// that cannot be read or that have no registered parser plugin are
    /// silently skipped.
    pub fn build(
        root: &Path,
        file_paths: &[String],
        registry: &ParserRegistry,
    ) -> Self {
        // Phase 1: parse every file in parallel and collect all entities.
        let all_entities: Vec<SemanticEntity> = file_paths
            .par_iter()
            .filter_map(|file_path| {
                let full_path = root.join(file_path);
                // Unreadable files and files without a plugin yield None.
                let content = std::fs::read_to_string(&full_path).ok()?;
                let plugin = registry.get_plugin(file_path)?;
                Some(plugin.extract_entities(&content, file_path))
            })
            .flatten()
            .collect();

        // Phase 2: index entities by name (for reference resolution) and by
        // id (for graph lookups).
        let mut symbol_table: HashMap<String, Vec<String>> = HashMap::with_capacity(all_entities.len());
        let mut entity_map: HashMap<String, EntityInfo> = HashMap::with_capacity(all_entities.len());

        for entity in &all_entities {
            symbol_table
                .entry(entity.name.clone())
                .or_default()
                .push(entity.id.clone());

            entity_map.insert(
                entity.id.clone(),
                EntityInfo {
                    id: entity.id.clone(),
                    name: entity.name.clone(),
                    entity_type: entity.entity_type.clone(),
                    file_path: entity.file_path.clone(),
                    start_line: entity.start_line,
                    end_line: entity.end_line,
                },
            );
        }

        // Phase 3: resolve textual references to concrete entity ids, in
        // parallel, producing (from, to, ref_type) triples.
        let resolved_refs: Vec<(String, String, RefType)> = all_entities
            .par_iter()
            .flat_map(|entity| {
                let refs = extract_references_from_content(&entity.content, &entity.name);
                let mut entity_edges = Vec::new();
                for ref_name in refs {
                    if let Some(target_ids) = symbol_table.get(ref_name) {
                        // Prefer a same-file match, otherwise fall back to the
                        // first entity elsewhere with that name; a reference
                        // to the entity itself is ignored.
                        let target = target_ids
                            .iter()
                            .find(|id| {
                                *id != &entity.id
                                    && entity_map
                                        .get(*id)
                                        .map_or(false, |e| e.file_path == entity.file_path)
                            })
                            .or_else(|| target_ids.iter().find(|id| *id != &entity.id));

                        if let Some(target_id) = target {
                            let ref_type = infer_ref_type(&entity.content, &ref_name);
                            entity_edges.push((
                                entity.id.clone(),
                                target_id.clone(),
                                ref_type,
                            ));
                        }
                    }
                }
                entity_edges
            })
            .collect();

        // Phase 4: materialize the edge list and both adjacency maps.
        let mut edges: Vec<EntityRef> = Vec::with_capacity(resolved_refs.len());
        let mut dependents: HashMap<String, Vec<String>> = HashMap::new();
        let mut dependencies: HashMap<String, Vec<String>> = HashMap::new();

        for (from_entity, to_entity, ref_type) in resolved_refs {
            dependents
                .entry(to_entity.clone())
                .or_default()
                .push(from_entity.clone());
            dependencies
                .entry(from_entity.clone())
                .or_default()
                .push(to_entity.clone());
            edges.push(EntityRef {
                from_entity,
                to_entity,
                ref_type,
            });
        }

        EntityGraph {
            entities: entity_map,
            edges,
            dependents,
            dependencies,
        }
    }
174
175 pub fn get_dependents(&self, entity_id: &str) -> Vec<&EntityInfo> {
177 self.dependents
178 .get(entity_id)
179 .map(|ids| {
180 ids.iter()
181 .filter_map(|id| self.entities.get(id))
182 .collect()
183 })
184 .unwrap_or_default()
185 }
186
187 pub fn get_dependencies(&self, entity_id: &str) -> Vec<&EntityInfo> {
189 self.dependencies
190 .get(entity_id)
191 .map(|ids| {
192 ids.iter()
193 .filter_map(|id| self.entities.get(id))
194 .collect()
195 })
196 .unwrap_or_default()
197 }
198
    /// Returns every entity transitively affected by a change to `entity_id`
    /// (see [`Self::impact_analysis_capped`]), with a default cap of 10,000.
    pub fn impact_analysis(&self, entity_id: &str) -> Vec<&EntityInfo> {
        self.impact_analysis_capped(entity_id, 10_000)
    }
204
    /// Breadth-first traversal over `dependents`, collecting every entity
    /// that transitively depends on `entity_id`. The starting entity itself
    /// is not included, and at most `max_visited` entities are returned.
    /// Returns an empty vector for an unknown id.
    pub fn impact_analysis_capped(&self, entity_id: &str, max_visited: usize) -> Vec<&EntityInfo> {
        let mut visited: HashSet<&str> = HashSet::new();
        let mut queue: std::collections::VecDeque<&str> = std::collections::VecDeque::new();
        let mut result = Vec::new();

        // Borrow the map's own key so the &str lives as long as &self.
        let start_key = match self.entities.get_key_value(entity_id) {
            Some((k, _)) => k.as_str(),
            None => return result,
        };

        queue.push_back(start_key);
        visited.insert(start_key);

        while let Some(current) = queue.pop_front() {
            if result.len() >= max_visited {
                break;
            }
            if let Some(deps) = self.dependents.get(current) {
                for dep in deps {
                    // `insert` returns false for already-seen ids, which also
                    // protects against cycles in the dependency graph.
                    if visited.insert(dep.as_str()) {
                        if let Some(info) = self.entities.get(dep.as_str()) {
                            result.push(info);
                        }
                        queue.push_back(dep.as_str());
                        if result.len() >= max_visited {
                            break;
                        }
                    }
                }
            }
        }

        result
    }
241
242 pub fn impact_count(&self, entity_id: &str, max_count: usize) -> usize {
245 let mut visited: HashSet<&str> = HashSet::new();
246 let mut queue: std::collections::VecDeque<&str> = std::collections::VecDeque::new();
247 let mut count = 0;
248
249 let start_key = match self.entities.get_key_value(entity_id) {
251 Some((k, _)) => k.as_str(),
252 None => return 0,
253 };
254
255 queue.push_back(start_key);
256 visited.insert(start_key);
257
258 while let Some(current) = queue.pop_front() {
259 if count >= max_count {
260 break;
261 }
262 if let Some(deps) = self.dependents.get(current) {
263 for dep in deps {
264 if visited.insert(dep.as_str()) {
265 count += 1;
266 queue.push_back(dep.as_str());
267 if count >= max_count {
268 break;
269 }
270 }
271 }
272 }
273 }
274
275 count
276 }
277
278 pub fn update_from_changes(
289 &mut self,
290 changed_files: &[FileChange],
291 root: &Path,
292 registry: &ParserRegistry,
293 ) {
294 let mut affected_files: HashSet<String> = HashSet::new();
295 let mut new_entities: Vec<SemanticEntity> = Vec::new();
296
297 for change in changed_files {
298 affected_files.insert(change.file_path.clone());
299 if let Some(ref old_path) = change.old_file_path {
300 affected_files.insert(old_path.clone());
301 }
302
303 match change.status {
304 FileStatus::Deleted => {
305 self.remove_entities_for_file(&change.file_path);
306 }
307 FileStatus::Renamed => {
308 if let Some(ref old_path) = change.old_file_path {
310 self.remove_entities_for_file(old_path);
311 }
312 if let Some(entities) = self.extract_file_entities(
314 &change.file_path,
315 change.after_content.as_deref(),
316 root,
317 registry,
318 ) {
319 new_entities.extend(entities);
320 }
321 }
322 FileStatus::Added | FileStatus::Modified => {
323 self.remove_entities_for_file(&change.file_path);
325 if let Some(entities) = self.extract_file_entities(
327 &change.file_path,
328 change.after_content.as_deref(),
329 root,
330 registry,
331 ) {
332 new_entities.extend(entities);
333 }
334 }
335 }
336 }
337
338 for entity in &new_entities {
340 self.entities.insert(
341 entity.id.clone(),
342 EntityInfo {
343 id: entity.id.clone(),
344 name: entity.name.clone(),
345 entity_type: entity.entity_type.clone(),
346 file_path: entity.file_path.clone(),
347 start_line: entity.start_line,
348 end_line: entity.end_line,
349 },
350 );
351 }
352
353 let symbol_table = self.build_symbol_table();
355
356 for entity in &new_entities {
358 self.resolve_entity_references(entity, &symbol_table);
359 }
360
361 let changed_entity_names: HashSet<String> = new_entities
364 .iter()
365 .map(|e| e.name.clone())
366 .collect();
367
368 let entities_to_recheck: Vec<String> = self
370 .entities
371 .values()
372 .filter(|e| !affected_files.contains(&e.file_path))
373 .filter(|e| {
374 self.dependencies
375 .get(&e.id)
376 .map_or(false, |deps| {
377 deps.iter().any(|dep_id| {
378 self.entities
379 .get(dep_id)
380 .map_or(false, |dep| changed_entity_names.contains(&dep.name))
381 })
382 })
383 })
384 .map(|e| e.id.clone())
385 .collect();
386
387 let _ = entities_to_recheck; }
394
395 fn extract_file_entities(
397 &self,
398 file_path: &str,
399 content: Option<&str>,
400 root: &Path,
401 registry: &ParserRegistry,
402 ) -> Option<Vec<SemanticEntity>> {
403 let plugin = registry.get_plugin(file_path)?;
404
405 let content = if let Some(c) = content {
406 c.to_string()
407 } else {
408 let full_path = root.join(file_path);
409 std::fs::read_to_string(&full_path).ok()?
410 };
411
412 Some(plugin.extract_entities(&content, file_path))
413 }
414
    /// Removes every entity defined in `file_path`, along with all edges and
    /// adjacency entries that involve those entities (on either side).
    fn remove_entities_for_file(&mut self, file_path: &str) {
        let ids_to_remove: Vec<String> = self
            .entities
            .values()
            .filter(|e| e.file_path == file_path)
            .map(|e| e.id.clone())
            .collect();

        // Set form for O(1) membership checks while filtering edges.
        let id_set: HashSet<&str> = ids_to_remove.iter().map(|s| s.as_str()).collect();

        for id in &ids_to_remove {
            self.entities.remove(id);
        }

        // Drop any edge that touches a removed entity on either end.
        self.edges
            .retain(|e| !id_set.contains(e.from_entity.as_str()) && !id_set.contains(e.to_entity.as_str()));

        // Scrub both adjacency maps: remove each dead id's own entry and its
        // back-references from the opposite map.
        for id in &ids_to_remove {
            if let Some(deps) = self.dependencies.remove(id) {
                for dep in &deps {
                    if let Some(dependents) = self.dependents.get_mut(dep) {
                        dependents.retain(|d| d != id);
                    }
                }
            }
            if let Some(deps) = self.dependents.remove(id) {
                for dep in &deps {
                    if let Some(dependencies) = self.dependencies.get_mut(dep) {
                        dependencies.retain(|d| d != id);
                    }
                }
            }
        }
    }
458
459 fn build_symbol_table(&self) -> HashMap<String, Vec<String>> {
461 let mut symbol_table: HashMap<String, Vec<String>> = HashMap::new();
462 for entity in self.entities.values() {
463 symbol_table
464 .entry(entity.name.clone())
465 .or_default()
466 .push(entity.id.clone());
467 }
468 symbol_table
469 }
470
    /// Resolves textual references found in `entity`'s content against
    /// `symbol_table` and records the resulting edge plus both adjacency
    /// entries for each resolution.
    ///
    /// Mirrors the resolution policy of [`Self::build`]: a same-file match
    /// wins, otherwise the first other entity with that name; references to
    /// the entity itself are skipped.
    fn resolve_entity_references(
        &mut self,
        entity: &SemanticEntity,
        symbol_table: &HashMap<String, Vec<String>>,
    ) {
        let refs = extract_references_from_content(&entity.content, &entity.name);

        for ref_name in refs {
            if let Some(target_ids) = symbol_table.get(ref_name) {
                let target = target_ids
                    .iter()
                    .find(|id| {
                        *id != &entity.id
                            && self
                                .entities
                                .get(*id)
                                .map_or(false, |e| e.file_path == entity.file_path)
                    })
                    .or_else(|| target_ids.iter().find(|id| *id != &entity.id));

                if let Some(target_id) = target {
                    let ref_type = infer_ref_type(&entity.content, &ref_name);
                    self.edges.push(EntityRef {
                        from_entity: entity.id.clone(),
                        to_entity: target_id.clone(),
                        ref_type,
                    });
                    self.dependents
                        .entry(target_id.clone())
                        .or_default()
                        .push(entity.id.clone());
                    self.dependencies
                        .entry(entity.id.clone())
                        .or_default()
                        .push(target_id.clone());
                }
            }
        }
    }
511}
512
513fn extract_references_from_content<'a>(content: &'a str, own_name: &str) -> Vec<&'a str> {
516 let mut refs = Vec::new();
517 let mut seen: HashSet<&str> = HashSet::new();
518
519 for word in content.split(|c: char| !c.is_alphanumeric() && c != '_') {
520 if word.is_empty() || word == own_name {
521 continue;
522 }
523 if is_keyword(word) || word.len() < 2 {
524 continue;
525 }
526 if word.starts_with(|c: char| c.is_lowercase()) && word.len() < 3 {
528 continue;
529 }
530 if !word.starts_with(|c: char| c.is_alphabetic() || c == '_') {
531 continue;
532 }
533 if is_common_local_name(word) {
535 continue;
536 }
537 if seen.insert(word) {
538 refs.push(word);
539 }
540 }
541
542 refs
543}
544
/// Identifier names so generic that a match is far more likely a local
/// variable than a reference to another entity; these are never treated as
/// references.
static COMMON_LOCAL_NAMES: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
    HashSet::from([
        "result", "results", "data", "config", "value", "values", "item",
        "items", "input", "output", "args", "opts", "name", "path", "file",
        "line", "count", "index", "temp", "prev", "next", "curr", "current",
        "node", "left", "right", "root", "head", "tail", "body", "text",
        "content", "source", "target", "entry", "error", "errors", "message",
        "response", "request", "context", "state", "props", "event",
        "handler", "callback", "options", "params", "query", "list", "base",
        "info", "meta", "kind", "mode", "flag", "size", "length", "width",
        "height", "start", "stop", "begin", "done", "found", "status",
        "code", "test",
    ])
});

/// Returns true when `word` is one of the generic local-variable names in
/// [`COMMON_LOCAL_NAMES`].
fn is_common_local_name(word: &str) -> bool {
    COMMON_LOCAL_NAMES.contains(word)
}
567
/// Classifies how `ref_name` is used inside `content`.
///
/// A word-boundary occurrence immediately followed by `(` counts as a call;
/// otherwise a mention on an import-like line (`import`/`use`/`from`/
/// `require(`) counts as an import; anything else is a type reference.
fn infer_ref_type(content: &str, ref_name: &str) -> RefType {
    // Byte offsets returned by `find` always fall on char boundaries, so
    // indexing `bytes` at `pos`/`after` is safe.
    let bytes = content.as_bytes();
    let name_bytes = ref_name.as_bytes();
    let mut search_start = 0;
    while let Some(rel_pos) = content[search_start..].find(ref_name) {
        let pos = search_start + rel_pos;
        let after = pos + name_bytes.len();
        if after < bytes.len() && bytes[after] == b'(' {
            // Reject matches that are merely a suffix of a longer identifier.
            let is_boundary = pos == 0 || {
                let prev = bytes[pos - 1];
                !prev.is_ascii_alphanumeric() && prev != b'_'
            };
            if is_boundary {
                return RefType::Calls;
            }
        }
        // Advance past this occurrence; overlapping matches are still found.
        search_start = pos + 1;
    }

    // No call site found: look for an import-style line mentioning the name.
    for line in content.lines() {
        let trimmed = line.trim();
        if (trimmed.starts_with("import ") || trimmed.starts_with("use ")
            || trimmed.starts_with("from ") || trimmed.starts_with("require("))
            && trimmed.contains(ref_name)
        {
            return RefType::Imports;
        }
    }

    RefType::TypeRef
}
606
/// Reserved words and ubiquitous built-in names across the supported
/// languages; tokens in this set are never treated as entity references.
static KEYWORDS: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
    HashSet::from([
        // JavaScript / TypeScript / Java-family
        "if", "else", "for", "while", "do", "switch", "case", "break",
        "continue", "return", "try", "catch", "finally", "throw",
        "new", "delete", "typeof", "instanceof", "in", "of",
        "true", "false", "null", "undefined", "void", "this",
        "super", "class", "extends", "implements", "interface",
        "enum", "const", "let", "var", "function", "async",
        "await", "yield", "import", "export", "default", "from",
        "as", "static", "public", "private", "protected",
        "abstract", "final", "override",
        // Rust
        "fn", "pub", "mod", "use", "struct", "impl", "trait",
        "where", "type", "self", "Self", "mut", "ref", "match",
        "loop", "move", "unsafe", "extern", "crate", "dyn",
        // Python
        "def", "elif", "except", "raise", "with",
        "pass", "lambda", "nonlocal", "global", "assert",
        "True", "False", "and", "or", "not", "is",
        // Go
        "func", "package", "range", "select", "chan", "go",
        "defer", "map", "make", "append", "len", "cap",
        // C / C++
        "auto", "register", "volatile", "sizeof", "typedef",
        "template", "typename", "namespace", "virtual", "inline",
        "constexpr", "nullptr", "noexcept", "explicit", "friend",
        "operator", "using", "cout", "endl", "cerr", "cin",
        "printf", "scanf", "malloc", "free", "NULL", "include",
        "ifdef", "ifndef", "endif", "define", "pragma",
        // Ruby
        "end", "then", "elsif", "unless", "until",
        "begin", "rescue", "ensure", "when", "require",
        "attr_accessor", "attr_reader", "attr_writer",
        "puts", "nil", "module", "defined",
        // C#
        "internal", "sealed", "readonly",
        "partial", "delegate", "event", "params", "out",
        "object", "decimal", "sbyte", "ushort", "uint",
        "ulong", "nint", "nuint", "dynamic",
        "get", "set", "value", "init", "record",
        // Primitive and common standard-library type names
        "string", "number", "boolean", "int", "float", "double",
        "bool", "char", "byte", "i8", "i16", "i32", "i64",
        "u8", "u16", "u32", "u64", "f32", "f64", "usize",
        "isize", "str", "String", "Vec", "Option", "Result",
        "Box", "Arc", "Rc", "HashMap", "HashSet", "Some",
        "Ok", "Err",
    ])
});

/// Returns true when `word` is in [`KEYWORDS`].
fn is_keyword(word: &str) -> bool {
    KEYWORDS.contains(word)
}
661
#[cfg(test)]
mod tests {
    use super::*;
    use crate::git::types::{FileChange, FileStatus};
    use std::io::Write;
    use tempfile::TempDir;

    // NOTE: fixes mojibake where every `&registry` argument had been
    // corrupted to `®istry` (`&reg` collapsed into the `®` character),
    // which does not compile.

    /// Creates an empty temp directory plus the default parser registry.
    fn create_test_repo() -> (TempDir, ParserRegistry) {
        let dir = TempDir::new().unwrap();
        let registry = crate::parser::plugins::create_default_registry();
        (dir, registry)
    }

    /// Writes `content` to `dir/name`, creating parent directories as needed.
    fn write_file(dir: &Path, name: &str, content: &str) {
        let path = dir.join(name);
        if let Some(parent) = path.parent() {
            std::fs::create_dir_all(parent).unwrap();
        }
        let mut f = std::fs::File::create(path).unwrap();
        f.write_all(content.as_bytes()).unwrap();
    }

    #[test]
    fn test_incremental_add_file() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "a.ts", "export function foo() { return bar(); }\n");
        write_file(root, "b.ts", "export function bar() { return 1; }\n");

        let mut graph = EntityGraph::build(root, &["a.ts".into(), "b.ts".into()], &registry);
        assert_eq!(graph.entities.len(), 2);

        // No inline after_content: forces the disk-read path.
        write_file(root, "c.ts", "export function baz() { return foo(); }\n");
        graph.update_from_changes(
            &[FileChange {
                file_path: "c.ts".into(),
                status: FileStatus::Added,
                old_file_path: None,
                before_content: None,
                after_content: None,
            }],
            root,
            &registry,
        );

        assert_eq!(graph.entities.len(), 3);
        assert!(graph.entities.contains_key("c.ts::function::baz"));
        let baz_deps = graph.get_dependencies("c.ts::function::baz");
        assert!(
            baz_deps.iter().any(|d| d.name == "foo"),
            "baz should depend on foo. Deps: {:?}",
            baz_deps.iter().map(|d| &d.name).collect::<Vec<_>>()
        );
    }

    #[test]
    fn test_incremental_delete_file() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "a.ts", "export function foo() { return bar(); }\n");
        write_file(root, "b.ts", "export function bar() { return 1; }\n");

        let mut graph = EntityGraph::build(root, &["a.ts".into(), "b.ts".into()], &registry);
        assert_eq!(graph.entities.len(), 2);

        graph.update_from_changes(
            &[FileChange {
                file_path: "b.ts".into(),
                status: FileStatus::Deleted,
                old_file_path: None,
                before_content: None,
                after_content: None,
            }],
            root,
            &registry,
        );

        assert_eq!(graph.entities.len(), 1);
        assert!(!graph.entities.contains_key("b.ts::function::bar"));
        let foo_deps = graph.get_dependencies("a.ts::function::foo");
        assert!(
            foo_deps.is_empty(),
            "foo's deps should be empty after bar deleted. Deps: {:?}",
            foo_deps.iter().map(|d| &d.name).collect::<Vec<_>>()
        );
    }

    #[test]
    fn test_incremental_modify_file() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "a.ts", "export function foo() { return bar(); }\n");
        write_file(root, "b.ts", "export function bar() { return 1; }\nexport function baz() { return 2; }\n");

        let mut graph = EntityGraph::build(root, &["a.ts".into(), "b.ts".into()], &registry);
        assert_eq!(graph.entities.len(), 3);

        // Rewrite a.ts so foo now calls baz instead of bar.
        write_file(root, "a.ts", "export function foo() { return baz(); }\n");
        graph.update_from_changes(
            &[FileChange {
                file_path: "a.ts".into(),
                status: FileStatus::Modified,
                old_file_path: None,
                before_content: None,
                after_content: None,
            }],
            root,
            &registry,
        );

        assert_eq!(graph.entities.len(), 3);
        let foo_deps = graph.get_dependencies("a.ts::function::foo");
        let dep_names: Vec<&str> = foo_deps.iter().map(|d| d.name.as_str()).collect();
        assert!(dep_names.contains(&"baz"), "foo should depend on baz after modification. Deps: {:?}", dep_names);
        assert!(!dep_names.contains(&"bar"), "foo should no longer depend on bar. Deps: {:?}", dep_names);
    }

    #[test]
    fn test_incremental_with_content() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "a.ts", "export function foo() { return 1; }\n");
        let mut graph = EntityGraph::build(root, &["a.ts".into()], &registry);
        assert_eq!(graph.entities.len(), 1);

        // after_content supplied inline: the file never touches disk.
        graph.update_from_changes(
            &[FileChange {
                file_path: "b.ts".into(),
                status: FileStatus::Added,
                old_file_path: None,
                before_content: None,
                after_content: Some("export function bar() { return foo(); }\n".into()),
            }],
            root,
            &registry,
        );

        assert_eq!(graph.entities.len(), 2);
        let bar_deps = graph.get_dependencies("b.ts::function::bar");
        assert!(bar_deps.iter().any(|d| d.name == "foo"));
    }

    #[test]
    fn test_extract_references() {
        let content = "function processData(input) {\n const result = validateInput(input);\n return transform(result);\n}";
        let refs = extract_references_from_content(content, "processData");
        assert!(refs.contains(&"validateInput"));
        assert!(refs.contains(&"transform"));
        // The entity's own name must never be reported as a reference.
        assert!(!refs.contains(&"processData"));
    }

    #[test]
    fn test_extract_references_skips_keywords() {
        let content = "function foo() { if (true) { return false; } }";
        let refs = extract_references_from_content(content, "foo");
        assert!(!refs.contains(&"if"));
        assert!(!refs.contains(&"true"));
        assert!(!refs.contains(&"return"));
        assert!(!refs.contains(&"false"));
    }

    #[test]
    fn test_infer_ref_type_call() {
        assert_eq!(
            infer_ref_type("validateInput(data)", "validateInput"),
            RefType::Calls,
        );
    }

    #[test]
    fn test_infer_ref_type_type() {
        assert_eq!(
            infer_ref_type("let x: MyType = something", "MyType"),
            RefType::TypeRef,
        );
    }
}