Skip to main content

sem_core/parser/
graph.rs

1//! Entity dependency graph — cross-file reference extraction.
2//!
3//! Implements a two-pass approach inspired by arXiv:2601.08773 (Reliable Graph-RAG):
4//! Pass 1: Extract all entities, build a symbol table (name → entity ID).
5//! Pass 2: For each entity, extract identifier references from its AST subtree,
6//!         resolve them against the symbol table to create edges.
7//!
8//! This enables impact analysis: "if I change entity X, what else is affected?"
9
10use std::collections::{HashMap, HashSet};
11use std::path::Path;
12use std::sync::LazyLock;
13
14use rayon::prelude::*;
15use regex::Regex;
16use serde::{Deserialize, Serialize};
17
18use crate::git::types::{FileChange, FileStatus};
19use crate::model::entity::SemanticEntity;
20use crate::parser::registry::ParserRegistry;
21use crate::parser::scope_resolve;
22
23/// A reference from one entity to another.
24#[derive(Debug, Clone, Serialize, Deserialize)]
25#[serde(rename_all = "camelCase")]
26pub struct EntityRef {
27    pub from_entity: String,
28    pub to_entity: String,
29    pub ref_type: RefType,
30}
31
32/// Type of reference between entities.
33#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
34#[serde(rename_all = "lowercase")]
35pub enum RefType {
36    /// Function/method call
37    Calls,
38    /// Type reference (extends, implements, field type)
39    TypeRef,
40    /// Import/use statement reference
41    Imports,
42}
43
44/// A complete entity dependency graph for a set of files.
45#[derive(Debug)]
46pub struct EntityGraph {
47    /// All entities indexed by ID
48    pub entities: HashMap<String, EntityInfo>,
49    /// Edges: from_entity → [(to_entity, ref_type)]
50    pub edges: Vec<EntityRef>,
51    /// Reverse index: entity_id → entities that reference it
52    pub dependents: HashMap<String, Vec<String>>,
53    /// Forward index: entity_id → entities it references
54    pub dependencies: HashMap<String, Vec<String>>,
55}
56
57/// Minimal entity info stored in the graph.
58#[derive(Debug, Clone, Serialize, Deserialize)]
59#[serde(rename_all = "camelCase")]
60pub struct EntityInfo {
61    pub id: String,
62    pub name: String,
63    pub entity_type: String,
64    pub file_path: String,
65    #[serde(skip_serializing_if = "Option::is_none")]
66    pub parent_id: Option<String>,
67    pub start_line: usize,
68    pub end_line: usize,
69}
70
71impl EntityGraph {
72    /// Reconstruct an EntityGraph from pre-loaded parts (e.g. from a cache).
73    pub fn from_parts(entities: HashMap<String, EntityInfo>, edges: Vec<EntityRef>) -> Self {
74        let mut dependents: HashMap<String, Vec<String>> = HashMap::new();
75        let mut dependencies: HashMap<String, Vec<String>> = HashMap::new();
76        for edge in &edges {
77            dependents
78                .entry(edge.to_entity.clone())
79                .or_default()
80                .push(edge.from_entity.clone());
81            dependencies
82                .entry(edge.from_entity.clone())
83                .or_default()
84                .push(edge.to_entity.clone());
85        }
86        EntityGraph {
87            entities,
88            edges,
89            dependents,
90            dependencies,
91        }
92    }
93
94    /// Build an entity graph from a set of files.
95    ///
96    /// Pass 1: Extract all entities from all files using the parser registry.
97    /// Pass 2: For each entity, find identifier tokens and resolve them against
98    ///         the symbol table to create reference edges.
99    pub fn build(
100        root: &Path,
101        file_paths: &[String],
102        registry: &ParserRegistry,
103    ) -> (Self, Vec<SemanticEntity>) {
104        // Pass 1: Extract all entities in parallel (file I/O + tree-sitter parsing)
105        // Also collect (file_path, content, tree) for scope_resolve reuse
106        let per_file: Vec<(Vec<SemanticEntity>, Option<(String, String, tree_sitter::Tree)>)> = file_paths
107            .par_iter()
108            .filter_map(|file_path| {
109                let full_path = root.join(file_path);
110                let content = std::fs::read_to_string(&full_path).ok()?;
111                let (entities, tree) = registry.extract_entities_with_tree(file_path, &content)?;
112                let parsed = tree.map(|t| (file_path.clone(), content, t));
113                Some((entities, parsed))
114            })
115            .collect();
116
117        let mut all_entities: Vec<SemanticEntity> = Vec::new();
118        let mut parsed_files: Vec<(String, String, tree_sitter::Tree)> = Vec::new();
119        for (entities, parsed) in per_file {
120            all_entities.extend(entities);
121            if let Some(p) = parsed {
122                parsed_files.push(p);
123            }
124        }
125
126        // Build symbol table: name → entity IDs (can be multiple with same name)
127        let mut symbol_table: HashMap<String, Vec<String>> = HashMap::with_capacity(all_entities.len());
128        let mut entity_map: HashMap<String, EntityInfo> = HashMap::with_capacity(all_entities.len());
129
130        for entity in &all_entities {
131            symbol_table
132                .entry(entity.name.clone())
133                .or_default()
134                .push(entity.id.clone());
135
136            entity_map.insert(
137                entity.id.clone(),
138                EntityInfo {
139                    id: entity.id.clone(),
140                    name: entity.name.clone(),
141                    entity_type: entity.entity_type.clone(),
142                    file_path: entity.file_path.clone(),
143                    parent_id: entity.parent_id.clone(),
144                    start_line: entity.start_line,
145                    end_line: entity.end_line,
146                },
147            );
148        }
149
150        // Build parent-child set for skipping class→method self-edges
151        let parent_child_pairs: HashSet<(&str, &str)> = all_entities
152            .iter()
153            .filter_map(|e| {
154                e.parent_id.as_ref().map(|pid| (pid.as_str(), e.id.as_str()))
155            })
156            .collect();
157
158        // Build set of (class_id, child_method_name) so classes skip refs to their own methods
159        let class_child_names: HashSet<(&str, &str)> = all_entities
160            .iter()
161            .filter_map(|e| {
162                e.parent_id.as_ref().map(|pid| (pid.as_str(), e.name.as_str()))
163            })
164            .collect();
165
166        // Build class-related maps for dot-chain resolution
167        // class_entity_names: all class/struct/interface entity names
168        let class_entity_names: HashSet<&str> = all_entities
169            .iter()
170            .filter(|e| matches!(e.entity_type.as_str(), "class" | "struct" | "interface" | "class_type"))
171            .map(|e| e.name.as_str())
172            .collect();
173
174        // id_to_name: quick lookup for parent name resolution
175        let id_to_name: HashMap<&str, &str> = all_entities
176            .iter()
177            .map(|e| (e.id.as_str(), e.name.as_str()))
178            .collect();
179
180        // enclosing_class: entity_id → class_name (for self/this resolution)
181        // class_members: class_name → [(member_name, member_entity_id)]
182        let mut enclosing_class: HashMap<&str, &str> = HashMap::new();
183        let mut class_members: HashMap<&str, Vec<(&str, &str)>> = HashMap::new();
184
185        for entity in &all_entities {
186            if let Some(ref pid) = entity.parent_id {
187                if let Some(&parent_name) = id_to_name.get(pid.as_str()) {
188                    if class_entity_names.contains(parent_name) {
189                        enclosing_class.insert(entity.id.as_str(), parent_name);
190                        class_members
191                            .entry(parent_name)
192                            .or_default()
193                            .push((entity.name.as_str(), entity.id.as_str()));
194                    }
195                }
196            }
197        }
198
199        // Build import table: (file_path, imported_name) → target entity ID
200        // e.g. ("io_handler.py", "validate") → "core.py::function::validate"
201        let import_table = build_import_table(root, file_paths, &symbol_table, &entity_map);
202
203        // Run scope-aware resolver for supported languages (reuse pre-parsed trees)
204        let has_scope_lang = file_paths.iter().any(|f| {
205            let ext = f.rfind('.').map(|i| &f[i..]).unwrap_or("");
206            crate::parser::plugins::code::languages::get_language_config(ext)
207                .and_then(|c| c.scope_resolve)
208                .is_some()
209        });
210        let (scope_edges, scope_resolved_entities) = if has_scope_lang {
211            let result = scope_resolve::resolve_with_scopes(root, file_paths, &all_entities, &entity_map, Some(parsed_files));
212            let resolved_entity_ids: HashSet<String> = result.edges.iter()
213                .map(|(from, _, _)| from.clone())
214                .collect();
215            (result.edges, resolved_entity_ids)
216        } else {
217            (vec![], HashSet::new())
218        };
219
220        // Pass 2: Extract references in parallel, then resolve against symbol table
221        // Phase 1: Dot-chain resolution (precise self.X, this.X, ClassName.X)
222        // Phase 2: Bag-of-words resolution (existing logic, skipping consumed words)
223        // Skip entities already resolved by scope resolver (Python files)
224        let resolved_refs: Vec<(String, String, RefType)> = all_entities
225            .par_iter()
226            .flat_map(|entity| {
227                // Skip entities already resolved by scope resolver
228                if scope_resolved_entities.contains(&entity.id) {
229                    return vec![];
230                }
231
232                let mut entity_edges = Vec::new();
233                let mut consumed_words: HashSet<String> = HashSet::new();
234
235                // Phase 1: Dot-chain resolution
236                let stripped = strip_comments_and_strings(&entity.content);
237                let dot_chains = extract_dot_chains(&stripped);
238
239                for (receiver, member) in &dot_chains {
240                    if *receiver == "self" || *receiver == "this" {
241                        // self.B / this.B: resolve to sibling method in enclosing class
242                        if let Some(class_name) = enclosing_class.get(entity.id.as_str()) {
243                            if let Some(members) = class_members.get(class_name) {
244                                for (n, tid) in members {
245                                    if *n == *member && *tid != entity.id.as_str() {
246                                        entity_edges.push((
247                                            entity.id.clone(),
248                                            tid.to_string(),
249                                            RefType::Calls,
250                                        ));
251                                        consumed_words.insert(member.to_string());
252                                        break;
253                                    }
254                                }
255                            }
256                        }
257                    } else if class_entity_names.contains(*receiver) {
258                        // ClassName.B: resolve to class member
259                        if let Some(members) = class_members.get(*receiver) {
260                            for (n, tid) in members {
261                                if *n == *member {
262                                    entity_edges.push((
263                                        entity.id.clone(),
264                                        tid.to_string(),
265                                        RefType::Calls,
266                                    ));
267                                    consumed_words.insert(member.to_string());
268                                    consumed_words.insert(receiver.to_string());
269                                    break;
270                                }
271                            }
272                        }
273                    }
274                    // Unresolved chains fall through to bag-of-words below
275                }
276
277                // Phase 2: Bag-of-words resolution (skip words consumed by dot-chains)
278                let refs = extract_references_from_content(&entity.content, &entity.name);
279                for ref_name in refs {
280                    if consumed_words.contains(ref_name) {
281                        continue;
282                    }
283
284                    // Skip references to names that are this class's own methods
285                    if class_child_names.contains(&(entity.id.as_str(), ref_name)) {
286                        continue;
287                    }
288
289                    // Check import table first: if this file imports this name,
290                    // resolve to the import target instead of global symbol table
291                    let import_key = (entity.file_path.clone(), ref_name.to_string());
292                    if let Some(import_target_id) = import_table.get(&import_key) {
293                        if import_target_id != &entity.id
294                            && !parent_child_pairs.contains(&(entity.id.as_str(), import_target_id.as_str()))
295                            && !parent_child_pairs.contains(&(import_target_id.as_str(), entity.id.as_str()))
296                        {
297                            let ref_type = infer_ref_type(&entity.content, &ref_name);
298                            entity_edges.push((
299                                entity.id.clone(),
300                                import_target_id.clone(),
301                                ref_type,
302                            ));
303                        }
304                        continue;
305                    }
306
307                    if let Some(target_ids) = symbol_table.get(ref_name) {
308                        // Without an import, only resolve to entities in the same file.
309                        // Cross-file resolution is handled by the import table above.
310                        let target = target_ids
311                            .iter()
312                            .find(|id| {
313                                *id != &entity.id
314                                    && entity_map
315                                        .get(*id)
316                                        .map_or(false, |e| e.file_path == entity.file_path)
317                            });
318
319                        if let Some(target_id) = target {
320                            // Skip parent-child edges (class -> own method)
321                            if parent_child_pairs.contains(&(entity.id.as_str(), target_id.as_str()))
322                                || parent_child_pairs.contains(&(target_id.as_str(), entity.id.as_str()))
323                            {
324                                continue;
325                            }
326                            let ref_type = infer_ref_type(&entity.content, &ref_name);
327                            entity_edges.push((
328                                entity.id.clone(),
329                                target_id.clone(),
330                                ref_type,
331                            ));
332                        }
333                    }
334                }
335                entity_edges
336            })
337            .collect();
338
339        // Merge scope edges with bag-of-words edges, deduplicating
340        let mut all_resolved: Vec<(String, String, RefType)> = scope_edges;
341        all_resolved.extend(resolved_refs);
342        let mut seen_edges: HashSet<(String, String)> = HashSet::new();
343        all_resolved.retain(|e| seen_edges.insert((e.0.clone(), e.1.clone())));
344
345        // Build edge indexes from resolved references
346        let mut edges: Vec<EntityRef> = Vec::with_capacity(all_resolved.len());
347        let mut dependents: HashMap<String, Vec<String>> = HashMap::new();
348        let mut dependencies: HashMap<String, Vec<String>> = HashMap::new();
349
350        for (from_entity, to_entity, ref_type) in all_resolved {
351            dependents
352                .entry(to_entity.clone())
353                .or_default()
354                .push(from_entity.clone());
355            dependencies
356                .entry(from_entity.clone())
357                .or_default()
358                .push(to_entity.clone());
359            edges.push(EntityRef {
360                from_entity,
361                to_entity,
362                ref_type,
363            });
364        }
365
366        let graph = EntityGraph {
367            entities: entity_map,
368            edges,
369            dependents,
370            dependencies,
371        };
372
373        (graph, all_entities)
374    }
375
376    /// Incrementally build an entity graph: reparse only stale files, reuse cached data for clean files.
377    ///
378    /// Uses the same full 3-phase resolution (scope + dot-chain + bag-of-words) as `build()`,
379    /// but only runs it for entities in stale files + clean entities whose cached edges
380    /// pointed into stale files (they need re-resolution since their targets may have changed).
381    pub fn build_incremental(
382        root: &Path,
383        stale_files: &[String],
384        all_file_paths: &[String],
385        cached_entities: Vec<SemanticEntity>,
386        cached_edges: Vec<EntityRef>,
387        stale_file_cached_entities: Vec<SemanticEntity>,
388        registry: &ParserRegistry,
389    ) -> (Self, Vec<SemanticEntity>) {
390        // Build set of stale file paths for quick lookup
391        let stale_set: HashSet<&str> = stale_files.iter().map(|s| s.as_str()).collect();
392
393        // Parse stale files in parallel to get new entities + trees
394        let per_file: Vec<(Vec<SemanticEntity>, Option<(String, String, tree_sitter::Tree)>)> = stale_files
395            .par_iter()
396            .filter_map(|file_path| {
397                let full_path = root.join(file_path);
398                let content = std::fs::read_to_string(&full_path).ok()?;
399                let (entities, tree) = registry.extract_entities_with_tree(file_path, &content)?;
400                let parsed = tree.map(|t| (file_path.clone(), content, t));
401                Some((entities, parsed))
402            })
403            .collect();
404
405        let mut new_entities: Vec<SemanticEntity> = Vec::new();
406        let mut parsed_files: Vec<(String, String, tree_sitter::Tree)> = Vec::new();
407        for (entities, parsed) in per_file {
408            new_entities.extend(entities);
409            if let Some(p) = parsed {
410                parsed_files.push(p);
411            }
412        }
413
414        // Entity-level diffing: compare new stale-file entities against cached versions
415        // Build content_hash lookup from cached stale-file entities
416        let cached_hashes: HashMap<&str, &str> = stale_file_cached_entities
417            .iter()
418            .map(|e| (e.id.as_str(), e.content_hash.as_str()))
419            .collect();
420
421        // Classify new stale-file entities
422        let mut truly_changed_ids: HashSet<String> = HashSet::new();
423        let mut content_clean_ids: HashSet<String> = HashSet::new();
424        for entity in &new_entities {
425            match cached_hashes.get(entity.id.as_str()) {
426                Some(old_hash) if *old_hash == entity.content_hash.as_str() => {
427                    content_clean_ids.insert(entity.id.clone());
428                }
429                _ => {
430                    // Hash differs or entity is new
431                    truly_changed_ids.insert(entity.id.clone());
432                }
433            }
434        }
435
436        // Detect deleted entities: in cached stale but not in new
437        let new_entity_ids: HashSet<&str> = new_entities.iter().map(|e| e.id.as_str()).collect();
438        let deleted_ids: HashSet<&str> = stale_file_cached_entities
439            .iter()
440            .filter(|e| !new_entity_ids.contains(e.id.as_str()))
441            .map(|e| e.id.as_str())
442            .collect();
443
444        // Merge: cached (clean) entities + new (stale) entities
445        let all_entities: Vec<SemanticEntity> = cached_entities
446            .into_iter()
447            .chain(new_entities.into_iter())
448            .collect();
449
450        // Find affected clean entities: only care about edges pointing to truly_changed/deleted
451        let mut affected_clean_ids: HashSet<String> = HashSet::new();
452        for edge in &cached_edges {
453            let to_truly_changed = truly_changed_ids.contains(&edge.to_entity)
454                || deleted_ids.contains(edge.to_entity.as_str());
455            if to_truly_changed && !stale_set.contains(
456                all_entities.iter()
457                    .find(|e| e.id == edge.from_entity)
458                    .map(|e| e.file_path.as_str())
459                    .unwrap_or("")
460            ) {
461                affected_clean_ids.insert(edge.from_entity.clone());
462            }
463        }
464
465        // Collect all stale entity IDs (for edge filtering)
466        let stale_entity_ids: HashSet<&str> = all_entities
467            .iter()
468            .filter(|e| stale_set.contains(e.file_path.as_str()))
469            .map(|e| e.id.as_str())
470            .collect();
471
472        // Keep edges where:
473        // - Both endpoints are clean files AND from_entity is not affected, OR
474        // - From a content_clean stale entity whose targets are also clean/content_clean
475        let kept_edges: Vec<EntityRef> = cached_edges
476            .into_iter()
477            .filter(|e| {
478                let from_stale = stale_entity_ids.contains(e.from_entity.as_str());
479                let to_stale = stale_entity_ids.contains(e.to_entity.as_str());
480
481                if !from_stale && !to_stale && !affected_clean_ids.contains(&e.from_entity) {
482                    // Both clean, from not affected
483                    return true;
484                }
485                if content_clean_ids.contains(&e.from_entity)
486                    && !truly_changed_ids.contains(&e.to_entity)
487                    && !deleted_ids.contains(e.to_entity.as_str())
488                    && !affected_clean_ids.contains(&e.from_entity)
489                {
490                    // From content_clean stale entity, target not truly changed
491                    return true;
492                }
493                false
494            })
495            .collect();
496
497        // Set of entity IDs that need resolution: truly_changed + affected clean
498        // (content_clean stale entities keep their cached edges)
499        let needs_resolution: HashSet<&str> = all_entities
500            .iter()
501            .filter(|e| {
502                truly_changed_ids.contains(&e.id)
503                    || affected_clean_ids.contains(&e.id)
504            })
505            .map(|e| e.id.as_str())
506            .collect();
507
508        // Now run the same resolution logic as build() but only for entities in needs_resolution.
509        // We still need the full context (symbol table, import table, etc.) from ALL entities.
510
511        // Build symbol table from all entities
512        let mut symbol_table: HashMap<String, Vec<String>> = HashMap::with_capacity(all_entities.len());
513        let mut entity_map: HashMap<String, EntityInfo> = HashMap::with_capacity(all_entities.len());
514
515        for entity in &all_entities {
516            symbol_table
517                .entry(entity.name.clone())
518                .or_default()
519                .push(entity.id.clone());
520            entity_map.insert(
521                entity.id.clone(),
522                EntityInfo {
523                    id: entity.id.clone(),
524                    name: entity.name.clone(),
525                    entity_type: entity.entity_type.clone(),
526                    file_path: entity.file_path.clone(),
527                    parent_id: entity.parent_id.clone(),
528                    start_line: entity.start_line,
529                    end_line: entity.end_line,
530                },
531            );
532        }
533
534        // Build parent-child set
535        let parent_child_pairs: HashSet<(&str, &str)> = all_entities
536            .iter()
537            .filter_map(|e| {
538                e.parent_id.as_ref().map(|pid| (pid.as_str(), e.id.as_str()))
539            })
540            .collect();
541
542        let class_child_names: HashSet<(&str, &str)> = all_entities
543            .iter()
544            .filter_map(|e| {
545                e.parent_id.as_ref().map(|pid| (pid.as_str(), e.name.as_str()))
546            })
547            .collect();
548
549        let class_entity_names: HashSet<&str> = all_entities
550            .iter()
551            .filter(|e| matches!(e.entity_type.as_str(), "class" | "struct" | "interface" | "class_type"))
552            .map(|e| e.name.as_str())
553            .collect();
554
555        let id_to_name: HashMap<&str, &str> = all_entities
556            .iter()
557            .map(|e| (e.id.as_str(), e.name.as_str()))
558            .collect();
559
560        let mut enclosing_class: HashMap<&str, &str> = HashMap::new();
561        let mut class_members: HashMap<&str, Vec<(&str, &str)>> = HashMap::new();
562
563        for entity in &all_entities {
564            if let Some(ref pid) = entity.parent_id {
565                if let Some(&parent_name) = id_to_name.get(pid.as_str()) {
566                    if class_entity_names.contains(parent_name) {
567                        enclosing_class.insert(entity.id.as_str(), parent_name);
568                        class_members
569                            .entry(parent_name)
570                            .or_default()
571                            .push((entity.name.as_str(), entity.id.as_str()));
572                    }
573                }
574            }
575        }
576
577        // Build import table from ALL files (imports may reference stale entities)
578        let import_table = build_import_table(root, all_file_paths, &symbol_table, &entity_map);
579
580        // Run scope-aware resolver only on files that need resolution
581        let resolve_file_paths: Vec<String> = all_file_paths
582            .iter()
583            .filter(|f| {
584                // Include file if any entity in needs_resolution belongs to it
585                stale_set.contains(f.as_str()) || all_entities.iter().any(|e| {
586                    e.file_path == **f && affected_clean_ids.contains(&e.id)
587                })
588            })
589            .cloned()
590            .collect();
591
592        let has_scope_lang = resolve_file_paths.iter().any(|f| {
593            let ext = f.rfind('.').map(|i| &f[i..]).unwrap_or("");
594            crate::parser::plugins::code::languages::get_language_config(ext)
595                .and_then(|c| c.scope_resolve)
596                .is_some()
597        });
598        let (scope_edges, scope_resolved_entities) = if has_scope_lang {
599            // Pass pre-parsed stale-file trees; scope_resolve reads affected clean files from disk
600            let resolve_set: HashSet<&str> = resolve_file_paths.iter().map(|s| s.as_str()).collect();
601            let relevant_parsed: Vec<(String, String, tree_sitter::Tree)> = parsed_files
602                .into_iter()
603                .filter(|(fp, _, _)| resolve_set.contains(fp.as_str()))
604                .collect();
605            let pre = if relevant_parsed.is_empty() { None } else { Some(relevant_parsed) };
606            let result = scope_resolve::resolve_with_scopes(root, &resolve_file_paths, &all_entities, &entity_map, pre);
607            let resolved_entity_ids: HashSet<String> = result.edges.iter()
608                .map(|(from, _, _)| from.clone())
609                .collect();
610            (result.edges, resolved_entity_ids)
611        } else {
612            (vec![], HashSet::new())
613        };
614
615        // Resolve references only for entities in needs_resolution
616        let resolved_refs: Vec<(String, String, RefType)> = all_entities
617            .par_iter()
618            .filter(|e| needs_resolution.contains(e.id.as_str()))
619            .flat_map(|entity| {
620                if scope_resolved_entities.contains(&entity.id) {
621                    return vec![];
622                }
623
624                let mut entity_edges = Vec::new();
625                let mut consumed_words: HashSet<String> = HashSet::new();
626
627                // Phase 1: Dot-chain resolution
628                let stripped = strip_comments_and_strings(&entity.content);
629                let dot_chains = extract_dot_chains(&stripped);
630
631                for (receiver, member) in &dot_chains {
632                    if *receiver == "self" || *receiver == "this" {
633                        if let Some(class_name) = enclosing_class.get(entity.id.as_str()) {
634                            if let Some(members) = class_members.get(class_name) {
635                                for (n, tid) in members {
636                                    if *n == *member && *tid != entity.id.as_str() {
637                                        entity_edges.push((
638                                            entity.id.clone(),
639                                            tid.to_string(),
640                                            RefType::Calls,
641                                        ));
642                                        consumed_words.insert(member.to_string());
643                                        break;
644                                    }
645                                }
646                            }
647                        }
648                    } else if class_entity_names.contains(*receiver) {
649                        if let Some(members) = class_members.get(*receiver) {
650                            for (n, tid) in members {
651                                if *n == *member {
652                                    entity_edges.push((
653                                        entity.id.clone(),
654                                        tid.to_string(),
655                                        RefType::Calls,
656                                    ));
657                                    consumed_words.insert(member.to_string());
658                                    consumed_words.insert(receiver.to_string());
659                                    break;
660                                }
661                            }
662                        }
663                    }
664                }
665
666                // Phase 2: Bag-of-words resolution
667                let refs = extract_references_from_content(&entity.content, &entity.name);
668                for ref_name in refs {
669                    if consumed_words.contains(ref_name) {
670                        continue;
671                    }
672                    if class_child_names.contains(&(entity.id.as_str(), ref_name)) {
673                        continue;
674                    }
675
676                    let import_key = (entity.file_path.clone(), ref_name.to_string());
677                    if let Some(import_target_id) = import_table.get(&import_key) {
678                        if import_target_id != &entity.id
679                            && !parent_child_pairs.contains(&(entity.id.as_str(), import_target_id.as_str()))
680                            && !parent_child_pairs.contains(&(import_target_id.as_str(), entity.id.as_str()))
681                        {
682                            let ref_type = infer_ref_type(&entity.content, &ref_name);
683                            entity_edges.push((
684                                entity.id.clone(),
685                                import_target_id.clone(),
686                                ref_type,
687                            ));
688                        }
689                        continue;
690                    }
691
692                    if let Some(target_ids) = symbol_table.get(ref_name) {
693                        let target = target_ids
694                            .iter()
695                            .find(|id| {
696                                *id != &entity.id
697                                    && entity_map
698                                        .get(*id)
699                                        .map_or(false, |e| e.file_path == entity.file_path)
700                            });
701
702                        if let Some(target_id) = target {
703                            if parent_child_pairs.contains(&(entity.id.as_str(), target_id.as_str()))
704                                || parent_child_pairs.contains(&(target_id.as_str(), entity.id.as_str()))
705                            {
706                                continue;
707                            }
708                            let ref_type = infer_ref_type(&entity.content, &ref_name);
709                            entity_edges.push((
710                                entity.id.clone(),
711                                target_id.clone(),
712                                ref_type,
713                            ));
714                        }
715                    }
716                }
717                entity_edges
718            })
719            .collect();
720
721        // Merge scope edges + bag-of-words edges + kept cached edges
722        let mut all_resolved: Vec<(String, String, RefType)> = scope_edges;
723        all_resolved.extend(resolved_refs);
724        let mut seen_edges: HashSet<(String, String)> = HashSet::new();
725        all_resolved.retain(|e| seen_edges.insert((e.0.clone(), e.1.clone())));
726
727        // Build final edge list: kept edges + newly resolved edges
728        let mut edges: Vec<EntityRef> = Vec::with_capacity(kept_edges.len() + all_resolved.len());
729        let mut dependents: HashMap<String, Vec<String>> = HashMap::new();
730        let mut dependencies: HashMap<String, Vec<String>> = HashMap::new();
731
732        // Track all edge pairs for dedup
733        let mut all_edge_pairs: HashSet<(String, String)> = HashSet::new();
734
735        // Add kept cached edges
736        for edge in kept_edges {
737            all_edge_pairs.insert((edge.from_entity.clone(), edge.to_entity.clone()));
738            dependents
739                .entry(edge.to_entity.clone())
740                .or_default()
741                .push(edge.from_entity.clone());
742            dependencies
743                .entry(edge.from_entity.clone())
744                .or_default()
745                .push(edge.to_entity.clone());
746            edges.push(edge);
747        }
748
749        // Add newly resolved edges, dedup against kept edges
750        for (from_entity, to_entity, ref_type) in all_resolved {
751            if !all_edge_pairs.insert((from_entity.clone(), to_entity.clone())) {
752                continue;
753            }
754            dependents
755                .entry(to_entity.clone())
756                .or_default()
757                .push(from_entity.clone());
758            dependencies
759                .entry(from_entity.clone())
760                .or_default()
761                .push(to_entity.clone());
762            edges.push(EntityRef {
763                from_entity,
764                to_entity,
765                ref_type,
766            });
767        }
768
769        let graph = EntityGraph {
770            entities: entity_map,
771            edges,
772            dependents,
773            dependencies,
774        };
775
776        (graph, all_entities)
777    }
778
779    /// Get entities that depend on the given entity (reverse deps).
780    pub fn get_dependents(&self, entity_id: &str) -> Vec<&EntityInfo> {
781        self.dependents
782            .get(entity_id)
783            .map(|ids| {
784                ids.iter()
785                    .filter_map(|id| self.entities.get(id))
786                    .collect()
787            })
788            .unwrap_or_default()
789    }
790
791    /// Get entities that the given entity depends on (forward deps).
792    pub fn get_dependencies(&self, entity_id: &str) -> Vec<&EntityInfo> {
793        self.dependencies
794            .get(entity_id)
795            .map(|ids| {
796                ids.iter()
797                    .filter_map(|id| self.entities.get(id))
798                    .collect()
799            })
800            .unwrap_or_default()
801    }
802
803    /// Impact analysis: if the given entity changes, what else might be affected?
804    /// Returns all transitive dependents (breadth-first), capped at 10k.
805    pub fn impact_analysis(&self, entity_id: &str) -> Vec<&EntityInfo> {
806        self.impact_analysis_capped(entity_id, 10_000)
807    }
808
809    /// Depth-limited impact analysis. Returns transitive dependents with their BFS depth.
810    /// `max_depth == 0` means unlimited. Default depth of 2 covers direct + one transitive level.
811    pub fn impact_analysis_bounded(&self, entity_id: &str, max_depth: usize) -> Vec<(&EntityInfo, usize)> {
812        let mut visited: HashSet<&str> = HashSet::new();
813        let mut queue: std::collections::VecDeque<(&str, usize)> = std::collections::VecDeque::new();
814        let mut result = Vec::new();
815
816        let start_key = match self.entities.get_key_value(entity_id) {
817            Some((k, _)) => k.as_str(),
818            None => return result,
819        };
820
821        queue.push_back((start_key, 0));
822        visited.insert(start_key);
823
824        while let Some((current, depth)) = queue.pop_front() {
825            if let Some(deps) = self.dependents.get(current) {
826                let next_depth = depth + 1;
827                if max_depth > 0 && next_depth > max_depth {
828                    continue;
829                }
830                for dep in deps {
831                    if visited.insert(dep.as_str()) {
832                        if let Some(info) = self.entities.get(dep.as_str()) {
833                            result.push((info, next_depth));
834                        }
835                        queue.push_back((dep.as_str(), next_depth));
836                    }
837                }
838            }
839        }
840
841        result
842    }
843
844    /// Impact analysis with a cap on maximum nodes visited.
845    /// Returns transitive dependents up to the cap. Uses borrowed strings.
846    pub fn impact_analysis_capped(&self, entity_id: &str, max_visited: usize) -> Vec<&EntityInfo> {
847        let mut visited: HashSet<&str> = HashSet::new();
848        let mut queue: std::collections::VecDeque<&str> = std::collections::VecDeque::new();
849        let mut result = Vec::new();
850
851        let start_key = match self.entities.get_key_value(entity_id) {
852            Some((k, _)) => k.as_str(),
853            None => return result,
854        };
855
856        queue.push_back(start_key);
857        visited.insert(start_key);
858
859        while let Some(current) = queue.pop_front() {
860            if result.len() >= max_visited {
861                break;
862            }
863            if let Some(deps) = self.dependents.get(current) {
864                for dep in deps {
865                    if visited.insert(dep.as_str()) {
866                        if let Some(info) = self.entities.get(dep.as_str()) {
867                            result.push(info);
868                        }
869                        queue.push_back(dep.as_str());
870                        if result.len() >= max_visited {
871                            break;
872                        }
873                    }
874                }
875            }
876        }
877
878        result
879    }
880
881    /// Count transitive dependents without collecting them (faster for large graphs).
882    /// Uses borrowed strings to avoid allocation overhead.
883    pub fn impact_count(&self, entity_id: &str, max_count: usize) -> usize {
884        let mut visited: HashSet<&str> = HashSet::new();
885        let mut queue: std::collections::VecDeque<&str> = std::collections::VecDeque::new();
886        let mut count = 0;
887
888        // We need entity_id to live long enough; look it up in our entities map
889        let start_key = match self.entities.get_key_value(entity_id) {
890            Some((k, _)) => k.as_str(),
891            None => return 0,
892        };
893
894        queue.push_back(start_key);
895        visited.insert(start_key);
896
897        while let Some(current) = queue.pop_front() {
898            if count >= max_count {
899                break;
900            }
901            if let Some(deps) = self.dependents.get(current) {
902                for dep in deps {
903                    if visited.insert(dep.as_str()) {
904                        count += 1;
905                        queue.push_back(dep.as_str());
906                        if count >= max_count {
907                            break;
908                        }
909                    }
910                }
911            }
912        }
913
914        count
915    }
916
917    /// Filter entities to those that look like tests.
918    /// Uses name heuristics, file path patterns, and content patterns.
919    pub fn filter_test_entities(&self, entities: &[crate::model::entity::SemanticEntity]) -> HashSet<String> {
920        let mut test_ids = HashSet::new();
921        for entity in entities {
922            if is_test_entity(entity) {
923                test_ids.insert(entity.id.clone());
924            }
925        }
926        test_ids
927    }
928
929    /// Impact analysis filtered to test entities only.
930    /// Returns transitive dependents that are test functions/methods.
931    pub fn test_impact(
932        &self,
933        entity_id: &str,
934        all_entities: &[crate::model::entity::SemanticEntity],
935    ) -> Vec<&EntityInfo> {
936        let test_ids = self.filter_test_entities(all_entities);
937        let impact = self.impact_analysis(entity_id);
938        impact
939            .into_iter()
940            .filter(|info| test_ids.contains(&info.id))
941            .collect()
942    }
943
944    /// Incrementally update the graph from a set of changed files.
945    ///
946    /// Instead of rebuilding the entire graph, this only re-extracts entities
947    /// from changed files and re-resolves their references. This is faster
948    /// than a full rebuild when only a few files changed.
949    ///
950    /// For each changed file:
951    /// - Deleted: remove all entities from that file, prune edges
952    /// - Added/Modified: remove old entities, extract new ones, rebuild references
953    /// - Renamed: update file paths in entity info
954    pub fn update_from_changes(
955        &mut self,
956        changed_files: &[FileChange],
957        root: &Path,
958        registry: &ParserRegistry,
959    ) {
960        let mut affected_files: HashSet<String> = HashSet::new();
961        let mut new_entities: Vec<SemanticEntity> = Vec::new();
962
963        for change in changed_files {
964            affected_files.insert(change.file_path.clone());
965            if let Some(ref old_path) = change.old_file_path {
966                affected_files.insert(old_path.clone());
967            }
968
969            match change.status {
970                FileStatus::Deleted => {
971                    self.remove_entities_for_file(&change.file_path);
972                }
973                FileStatus::Renamed => {
974                    // Update file paths for renamed files
975                    if let Some(ref old_path) = change.old_file_path {
976                        self.remove_entities_for_file(old_path);
977                    }
978                    // Extract entities from the new file
979                    if let Some(entities) = self.extract_file_entities(
980                        &change.file_path,
981                        change.after_content.as_deref(),
982                        root,
983                        registry,
984                    ) {
985                        new_entities.extend(entities);
986                    }
987                }
988                FileStatus::Added | FileStatus::Modified => {
989                    // Remove old entities for this file
990                    self.remove_entities_for_file(&change.file_path);
991                    // Extract new entities
992                    if let Some(entities) = self.extract_file_entities(
993                        &change.file_path,
994                        change.after_content.as_deref(),
995                        root,
996                        registry,
997                    ) {
998                        new_entities.extend(entities);
999                    }
1000                }
1001            }
1002        }
1003
1004        // Add new entities to the entity map
1005        for entity in &new_entities {
1006            self.entities.insert(
1007                entity.id.clone(),
1008                EntityInfo {
1009                    id: entity.id.clone(),
1010                    name: entity.name.clone(),
1011                    entity_type: entity.entity_type.clone(),
1012                    file_path: entity.file_path.clone(),
1013                    parent_id: entity.parent_id.clone(),
1014                    start_line: entity.start_line,
1015                    end_line: entity.end_line,
1016                },
1017            );
1018        }
1019
1020        // Rebuild the global symbol table from all current entities
1021        let symbol_table = self.build_symbol_table();
1022
1023        // Re-resolve references for new entities
1024        for entity in &new_entities {
1025            self.resolve_entity_references(entity, &symbol_table);
1026        }
1027
1028        // Also re-resolve references for entities in OTHER files that might
1029        // reference entities in changed files (their targets may have changed)
1030        let changed_entity_names: HashSet<String> = new_entities
1031            .iter()
1032            .map(|e| e.name.clone())
1033            .collect();
1034
1035        // Find entities in unchanged files that reference any changed entity name
1036        let entities_to_recheck: Vec<String> = self
1037            .entities
1038            .values()
1039            .filter(|e| !affected_files.contains(&e.file_path))
1040            .filter(|e| {
1041                self.dependencies
1042                    .get(&e.id)
1043                    .map_or(false, |deps| {
1044                        deps.iter().any(|dep_id| {
1045                            self.entities
1046                                .get(dep_id)
1047                                .map_or(false, |dep| changed_entity_names.contains(&dep.name))
1048                        })
1049                    })
1050            })
1051            .map(|e| e.id.clone())
1052            .collect();
1053
1054        // We don't have the full SemanticEntity for unchanged files, so we skip
1055        // deep re-resolution here. The forward/reverse indexes are already updated
1056        // by remove_entities_for_file and resolve_entity_references.
1057        // For entities that had dangling references (their target was deleted),
1058        // the edges were already pruned.
1059        let _ = entities_to_recheck; // acknowledge but don't act on for now
1060    }
1061
1062    /// Extract entities from a file, using provided content or reading from disk.
1063    fn extract_file_entities(
1064        &self,
1065        file_path: &str,
1066        content: Option<&str>,
1067        root: &Path,
1068        registry: &ParserRegistry,
1069    ) -> Option<Vec<SemanticEntity>> {
1070        let content = if let Some(c) = content {
1071            c.to_string()
1072        } else {
1073            let full_path = root.join(file_path);
1074            std::fs::read_to_string(&full_path).ok()?
1075        };
1076
1077        Some(registry.extract_entities(file_path, &content))
1078    }
1079
1080    /// Remove all entities belonging to a specific file and prune their edges.
1081    fn remove_entities_for_file(&mut self, file_path: &str) {
1082        // Collect entity IDs to remove
1083        let ids_to_remove: Vec<String> = self
1084            .entities
1085            .values()
1086            .filter(|e| e.file_path == file_path)
1087            .map(|e| e.id.clone())
1088            .collect();
1089
1090        let id_set: HashSet<&str> = ids_to_remove.iter().map(|s| s.as_str()).collect();
1091
1092        // Remove from entity map
1093        for id in &ids_to_remove {
1094            self.entities.remove(id);
1095        }
1096
1097        // Remove edges involving these entities
1098        self.edges
1099            .retain(|e| !id_set.contains(e.from_entity.as_str()) && !id_set.contains(e.to_entity.as_str()));
1100
1101        // Clean up dependency/dependent indexes
1102        for id in &ids_to_remove {
1103            // Remove forward deps
1104            if let Some(deps) = self.dependencies.remove(id) {
1105                // Also remove from reverse index
1106                for dep in &deps {
1107                    if let Some(dependents) = self.dependents.get_mut(dep) {
1108                        dependents.retain(|d| d != id);
1109                    }
1110                }
1111            }
1112            // Remove reverse deps
1113            if let Some(deps) = self.dependents.remove(id) {
1114                // Also remove from forward index
1115                for dep in &deps {
1116                    if let Some(dependencies) = self.dependencies.get_mut(dep) {
1117                        dependencies.retain(|d| d != id);
1118                    }
1119                }
1120            }
1121        }
1122    }
1123
1124    /// Build a symbol table from all current entities.
1125    fn build_symbol_table(&self) -> HashMap<String, Vec<String>> {
1126        let mut symbol_table: HashMap<String, Vec<String>> = HashMap::new();
1127        for entity in self.entities.values() {
1128            symbol_table
1129                .entry(entity.name.clone())
1130                .or_default()
1131                .push(entity.id.clone());
1132        }
1133        symbol_table
1134    }
1135
1136    /// Resolve references for a single entity against the symbol table.
1137    fn resolve_entity_references(
1138        &mut self,
1139        entity: &SemanticEntity,
1140        symbol_table: &HashMap<String, Vec<String>>,
1141    ) {
1142        let refs = extract_references_from_content(&entity.content, &entity.name);
1143
1144        for ref_name in refs {
1145            if let Some(target_ids) = symbol_table.get(ref_name) {
1146                let target = target_ids
1147                    .iter()
1148                    .find(|id| {
1149                        *id != &entity.id
1150                            && self
1151                                .entities
1152                                .get(*id)
1153                                .map_or(false, |e| e.file_path == entity.file_path)
1154                    })
1155                    .or_else(|| target_ids.iter().find(|id| *id != &entity.id));
1156
1157                if let Some(target_id) = target {
1158                    let ref_type = infer_ref_type(&entity.content, &ref_name);
1159                    self.edges.push(EntityRef {
1160                        from_entity: entity.id.clone(),
1161                        to_entity: target_id.clone(),
1162                        ref_type,
1163                    });
1164                    self.dependents
1165                        .entry(target_id.clone())
1166                        .or_default()
1167                        .push(entity.id.clone());
1168                    self.dependencies
1169                        .entry(entity.id.clone())
1170                        .or_default()
1171                        .push(target_id.clone());
1172                }
1173            }
1174        }
1175    }
1176}
1177
1178/// Check if an entity looks like a test based on name, file path, and content patterns.
1179fn is_test_entity(entity: &crate::model::entity::SemanticEntity) -> bool {
1180    let name = &entity.name;
1181    let path = &entity.file_path;
1182    let content = &entity.content;
1183
1184    // Name patterns
1185    if name.starts_with("test_") || name.starts_with("Test") || name.ends_with("_test") || name.ends_with("Test") {
1186        return true;
1187    }
1188    if name.starts_with("it_") || name.starts_with("describe_") || name.starts_with("spec_") {
1189        return true;
1190    }
1191
1192    // File path patterns
1193    let path_lower = path.to_lowercase();
1194    let in_test_file = path_lower.contains("/test/")
1195        || path_lower.contains("/tests/")
1196        || path_lower.contains("/spec/")
1197        || path_lower.contains("_test.")
1198        || path_lower.contains(".test.")
1199        || path_lower.contains("_spec.")
1200        || path_lower.contains(".spec.");
1201
1202    // Content patterns (test annotations/decorators)
1203    let has_test_marker = content.contains("#[test]")
1204        || content.contains("#[cfg(test)]")
1205        || content.contains("@Test")
1206        || content.contains("@pytest")
1207        || content.contains("@test")
1208        || content.contains("describe(")
1209        || content.contains("it(")
1210        || content.contains("test(");
1211
1212    in_test_file && has_test_marker
1213}
1214
1215/// Build import table: maps (file_path, imported_name) → target entity ID.
1216///
1217/// Parses `from X import Y` / `import X` / `use X` style statements from entity content
1218/// and resolves Y to the entity it refers to in the symbol table.
1219fn build_import_table(
1220    root: &Path,
1221    file_paths: &[String],
1222    symbol_table: &HashMap<String, Vec<String>>,
1223    entity_map: &HashMap<String, EntityInfo>,
1224) -> HashMap<(String, String), String> {
1225    let mut import_table: HashMap<(String, String), String> = HashMap::new();
1226
1227    for file_path in file_paths {
1228        let full_path = root.join(file_path);
1229        let content = match std::fs::read_to_string(&full_path) {
1230            Ok(c) => c,
1231            Err(_) => continue,
1232        };
1233
1234        // Join multi-line imports into single logical lines
1235        // e.g. "from .cookies import (\n    foo,\n    bar,\n)" -> "from .cookies import foo, bar"
1236        let mut logical_lines: Vec<String> = Vec::new();
1237        let mut current_line = String::new();
1238        let mut in_parens = false;
1239
1240        for line in content.lines() {
1241            let trimmed = line.trim();
1242            if in_parens {
1243                // Strip parentheses and comments
1244                let clean = trimmed.trim_end_matches(|c: char| c == ')' || c == ',');
1245                let clean = clean.split('#').next().unwrap_or(clean).trim();
1246                if !clean.is_empty() && clean != "(" {
1247                    current_line.push_str(", ");
1248                    current_line.push_str(clean);
1249                }
1250                if trimmed.contains(')') {
1251                    in_parens = false;
1252                    logical_lines.push(std::mem::take(&mut current_line));
1253                }
1254            } else if trimmed.starts_with("from ") && trimmed.contains(" import ") {
1255                if trimmed.contains('(') && !trimmed.contains(')') {
1256                    // Multi-line import starts
1257                    in_parens = true;
1258                    // Take everything before the paren
1259                    let before_paren = trimmed.split('(').next().unwrap_or(trimmed);
1260                    current_line = before_paren.trim().to_string();
1261                    // Also grab anything after the paren on this line
1262                    if let Some(after) = trimmed.split('(').nth(1) {
1263                        let after = after.trim().trim_end_matches(')').trim();
1264                        if !after.is_empty() {
1265                            current_line.push(' ');
1266                            current_line.push_str(after);
1267                        }
1268                    }
1269                } else {
1270                    logical_lines.push(trimmed.to_string());
1271                }
1272            }
1273        }
1274
1275        for logical_line in &logical_lines {
1276            if let Some(rest) = logical_line.strip_prefix("from ") {
1277                // Find " import " or " import," (multi-line imports join with comma)
1278                let import_match = rest.find(" import ")
1279                    .map(|pos| (pos, 8))
1280                    .or_else(|| rest.find(" import,").map(|pos| (pos, 8)));
1281                if let Some((import_pos, skip)) = import_match {
1282                    let module_path = &rest[..import_pos];
1283                    let names_str = &rest[import_pos + skip..];
1284
1285                    let source_module = module_path
1286                        .trim_start_matches('.')
1287                        .rsplit('.')
1288                        .next()
1289                        .unwrap_or(module_path.trim_start_matches('.'));
1290
1291                    for name_part in names_str.split(',') {
1292                        let name_part = name_part.trim();
1293                        let imported_name = name_part.split_whitespace().next().unwrap_or(name_part);
1294                        // Strip trailing parens/punctuation
1295                        let imported_name = imported_name.trim_matches(|c: char| c == '(' || c == ')' || c == ',');
1296                        if imported_name.is_empty() {
1297                            continue;
1298                        }
1299
1300                        if let Some(target_ids) = symbol_table.get(imported_name) {
1301                            let target = target_ids.iter().find(|id| {
1302                                entity_map.get(*id).map_or(false, |e| {
1303                                    let stem = e.file_path.rsplit('/').next().unwrap_or(&e.file_path);
1304                                    let stem = stem.strip_suffix(".py")
1305                                        .or_else(|| stem.strip_suffix(".ts"))
1306                                        .or_else(|| stem.strip_suffix(".js"))
1307                                        .or_else(|| stem.strip_suffix(".rs"))
1308                                        .unwrap_or(stem);
1309                                    stem == source_module
1310                                })
1311                            });
1312                            if let Some(target_id) = target {
1313                                import_table.insert(
1314                                    (file_path.clone(), imported_name.to_string()),
1315                                    target_id.clone(),
1316                                );
1317                            }
1318                        }
1319                    }
1320                }
1321            }
1322        }
1323
1324        // JS/TS imports: import { foo, bar as baz } from './module'
1325        //                import Foo from './module'
1326        let is_js_ts = file_path.ends_with(".js") || file_path.ends_with(".ts")
1327            || file_path.ends_with(".jsx") || file_path.ends_with(".tsx");
1328
1329        if is_js_ts {
1330            static JS_NAMED_RE: LazyLock<Regex> = LazyLock::new(|| {
1331                Regex::new(r#"import\s*\{([^}]+)\}\s*from\s*['"]([^'"]+)['"]"#).unwrap()
1332            });
1333            static JS_DEFAULT_RE: LazyLock<Regex> = LazyLock::new(|| {
1334                Regex::new(r#"import\s+(?:type\s+)?([A-Za-z_]\w*)\s+from\s*['"]([^'"]+)['"]"#).unwrap()
1335            });
1336
1337            for cap in JS_NAMED_RE.captures_iter(&content) {
1338                let names_str = cap.get(1).unwrap().as_str();
1339                let module_path = cap.get(2).unwrap().as_str();
1340                let source_module = module_path.rsplit('/').next().unwrap_or(module_path);
1341                let source_module = strip_js_ext(source_module);
1342
1343                for name_part in names_str.split(',') {
1344                    let name_part = name_part.trim();
1345                    if name_part.is_empty() { continue; }
1346
1347                    // Handle "foo as bar" aliases and "type foo" prefixes
1348                    let (original_name, local_name) = if let Some(pos) = name_part.find(" as ") {
1349                        let orig = name_part[..pos].trim();
1350                        let local = name_part[pos + 4..].trim();
1351                        let orig = orig.strip_prefix("type ").unwrap_or(orig);
1352                        (orig, local)
1353                    } else {
1354                        let name = name_part.strip_prefix("type ").unwrap_or(name_part);
1355                        (name, name)
1356                    };
1357
1358                    if original_name.is_empty() || local_name.is_empty() { continue; }
1359
1360                    if let Some(target_ids) = symbol_table.get(original_name) {
1361                        let target = target_ids.iter().find(|id| {
1362                            entity_map.get(*id).map_or(false, |e| {
1363                                let stem = e.file_path.rsplit('/').next().unwrap_or(&e.file_path);
1364                                let stem = strip_file_ext(stem);
1365                                stem == source_module
1366                            })
1367                        });
1368                        if let Some(target_id) = target {
1369                            import_table.insert(
1370                                (file_path.clone(), local_name.to_string()),
1371                                target_id.clone(),
1372                            );
1373                        }
1374                    }
1375                }
1376            }
1377
1378            for cap in JS_DEFAULT_RE.captures_iter(&content) {
1379                let local_name = cap.get(1).unwrap().as_str();
1380                let module_path = cap.get(2).unwrap().as_str();
1381                let source_module = module_path.rsplit('/').next().unwrap_or(module_path);
1382                let source_module = strip_js_ext(source_module);
1383
1384                if let Some(target_ids) = symbol_table.get(local_name) {
1385                    let target = target_ids.iter().find(|id| {
1386                        entity_map.get(*id).map_or(false, |e| {
1387                            let stem = e.file_path.rsplit('/').next().unwrap_or(&e.file_path);
1388                            let stem = strip_file_ext(stem);
1389                            stem == source_module
1390                        })
1391                    });
1392                    if let Some(target_id) = target {
1393                        import_table.insert(
1394                            (file_path.clone(), local_name.to_string()),
1395                            target_id.clone(),
1396                        );
1397                    }
1398                }
1399            }
1400        }
1401
1402        // Rust imports: use crate::module::Name; / use crate::module::{A, B};
1403        // Also: use super::module::Name; / use self::module::Name;
1404        let is_rust = file_path.ends_with(".rs");
1405        if is_rust {
1406            static RUST_USE_SIMPLE_RE: LazyLock<Regex> = LazyLock::new(|| {
1407                // use crate::config::Config;
1408                // use super::types::Entity;
1409                // use config::Config;  (bare module path in binary crates)
1410                Regex::new(r"(?m)^\s*use\s+(?:(?:crate|super|self)::)?([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\s*;").unwrap()
1411            });
1412            static RUST_USE_GROUP_RE: LazyLock<Regex> = LazyLock::new(|| {
1413                // use crate::types::{Entity, ParseError};
1414                // use types::{Entity, ParseError};  (bare module path)
1415                Regex::new(r"(?m)^\s*use\s+(?:(?:crate|super|self)::)?([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)::\{([^}]+)\}\s*;").unwrap()
1416            });
1417
1418            // Build a map: module_name -> list of file paths whose stem matches
1419            // For "use crate::config::Config", module is "config", name is "Config"
1420            for cap in RUST_USE_SIMPLE_RE.captures_iter(&content) {
1421                let full_path_str = cap.get(1).unwrap().as_str();
1422                let parts: Vec<&str> = full_path_str.split("::").collect();
1423                if parts.is_empty() { continue; }
1424
1425                // Last part is the imported name, everything before is the module path
1426                let imported_name = parts[parts.len() - 1];
1427                // The module is the second-to-last part, or the first if only one part
1428                let source_module = if parts.len() >= 2 {
1429                    parts[parts.len() - 2]
1430                } else {
1431                    parts[0]
1432                };
1433
1434                resolve_rust_import(
1435                    file_path, imported_name, source_module,
1436                    symbol_table, entity_map, &mut import_table,
1437                );
1438            }
1439
1440            for cap in RUST_USE_GROUP_RE.captures_iter(&content) {
1441                let module_path = cap.get(1).unwrap().as_str();
1442                let names_str = cap.get(2).unwrap().as_str();
1443
1444                // source_module is the last segment of the module path
1445                let source_module = module_path.rsplit("::").next().unwrap_or(module_path);
1446
1447                for name_part in names_str.split(',') {
1448                    let name_part = name_part.trim();
1449                    // Handle "Name as Alias"
1450                    let (original, local) = if let Some(pos) = name_part.find(" as ") {
1451                        (&name_part[..pos], name_part[pos + 4..].trim())
1452                    } else {
1453                        (name_part, name_part)
1454                    };
1455                    let original = original.trim();
1456                    let local = local.trim();
1457                    if original.is_empty() || local.is_empty() { continue; }
1458
1459                    resolve_rust_import(
1460                        file_path, original, source_module,
1461                        symbol_table, entity_map, &mut import_table,
1462                    );
1463                    // If aliased, also map the local name
1464                    if local != original {
1465                        if let Some(target) = import_table.get(&(file_path.clone(), original.to_string())).cloned() {
1466                            import_table.insert(
1467                                (file_path.clone(), local.to_string()),
1468                                target,
1469                            );
1470                        }
1471                    }
1472                }
1473            }
1474        }
1475
1476        // Go imports: import "module/path" or import ( "module/path" )
1477        // Go uses the last path component as the package name
1478        let is_go = file_path.ends_with(".go");
1479        if is_go {
1480            static GO_IMPORT_RE: LazyLock<Regex> = LazyLock::new(|| {
1481                Regex::new(r#"(?m)"([^"]+)""#).unwrap()
1482            });
1483
1484            // Only look in import blocks
1485            let import_section = extract_go_import_section(&content);
1486            for cap in GO_IMPORT_RE.captures_iter(&import_section) {
1487                let import_path = cap.get(1).unwrap().as_str();
1488                let pkg_name = import_path.rsplit('/').next().unwrap_or(import_path);
1489
1490                // Map all entities from files matching this package name
1491                for (name, target_ids) in symbol_table.iter() {
1492                    for target_id in target_ids {
1493                        if let Some(entity) = entity_map.get(target_id) {
1494                            let stem = entity.file_path.rsplit('/').next().unwrap_or(&entity.file_path);
1495                            let stem = strip_file_ext(stem);
1496                            // Go: file stem or directory matches package name
1497                            if stem == pkg_name || entity.file_path.contains(&format!("{}/", pkg_name)) {
1498                                import_table.insert(
1499                                    (file_path.clone(), name.clone()),
1500                                    target_id.clone(),
1501                                );
1502                            }
1503                        }
1504                    }
1505                }
1506            }
1507        }
1508    }
1509
1510    import_table
1511}
1512
1513/// Resolve a Rust import: find the target entity in the symbol table
1514/// by matching the imported name against entities in files whose stem matches source_module.
1515fn resolve_rust_import(
1516    file_path: &str,
1517    imported_name: &str,
1518    source_module: &str,
1519    symbol_table: &HashMap<String, Vec<String>>,
1520    entity_map: &HashMap<String, EntityInfo>,
1521    import_table: &mut HashMap<(String, String), String>,
1522) {
1523    if let Some(target_ids) = symbol_table.get(imported_name) {
1524        let target = target_ids.iter().find(|id| {
1525            entity_map.get(*id).map_or(false, |e| {
1526                let stem = e.file_path.rsplit('/').next().unwrap_or(&e.file_path);
1527                let stem = strip_file_ext(stem);
1528                stem == source_module
1529            })
1530        });
1531        if let Some(target_id) = target {
1532            import_table.insert(
1533                (file_path.to_string(), imported_name.to_string()),
1534                target_id.clone(),
1535            );
1536        }
1537    }
1538}
1539
1540/// Extract Go import section (everything inside import blocks).
1541fn extract_go_import_section(content: &str) -> String {
1542    let mut result = String::new();
1543    let mut in_import_block = false;
1544    for line in content.lines() {
1545        let trimmed = line.trim();
1546        if trimmed.starts_with("import (") {
1547            in_import_block = true;
1548            continue;
1549        }
1550        if trimmed.starts_with("import \"") || trimmed.starts_with("import `") {
1551            result.push_str(trimmed);
1552            result.push('\n');
1553            continue;
1554        }
1555        if in_import_block {
1556            if trimmed == ")" {
1557                in_import_block = false;
1558            } else {
1559                result.push_str(trimmed);
1560                result.push('\n');
1561            }
1562        }
1563    }
1564    result
1565}
1566
1567/// Strip JS/TS extensions from a module name.
1568fn strip_js_ext(s: &str) -> &str {
1569    s.strip_suffix(".js")
1570        .or_else(|| s.strip_suffix(".ts"))
1571        .or_else(|| s.strip_suffix(".jsx"))
1572        .or_else(|| s.strip_suffix(".tsx"))
1573        .unwrap_or(s)
1574}
1575
1576/// Strip common file extensions from a filename.
1577fn strip_file_ext(s: &str) -> &str {
1578    s.strip_suffix(".py")
1579        .or_else(|| s.strip_suffix(".ts"))
1580        .or_else(|| s.strip_suffix(".js"))
1581        .or_else(|| s.strip_suffix(".tsx"))
1582        .or_else(|| s.strip_suffix(".jsx"))
1583        .or_else(|| s.strip_suffix(".rs"))
1584        .unwrap_or(s)
1585}
1586
1587/// Strip comments and string literals from content to avoid false references.
1588/// Returns a new string with comments/docstrings replaced by spaces.
1589fn strip_comments_and_strings(content: &str) -> String {
1590    let bytes = content.as_bytes();
1591    let len = bytes.len();
1592    let mut result = vec![b' '; len];
1593    let mut i = 0;
1594
1595    while i < len {
1596        // Triple-quoted strings (Python docstrings)
1597        if i + 2 < len && bytes[i] == b'"' && bytes[i + 1] == b'"' && bytes[i + 2] == b'"' {
1598            i += 3;
1599            while i + 2 < len {
1600                if bytes[i] == b'"' && bytes[i + 1] == b'"' && bytes[i + 2] == b'"' {
1601                    i += 3;
1602                    break;
1603                }
1604                i += 1;
1605            }
1606            continue;
1607        }
1608        if i + 2 < len && bytes[i] == b'\'' && bytes[i + 1] == b'\'' && bytes[i + 2] == b'\'' {
1609            i += 3;
1610            while i + 2 < len {
1611                if bytes[i] == b'\'' && bytes[i + 1] == b'\'' && bytes[i + 2] == b'\'' {
1612                    i += 3;
1613                    break;
1614                }
1615                i += 1;
1616            }
1617            continue;
1618        }
1619        // Double-quoted strings
1620        if bytes[i] == b'"' {
1621            i += 1;
1622            while i < len {
1623                if bytes[i] == b'\\' { i += 2; continue; }
1624                if bytes[i] == b'"' { i += 1; break; }
1625                i += 1;
1626            }
1627            continue;
1628        }
1629        // Single-quoted strings
1630        if bytes[i] == b'\'' {
1631            i += 1;
1632            while i < len {
1633                if bytes[i] == b'\\' { i += 2; continue; }
1634                if bytes[i] == b'\'' { i += 1; break; }
1635                i += 1;
1636            }
1637            continue;
1638        }
1639        // Python/Ruby single-line comments
1640        if bytes[i] == b'#' {
1641            while i < len && bytes[i] != b'\n' { i += 1; }
1642            continue;
1643        }
1644        // C-style single-line comments
1645        if i + 1 < len && bytes[i] == b'/' && bytes[i + 1] == b'/' {
1646            while i < len && bytes[i] != b'\n' { i += 1; }
1647            continue;
1648        }
1649        // C-style block comments
1650        if i + 1 < len && bytes[i] == b'/' && bytes[i + 1] == b'*' {
1651            i += 2;
1652            while i + 1 < len {
1653                if bytes[i] == b'*' && bytes[i + 1] == b'/' { i += 2; break; }
1654                i += 1;
1655            }
1656            continue;
1657        }
1658        // Regular code: copy through
1659        result[i] = bytes[i];
1660        i += 1;
1661    }
1662
1663    String::from_utf8_lossy(&result).into_owned()
1664}
1665
1666/// Extract dot-chains (receiver.member) from content for precise resolution.
1667/// Returns unique (receiver, member) pairs found in the content.
1668fn extract_dot_chains<'a>(content: &'a str) -> Vec<(&'a str, &'a str)> {
1669    static DOT_CHAIN_RE: LazyLock<Regex> = LazyLock::new(|| {
1670        Regex::new(r"\b([A-Za-z_]\w*)\.([A-Za-z_]\w*)").unwrap()
1671    });
1672
1673    let mut chains = Vec::new();
1674    let mut seen: HashSet<(&str, &str)> = HashSet::new();
1675    for cap in DOT_CHAIN_RE.captures_iter(content) {
1676        let receiver = cap.get(1).unwrap().as_str();
1677        let member = cap.get(2).unwrap().as_str();
1678        if seen.insert((receiver, member)) {
1679            chains.push((receiver, member));
1680        }
1681    }
1682    chains
1683}
1684
1685/// Extract identifier references from entity content using simple token analysis.
1686/// Strips comments and strings first to avoid false positives from docstrings.
1687/// Returns borrowed slices from the stripped content.
1688fn extract_references_from_content<'a>(content: &'a str, own_name: &str) -> Vec<&'a str> {
1689    // We need to figure out which words appear only in comments/strings vs real code.
1690    // Strategy: strip comments/strings, then only accept words that appear in the stripped version.
1691    let stripped = strip_comments_and_strings(content);
1692    let stripped_words: HashSet<&str> = stripped
1693        .split(|c: char| !c.is_alphanumeric() && c != '_')
1694        .filter(|w| !w.is_empty())
1695        .collect();
1696
1697    let mut refs = Vec::new();
1698    let mut seen: HashSet<&str> = HashSet::new();
1699
1700    for word in content.split(|c: char| !c.is_alphanumeric() && c != '_') {
1701        if word.is_empty() || word == own_name {
1702            continue;
1703        }
1704        if is_keyword(word) || word.len() < 2 {
1705            continue;
1706        }
1707        // Skip very short lowercase identifiers (likely local vars: i, x, a, ok, id, etc.)
1708        if word.starts_with(|c: char| c.is_lowercase()) && word.len() < 3 {
1709            continue;
1710        }
1711        if !word.starts_with(|c: char| c.is_alphabetic() || c == '_') {
1712            continue;
1713        }
1714        // Skip common local variable names that create false graph edges
1715        if is_common_local_name(word) {
1716            continue;
1717        }
1718        // Skip words that only appear in comments/strings
1719        if !stripped_words.contains(word) {
1720            continue;
1721        }
1722        if seen.insert(word) {
1723            refs.push(word);
1724        }
1725    }
1726
1727    refs
1728}
1729
1730static COMMON_LOCAL_NAMES: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
1731    [
1732        "result", "results", "data", "config", "value", "values",
1733        "item", "items", "input", "output", "args", "opts",
1734        "name", "path", "file", "line", "count", "index",
1735        "temp", "prev", "next", "curr", "current", "node",
1736        "left", "right", "root", "head", "tail", "body",
1737        "text", "content", "source", "target", "entry",
1738        "error", "errors", "message", "response", "request",
1739        "context", "state", "props", "event", "handler",
1740        "callback", "options", "params", "query", "list",
1741        "base", "info", "meta", "kind", "mode", "flag",
1742        "size", "length", "width", "height", "start", "stop",
1743        "begin", "done", "found", "status", "code",
1744    ].into_iter().collect()
1745});
1746
1747/// Names that are overwhelmingly local variables, not entity references.
1748/// These create massive false-positive edges in the dependency graph.
1749fn is_common_local_name(word: &str) -> bool {
1750    COMMON_LOCAL_NAMES.contains(word)
1751}
1752
1753/// Infer reference type from context using word-boundary-aware matching.
1754fn infer_ref_type(content: &str, ref_name: &str) -> RefType {
1755    // Check if it's a function call: ref_name followed by ( with word boundary before.
1756    // Avoids format! allocation by finding ref_name and checking the next char.
1757    let bytes = content.as_bytes();
1758    let name_bytes = ref_name.as_bytes();
1759    let mut search_start = 0;
1760    while let Some(rel_pos) = content[search_start..].find(ref_name) {
1761        let pos = search_start + rel_pos;
1762        let after = pos + name_bytes.len();
1763        // Check next char is '('
1764        if after < bytes.len() && bytes[after] == b'(' {
1765            // Verify word boundary before
1766            let is_boundary = pos == 0 || {
1767                let prev = bytes[pos - 1];
1768                !prev.is_ascii_alphanumeric() && prev != b'_'
1769            };
1770            if is_boundary {
1771                return RefType::Calls;
1772            }
1773        }
1774        // Advance past pos to the next char boundary to avoid slicing inside a multi-byte UTF-8 char.
1775        search_start = pos + 1;
1776        while search_start < content.len() && !content.is_char_boundary(search_start) {
1777            search_start += 1;
1778        }
1779    }
1780
1781    // Check if it's in an import/use statement (line-level, not substring)
1782    for line in content.lines() {
1783        let trimmed = line.trim();
1784        if (trimmed.starts_with("import ") || trimmed.starts_with("use ")
1785            || trimmed.starts_with("from ") || trimmed.starts_with("require("))
1786            && trimmed.contains(ref_name)
1787        {
1788            return RefType::Imports;
1789        }
1790    }
1791
1792    // Default to type reference
1793    RefType::TypeRef
1794}
1795
1796static KEYWORDS: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
1797    [
1798        // Common across languages
1799        "if", "else", "for", "while", "do", "switch", "case", "break",
1800        "continue", "return", "try", "catch", "finally", "throw",
1801        "new", "delete", "typeof", "instanceof", "in", "of",
1802        "true", "false", "null", "undefined", "void", "this",
1803        "super", "class", "extends", "implements", "interface",
1804        "enum", "const", "let", "var", "function", "async",
1805        "await", "yield", "import", "export", "default", "from",
1806        "as", "static", "public", "private", "protected",
1807        "abstract", "final", "override",
1808        // Rust
1809        "fn", "pub", "mod", "use", "struct", "impl", "trait",
1810        "where", "type", "self", "Self", "mut", "ref", "match",
1811        "loop", "move", "unsafe", "extern", "crate", "dyn",
1812        // Python
1813        "def", "elif", "except", "raise", "with",
1814        "pass", "lambda", "nonlocal", "global", "assert",
1815        "True", "False", "and", "or", "not", "is",
1816        // Go
1817        "func", "package", "range", "select", "chan", "go",
1818        "defer", "map", "make", "append", "len", "cap",
1819        // C/C++
1820        "auto", "register", "volatile", "sizeof", "typedef",
1821        "template", "typename", "namespace", "virtual", "inline",
1822        "constexpr", "nullptr", "noexcept", "explicit", "friend",
1823        "operator", "using", "cout", "endl", "cerr", "cin",
1824        "printf", "scanf", "malloc", "free", "NULL", "include",
1825        "ifdef", "ifndef", "endif", "define", "pragma",
1826        // Ruby
1827        "end", "then", "elsif", "unless", "until",
1828        "begin", "rescue", "ensure", "when", "require",
1829        "attr_accessor", "attr_reader", "attr_writer",
1830        "puts", "nil", "module", "defined",
1831        // C#
1832        "internal", "sealed", "readonly",
1833        "partial", "delegate", "event", "params", "out",
1834        "object", "decimal", "sbyte", "ushort", "uint",
1835        "ulong", "nint", "nuint", "dynamic",
1836        "get", "set", "value", "init", "record",
1837        // Types (primitives)
1838        "string", "number", "boolean", "int", "float", "double",
1839        "bool", "char", "byte", "i8", "i16", "i32", "i64",
1840        "u8", "u16", "u32", "u64", "f32", "f64", "usize",
1841        "isize", "str", "String", "Vec", "Option", "Result",
1842        "Box", "Arc", "Rc", "HashMap", "HashSet", "Some",
1843        "Ok", "Err",
1844    ].into_iter().collect()
1845});
1846
1847fn is_keyword(word: &str) -> bool {
1848    KEYWORDS.contains(word)
1849}
1850
1851#[cfg(test)]
1852mod tests {
1853    use super::*;
1854    use crate::git::types::{FileChange, FileStatus};
1855    use std::io::Write;
1856    use tempfile::TempDir;
1857
1858    fn create_test_repo() -> (TempDir, ParserRegistry) {
1859        let dir = TempDir::new().unwrap();
1860        let registry = crate::parser::plugins::create_default_registry();
1861        (dir, registry)
1862    }
1863
1864    fn write_file(dir: &Path, name: &str, content: &str) {
1865        let path = dir.join(name);
1866        if let Some(parent) = path.parent() {
1867            std::fs::create_dir_all(parent).unwrap();
1868        }
1869        let mut f = std::fs::File::create(path).unwrap();
1870        f.write_all(content.as_bytes()).unwrap();
1871    }
1872
1873    #[test]
1874    fn test_incremental_add_file() {
1875        let (dir, registry) = create_test_repo();
1876        let root = dir.path();
1877
1878        // Start with one file
1879        write_file(root, "a.ts", "export function foo() { return bar(); }\n");
1880        write_file(root, "b.ts", "export function bar() { return 1; }\n");
1881
1882        let (mut graph, _) = EntityGraph::build(root, &["a.ts".into(), "b.ts".into()], &registry);
1883        assert_eq!(graph.entities.len(), 2);
1884
1885        // Add a new file
1886        write_file(root, "c.ts", "export function baz() { return foo(); }\n");
1887        graph.update_from_changes(
1888            &[FileChange {
1889                file_path: "c.ts".into(),
1890                status: FileStatus::Added,
1891                old_file_path: None,
1892                before_content: None,
1893                after_content: None, // will read from disk
1894            }],
1895            root,
1896            &registry,
1897        );
1898
1899        assert_eq!(graph.entities.len(), 3);
1900        assert!(graph.entities.contains_key("c.ts::function::baz"));
1901        // baz references foo
1902        let baz_deps = graph.get_dependencies("c.ts::function::baz");
1903        assert!(
1904            baz_deps.iter().any(|d| d.name == "foo"),
1905            "baz should depend on foo. Deps: {:?}",
1906            baz_deps.iter().map(|d| &d.name).collect::<Vec<_>>()
1907        );
1908    }
1909
1910    #[test]
1911    fn test_incremental_delete_file() {
1912        let (dir, registry) = create_test_repo();
1913        let root = dir.path();
1914
1915        write_file(root, "a.ts", "export function foo() { return bar(); }\n");
1916        write_file(root, "b.ts", "export function bar() { return 1; }\n");
1917
1918        let (mut graph, _) = EntityGraph::build(root, &["a.ts".into(), "b.ts".into()], &registry);
1919        assert_eq!(graph.entities.len(), 2);
1920
1921        // Delete b.ts
1922        graph.update_from_changes(
1923            &[FileChange {
1924                file_path: "b.ts".into(),
1925                status: FileStatus::Deleted,
1926                old_file_path: None,
1927                before_content: None,
1928                after_content: None,
1929            }],
1930            root,
1931            &registry,
1932        );
1933
1934        assert_eq!(graph.entities.len(), 1);
1935        assert!(!graph.entities.contains_key("b.ts::function::bar"));
1936        // foo's dependency on bar should be pruned
1937        let foo_deps = graph.get_dependencies("a.ts::function::foo");
1938        assert!(
1939            foo_deps.is_empty(),
1940            "foo's deps should be empty after bar deleted. Deps: {:?}",
1941            foo_deps.iter().map(|d| &d.name).collect::<Vec<_>>()
1942        );
1943    }
1944
1945    #[test]
1946    fn test_incremental_modify_file() {
1947        let (dir, registry) = create_test_repo();
1948        let root = dir.path();
1949
1950        write_file(root, "a.ts", "export function foo() { return bar(); }\n");
1951        write_file(root, "b.ts", "export function bar() { return 1; }\nexport function baz() { return 2; }\n");
1952
1953        let (mut graph, _) = EntityGraph::build(root, &["a.ts".into(), "b.ts".into()], &registry);
1954        assert_eq!(graph.entities.len(), 3);
1955
1956        // Modify a.ts to call baz instead of bar
1957        write_file(root, "a.ts", "export function foo() { return baz(); }\n");
1958        graph.update_from_changes(
1959            &[FileChange {
1960                file_path: "a.ts".into(),
1961                status: FileStatus::Modified,
1962                old_file_path: None,
1963                before_content: None,
1964                after_content: None,
1965            }],
1966            root,
1967            &registry,
1968        );
1969
1970        assert_eq!(graph.entities.len(), 3);
1971        // foo should now depend on baz, not bar
1972        let foo_deps = graph.get_dependencies("a.ts::function::foo");
1973        let dep_names: Vec<&str> = foo_deps.iter().map(|d| d.name.as_str()).collect();
1974        assert!(dep_names.contains(&"baz"), "foo should depend on baz after modification. Deps: {:?}", dep_names);
1975        assert!(!dep_names.contains(&"bar"), "foo should no longer depend on bar. Deps: {:?}", dep_names);
1976    }
1977
1978    #[test]
1979    fn test_incremental_with_content() {
1980        let (dir, registry) = create_test_repo();
1981        let root = dir.path();
1982
1983        write_file(root, "a.ts", "export function foo() { return 1; }\n");
1984        let (mut graph, _) = EntityGraph::build(root, &["a.ts".into()], &registry);
1985        assert_eq!(graph.entities.len(), 1);
1986
1987        // Add file with content provided directly (no disk read needed)
1988        graph.update_from_changes(
1989            &[FileChange {
1990                file_path: "b.ts".into(),
1991                status: FileStatus::Added,
1992                old_file_path: None,
1993                before_content: None,
1994                after_content: Some("export function bar() { return foo(); }\n".into()),
1995            }],
1996            root,
1997            &registry,
1998        );
1999
2000        assert_eq!(graph.entities.len(), 2);
2001        let bar_deps = graph.get_dependencies("b.ts::function::bar");
2002        assert!(bar_deps.iter().any(|d| d.name == "foo"));
2003    }
2004
2005    #[test]
2006    fn test_extract_references() {
2007        let content = "function processData(input) {\n  const result = validateInput(input);\n  return transform(result);\n}";
2008        let refs = extract_references_from_content(content, "processData");
2009        assert!(refs.contains(&"validateInput"));
2010        assert!(refs.contains(&"transform"));
2011        assert!(!refs.contains(&"processData")); // self excluded
2012    }
2013
2014    #[test]
2015    fn test_extract_references_skips_keywords() {
2016        let content = "function foo() { if (true) { return false; } }";
2017        let refs = extract_references_from_content(content, "foo");
2018        assert!(!refs.contains(&"if"));
2019        assert!(!refs.contains(&"true"));
2020        assert!(!refs.contains(&"return"));
2021        assert!(!refs.contains(&"false"));
2022    }
2023
2024    #[test]
2025    fn test_infer_ref_type_call() {
2026        assert_eq!(
2027            infer_ref_type("validateInput(data)", "validateInput"),
2028            RefType::Calls,
2029        );
2030    }
2031
2032    #[test]
2033    fn test_infer_ref_type_type() {
2034        assert_eq!(
2035            infer_ref_type("let x: MyType = something", "MyType"),
2036            RefType::TypeRef,
2037        );
2038    }
2039
2040    #[test]
2041    fn test_infer_ref_type_multibyte_utf8() {
2042        // Ensure no panic when content contains multi-byte UTF-8 characters
2043        assert_eq!(
2044            infer_ref_type("let café = foo(x)", "foo"),
2045            RefType::Calls,
2046        );
2047        assert_eq!(
2048            infer_ref_type("class HandicapfrPublicationFieldsEnum:\n    É = 1\n    bar()", "bar"),
2049            RefType::Calls,
2050        );
2051        // No match should not panic either
2052        assert_eq!(
2053            infer_ref_type("// 日本語コメント\nlet x = 1", "missing"),
2054            RefType::TypeRef,
2055        );
2056    }
2057
2058    #[test]
2059    fn test_dot_chain_self_resolution() {
2060        let (dir, registry) = create_test_repo();
2061        let root = dir.path();
2062
2063        write_file(root, "service.py", "\
2064class MyService:
2065    def process(self):
2066        return self.validate()
2067
2068    def validate(self):
2069        return True
2070");
2071
2072        let (graph, _) = EntityGraph::build(root, &["service.py".into()], &registry);
2073
2074        // process should have an edge to validate via self.validate()
2075        let process_id = graph.entities.keys()
2076            .find(|id| id.contains("process"))
2077            .expect("process entity should exist");
2078        let deps = graph.get_dependencies(process_id);
2079        assert!(
2080            deps.iter().any(|d| d.name == "validate"),
2081            "process should depend on validate via self.validate(). Deps: {:?}",
2082            deps.iter().map(|d| &d.name).collect::<Vec<_>>()
2083        );
2084    }
2085
2086    #[test]
2087    fn test_dot_chain_this_resolution() {
2088        let (dir, registry) = create_test_repo();
2089        let root = dir.path();
2090
2091        write_file(root, "service.ts", "\
2092class UserService {
2093    process() {
2094        return this.validate();
2095    }
2096    validate() {
2097        return true;
2098    }
2099}
2100");
2101
2102        let (graph, _) = EntityGraph::build(root, &["service.ts".into()], &registry);
2103
2104        let process_id = graph.entities.keys()
2105            .find(|id| id.contains("process"))
2106            .expect("process entity should exist");
2107        let deps = graph.get_dependencies(process_id);
2108        assert!(
2109            deps.iter().any(|d| d.name == "validate"),
2110            "process should depend on validate via this.validate(). Deps: {:?}",
2111            deps.iter().map(|d| &d.name).collect::<Vec<_>>()
2112        );
2113    }
2114
2115    #[test]
2116    fn test_dot_chain_class_static() {
2117        let (dir, registry) = create_test_repo();
2118        let root = dir.path();
2119
2120        write_file(root, "utils.ts", "\
2121class MathUtils {
2122    static compute() { return 1; }
2123}
2124function caller() { return MathUtils.compute(); }
2125");
2126
2127        let (graph, _) = EntityGraph::build(root, &["utils.ts".into()], &registry);
2128
2129        let caller_id = graph.entities.keys()
2130            .find(|id| id.contains("caller"))
2131            .expect("caller entity should exist");
2132        let deps = graph.get_dependencies(caller_id);
2133        assert!(
2134            deps.iter().any(|d| d.name == "compute"),
2135            "caller should depend on compute via MathUtils.compute(). Deps: {:?}",
2136            deps.iter().map(|d| &d.name).collect::<Vec<_>>()
2137        );
2138    }
2139
2140    #[test]
2141    fn test_js_ts_import_resolution() {
2142        let (dir, registry) = create_test_repo();
2143        let root = dir.path();
2144
2145        write_file(root, "helper.ts", "\
2146export function helper() { return 1; }
2147");
2148        write_file(root, "main.ts", "\
2149import { helper } from './helper';
2150export function main() { return helper(); }
2151");
2152
2153        let (graph, _) = EntityGraph::build(
2154            root,
2155            &["helper.ts".into(), "main.ts".into()],
2156            &registry,
2157        );
2158
2159        let main_id = graph.entities.keys()
2160            .find(|id| id.contains("main"))
2161            .expect("main entity should exist");
2162        let deps = graph.get_dependencies(main_id);
2163        assert!(
2164            deps.iter().any(|d| d.name == "helper"),
2165            "main should depend on helper via JS import. Deps: {:?}",
2166            deps.iter().map(|d| &d.name).collect::<Vec<_>>()
2167        );
2168    }
2169
2170    #[test]
2171    fn test_dot_chain_no_false_edges() {
2172        let (dir, registry) = create_test_repo();
2173        let root = dir.path();
2174
2175        // Two classes with same method name "process".
2176        // self.process() in ClassA should NOT create edge to ClassB::process.
2177        write_file(root, "a.py", "\
2178class ClassA:
2179    def run(self):
2180        return self.process()
2181
2182    def process(self):
2183        return 1
2184");
2185        write_file(root, "b.py", "\
2186class ClassB:
2187    def process(self):
2188        return 2
2189");
2190
2191        let (graph, _) = EntityGraph::build(
2192            root,
2193            &["a.py".into(), "b.py".into()],
2194            &registry,
2195        );
2196
2197        let run_id = graph.entities.keys()
2198            .find(|id| id.contains("run"))
2199            .expect("run entity should exist");
2200        let deps = graph.get_dependencies(run_id);
2201        // Should have edge to ClassA::process, NOT ClassB::process
2202        for dep in &deps {
2203            if dep.name == "process" {
2204                assert!(
2205                    dep.file_path == "a.py",
2206                    "run's process dep should be in a.py, not {}",
2207                    dep.file_path
2208                );
2209            }
2210        }
2211    }
2212
2213    #[test]
2214    fn test_dot_chain_fallback() {
2215        let (dir, registry) = create_test_repo();
2216        let root = dir.path();
2217
2218        // someVar.unknownMethod() - "someVar" is not a class,
2219        // so the chain is unresolved and words fall through to bag-of-words.
2220        // "helper" should still resolve via bag-of-words.
2221        write_file(root, "app.ts", "\
2222export function helper() { return 1; }
2223export function caller() {
2224    const val = helper();
2225    return val;
2226}
2227");
2228
2229        let (graph, _) = EntityGraph::build(root, &["app.ts".into()], &registry);
2230
2231        let caller_id = graph.entities.keys()
2232            .find(|id| id.contains("caller"))
2233            .expect("caller entity should exist");
2234        let deps = graph.get_dependencies(caller_id);
2235        assert!(
2236            deps.iter().any(|d| d.name == "helper"),
2237            "caller should still resolve helper via bag-of-words. Deps: {:?}",
2238            deps.iter().map(|d| &d.name).collect::<Vec<_>>()
2239        );
2240    }
2241
2242}