Skip to main content

sem_core/parser/
graph.rs

1//! Entity dependency graph — cross-file reference extraction.
2//!
3//! Implements a two-pass approach inspired by arXiv:2601.08773 (Reliable Graph-RAG):
4//! Pass 1: Extract all entities, build a symbol table (name → entity ID).
5//! Pass 2: For each entity, extract identifier references from its AST subtree,
6//!         resolve them against the symbol table to create edges.
7//!
8//! This enables impact analysis: "if I change entity X, what else is affected?"
9
10use std::collections::{HashMap, HashSet};
11use std::path::Path;
12use std::sync::LazyLock;
13
14use rayon::prelude::*;
15use regex::Regex;
16use serde::{Deserialize, Serialize};
17
18use crate::git::types::{FileChange, FileStatus};
19use crate::model::entity::SemanticEntity;
20use crate::parser::registry::ParserRegistry;
21use crate::parser::scope_resolve;
22
23/// A reference from one entity to another.
24#[derive(Debug, Clone, Serialize, Deserialize)]
25#[serde(rename_all = "camelCase")]
26pub struct EntityRef {
27    pub from_entity: String,
28    pub to_entity: String,
29    pub ref_type: RefType,
30}
31
32/// Type of reference between entities.
33#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
34#[serde(rename_all = "lowercase")]
35pub enum RefType {
36    /// Function/method call
37    Calls,
38    /// Type reference (extends, implements, field type)
39    TypeRef,
40    /// Import/use statement reference
41    Imports,
42}
43
/// A complete entity dependency graph for a set of files.
///
/// `edges` is the source of truth; `dependents` and `dependencies` are
/// adjacency indexes derived from it (see `from_parts`), holding one entry per
/// edge in edge order.
#[derive(Debug)]
pub struct EntityGraph {
    /// All entities indexed by ID
    pub entities: HashMap<String, EntityInfo>,
    /// Every resolved reference, stored as a flat list of
    /// (from_entity, to_entity, ref_type) records.
    pub edges: Vec<EntityRef>,
    /// Reverse index: entity_id → entities that reference it
    pub dependents: HashMap<String, Vec<String>>,
    /// Forward index: entity_id → entities it references
    pub dependencies: HashMap<String, Vec<String>>,
}
56
57/// Minimal entity info stored in the graph.
58#[derive(Debug, Clone, Serialize, Deserialize)]
59#[serde(rename_all = "camelCase")]
60pub struct EntityInfo {
61    pub id: String,
62    pub name: String,
63    pub entity_type: String,
64    pub file_path: String,
65    #[serde(skip_serializing_if = "Option::is_none")]
66    pub parent_id: Option<String>,
67    pub start_line: usize,
68    pub end_line: usize,
69}
70
71impl EntityGraph {
72    /// Reconstruct an EntityGraph from pre-loaded parts (e.g. from a cache).
73    pub fn from_parts(entities: HashMap<String, EntityInfo>, edges: Vec<EntityRef>) -> Self {
74        let mut dependents: HashMap<String, Vec<String>> = HashMap::new();
75        let mut dependencies: HashMap<String, Vec<String>> = HashMap::new();
76        for edge in &edges {
77            dependents
78                .entry(edge.to_entity.clone())
79                .or_default()
80                .push(edge.from_entity.clone());
81            dependencies
82                .entry(edge.from_entity.clone())
83                .or_default()
84                .push(edge.to_entity.clone());
85        }
86        EntityGraph {
87            entities,
88            edges,
89            dependents,
90            dependencies,
91        }
92    }
93
94    /// Build an entity graph from a set of files.
95    ///
96    /// Pass 1: Extract all entities from all files using the parser registry.
97    /// Pass 2: For each entity, find identifier tokens and resolve them against
98    ///         the symbol table to create reference edges.
99    pub fn build(
100        root: &Path,
101        file_paths: &[String],
102        registry: &ParserRegistry,
103    ) -> (Self, Vec<SemanticEntity>) {
104        // Pass 1: Extract all entities in parallel (file I/O + tree-sitter parsing)
105        // Also collect (file_path, content, tree) for scope_resolve reuse
106        let per_file: Vec<(Vec<SemanticEntity>, Option<(String, String, tree_sitter::Tree)>)> = file_paths
107            .par_iter()
108            .filter_map(|file_path| {
109                let full_path = root.join(file_path);
110                let content = std::fs::read_to_string(&full_path).ok()?;
111                let plugin = registry.get_plugin_with_content(file_path, &content)?;
112                let (entities, tree) = plugin.extract_entities_with_tree(&content, file_path);
113                let parsed = tree.map(|t| (file_path.clone(), content, t));
114                Some((entities, parsed))
115            })
116            .collect();
117
118        let mut all_entities: Vec<SemanticEntity> = Vec::new();
119        let mut parsed_files: Vec<(String, String, tree_sitter::Tree)> = Vec::new();
120        for (entities, parsed) in per_file {
121            all_entities.extend(entities);
122            if let Some(p) = parsed {
123                parsed_files.push(p);
124            }
125        }
126
127        // Build symbol table: name → entity IDs (can be multiple with same name)
128        let mut symbol_table: HashMap<String, Vec<String>> = HashMap::with_capacity(all_entities.len());
129        let mut entity_map: HashMap<String, EntityInfo> = HashMap::with_capacity(all_entities.len());
130
131        for entity in &all_entities {
132            symbol_table
133                .entry(entity.name.clone())
134                .or_default()
135                .push(entity.id.clone());
136
137            entity_map.insert(
138                entity.id.clone(),
139                EntityInfo {
140                    id: entity.id.clone(),
141                    name: entity.name.clone(),
142                    entity_type: entity.entity_type.clone(),
143                    file_path: entity.file_path.clone(),
144                    parent_id: entity.parent_id.clone(),
145                    start_line: entity.start_line,
146                    end_line: entity.end_line,
147                },
148            );
149        }
150
151        // Build parent-child set for skipping class→method self-edges
152        let parent_child_pairs: HashSet<(&str, &str)> = all_entities
153            .iter()
154            .filter_map(|e| {
155                e.parent_id.as_ref().map(|pid| (pid.as_str(), e.id.as_str()))
156            })
157            .collect();
158
159        // Build set of (class_id, child_method_name) so classes skip refs to their own methods
160        let class_child_names: HashSet<(&str, &str)> = all_entities
161            .iter()
162            .filter_map(|e| {
163                e.parent_id.as_ref().map(|pid| (pid.as_str(), e.name.as_str()))
164            })
165            .collect();
166
167        // Build class-related maps for dot-chain resolution
168        // class_entity_names: all class/struct/interface entity names
169        let class_entity_names: HashSet<&str> = all_entities
170            .iter()
171            .filter(|e| matches!(e.entity_type.as_str(), "class" | "struct" | "interface" | "class_type"))
172            .map(|e| e.name.as_str())
173            .collect();
174
175        // id_to_name: quick lookup for parent name resolution
176        let id_to_name: HashMap<&str, &str> = all_entities
177            .iter()
178            .map(|e| (e.id.as_str(), e.name.as_str()))
179            .collect();
180
181        // enclosing_class: entity_id → class_name (for self/this resolution)
182        // class_members: class_name → [(member_name, member_entity_id)]
183        let mut enclosing_class: HashMap<&str, &str> = HashMap::new();
184        let mut class_members: HashMap<&str, Vec<(&str, &str)>> = HashMap::new();
185
186        for entity in &all_entities {
187            if let Some(ref pid) = entity.parent_id {
188                if let Some(&parent_name) = id_to_name.get(pid.as_str()) {
189                    if class_entity_names.contains(parent_name) {
190                        enclosing_class.insert(entity.id.as_str(), parent_name);
191                        class_members
192                            .entry(parent_name)
193                            .or_default()
194                            .push((entity.name.as_str(), entity.id.as_str()));
195                    }
196                }
197            }
198        }
199
200        // Build import table: (file_path, imported_name) → target entity ID
201        // e.g. ("io_handler.py", "validate") → "core.py::function::validate"
202        let import_table = build_import_table(root, file_paths, &symbol_table, &entity_map);
203
204        // Run scope-aware resolver for supported languages (reuse pre-parsed trees)
205        let has_scope_lang = file_paths.iter().any(|f| {
206            let ext = f.rfind('.').map(|i| &f[i..]).unwrap_or("");
207            crate::parser::plugins::code::languages::get_language_config(ext)
208                .and_then(|c| c.scope_resolve)
209                .is_some()
210        });
211        let (scope_edges, scope_resolved_entities) = if has_scope_lang {
212            let result = scope_resolve::resolve_with_scopes(root, file_paths, &all_entities, &entity_map, Some(parsed_files));
213            let resolved_entity_ids: HashSet<String> = result.edges.iter()
214                .map(|(from, _, _)| from.clone())
215                .collect();
216            (result.edges, resolved_entity_ids)
217        } else {
218            (vec![], HashSet::new())
219        };
220
221        // Pass 2: Extract references in parallel, then resolve against symbol table
222        // Phase 1: Dot-chain resolution (precise self.X, this.X, ClassName.X)
223        // Phase 2: Bag-of-words resolution (existing logic, skipping consumed words)
224        // Skip entities already resolved by scope resolver (Python files)
225        let resolved_refs: Vec<(String, String, RefType)> = all_entities
226            .par_iter()
227            .flat_map(|entity| {
228                // Skip entities already resolved by scope resolver
229                if scope_resolved_entities.contains(&entity.id) {
230                    return vec![];
231                }
232
233                let mut entity_edges = Vec::new();
234                let mut consumed_words: HashSet<String> = HashSet::new();
235
236                // Phase 1: Dot-chain resolution
237                let stripped = strip_comments_and_strings(&entity.content);
238                let dot_chains = extract_dot_chains(&stripped);
239
240                for (receiver, member) in &dot_chains {
241                    if *receiver == "self" || *receiver == "this" {
242                        // self.B / this.B: resolve to sibling method in enclosing class
243                        if let Some(class_name) = enclosing_class.get(entity.id.as_str()) {
244                            if let Some(members) = class_members.get(class_name) {
245                                for (n, tid) in members {
246                                    if *n == *member && *tid != entity.id.as_str() {
247                                        entity_edges.push((
248                                            entity.id.clone(),
249                                            tid.to_string(),
250                                            RefType::Calls,
251                                        ));
252                                        consumed_words.insert(member.to_string());
253                                        break;
254                                    }
255                                }
256                            }
257                        }
258                    } else if class_entity_names.contains(*receiver) {
259                        // ClassName.B: resolve to class member
260                        if let Some(members) = class_members.get(*receiver) {
261                            for (n, tid) in members {
262                                if *n == *member {
263                                    entity_edges.push((
264                                        entity.id.clone(),
265                                        tid.to_string(),
266                                        RefType::Calls,
267                                    ));
268                                    consumed_words.insert(member.to_string());
269                                    consumed_words.insert(receiver.to_string());
270                                    break;
271                                }
272                            }
273                        }
274                    }
275                    // Unresolved chains fall through to bag-of-words below
276                }
277
278                // Phase 2: Bag-of-words resolution (skip words consumed by dot-chains)
279                let refs = extract_references_from_content(&entity.content, &entity.name);
280                for ref_name in refs {
281                    if consumed_words.contains(ref_name) {
282                        continue;
283                    }
284
285                    // Skip references to names that are this class's own methods
286                    if class_child_names.contains(&(entity.id.as_str(), ref_name)) {
287                        continue;
288                    }
289
290                    // Check import table first: if this file imports this name,
291                    // resolve to the import target instead of global symbol table
292                    let import_key = (entity.file_path.clone(), ref_name.to_string());
293                    if let Some(import_target_id) = import_table.get(&import_key) {
294                        if import_target_id != &entity.id
295                            && !parent_child_pairs.contains(&(entity.id.as_str(), import_target_id.as_str()))
296                            && !parent_child_pairs.contains(&(import_target_id.as_str(), entity.id.as_str()))
297                        {
298                            let ref_type = infer_ref_type(&entity.content, &ref_name);
299                            entity_edges.push((
300                                entity.id.clone(),
301                                import_target_id.clone(),
302                                ref_type,
303                            ));
304                        }
305                        continue;
306                    }
307
308                    if let Some(target_ids) = symbol_table.get(ref_name) {
309                        // Without an import, only resolve to entities in the same file.
310                        // Cross-file resolution is handled by the import table above.
311                        let target = target_ids
312                            .iter()
313                            .find(|id| {
314                                *id != &entity.id
315                                    && entity_map
316                                        .get(*id)
317                                        .map_or(false, |e| e.file_path == entity.file_path)
318                            });
319
320                        if let Some(target_id) = target {
321                            // Skip parent-child edges (class -> own method)
322                            if parent_child_pairs.contains(&(entity.id.as_str(), target_id.as_str()))
323                                || parent_child_pairs.contains(&(target_id.as_str(), entity.id.as_str()))
324                            {
325                                continue;
326                            }
327                            let ref_type = infer_ref_type(&entity.content, &ref_name);
328                            entity_edges.push((
329                                entity.id.clone(),
330                                target_id.clone(),
331                                ref_type,
332                            ));
333                        }
334                    }
335                }
336                entity_edges
337            })
338            .collect();
339
340        // Merge scope edges with bag-of-words edges, deduplicating
341        let mut all_resolved: Vec<(String, String, RefType)> = scope_edges;
342        all_resolved.extend(resolved_refs);
343        let mut seen_edges: HashSet<(String, String)> = HashSet::new();
344        all_resolved.retain(|e| seen_edges.insert((e.0.clone(), e.1.clone())));
345
346        // Build edge indexes from resolved references
347        let mut edges: Vec<EntityRef> = Vec::with_capacity(all_resolved.len());
348        let mut dependents: HashMap<String, Vec<String>> = HashMap::new();
349        let mut dependencies: HashMap<String, Vec<String>> = HashMap::new();
350
351        for (from_entity, to_entity, ref_type) in all_resolved {
352            dependents
353                .entry(to_entity.clone())
354                .or_default()
355                .push(from_entity.clone());
356            dependencies
357                .entry(from_entity.clone())
358                .or_default()
359                .push(to_entity.clone());
360            edges.push(EntityRef {
361                from_entity,
362                to_entity,
363                ref_type,
364            });
365        }
366
367        let graph = EntityGraph {
368            entities: entity_map,
369            edges,
370            dependents,
371            dependencies,
372        };
373
374        (graph, all_entities)
375    }
376
377    /// Incrementally build an entity graph: reparse only stale files, reuse cached data for clean files.
378    ///
379    /// Uses the same full 3-phase resolution (scope + dot-chain + bag-of-words) as `build()`,
380    /// but only runs it for entities in stale files + clean entities whose cached edges
381    /// pointed into stale files (they need re-resolution since their targets may have changed).
382    pub fn build_incremental(
383        root: &Path,
384        stale_files: &[String],
385        all_file_paths: &[String],
386        cached_entities: Vec<SemanticEntity>,
387        cached_edges: Vec<EntityRef>,
388        stale_file_cached_entities: Vec<SemanticEntity>,
389        registry: &ParserRegistry,
390    ) -> (Self, Vec<SemanticEntity>) {
391        // Build set of stale file paths for quick lookup
392        let stale_set: HashSet<&str> = stale_files.iter().map(|s| s.as_str()).collect();
393
394        // Parse stale files in parallel to get new entities + trees
395        let per_file: Vec<(Vec<SemanticEntity>, Option<(String, String, tree_sitter::Tree)>)> = stale_files
396            .par_iter()
397            .filter_map(|file_path| {
398                let full_path = root.join(file_path);
399                let content = std::fs::read_to_string(&full_path).ok()?;
400                let plugin = registry.get_plugin_with_content(file_path, &content)?;
401                let (entities, tree) = plugin.extract_entities_with_tree(&content, file_path);
402                let parsed = tree.map(|t| (file_path.clone(), content, t));
403                Some((entities, parsed))
404            })
405            .collect();
406
407        let mut new_entities: Vec<SemanticEntity> = Vec::new();
408        let mut parsed_files: Vec<(String, String, tree_sitter::Tree)> = Vec::new();
409        for (entities, parsed) in per_file {
410            new_entities.extend(entities);
411            if let Some(p) = parsed {
412                parsed_files.push(p);
413            }
414        }
415
416        // Entity-level diffing: compare new stale-file entities against cached versions
417        // Build content_hash lookup from cached stale-file entities
418        let cached_hashes: HashMap<&str, &str> = stale_file_cached_entities
419            .iter()
420            .map(|e| (e.id.as_str(), e.content_hash.as_str()))
421            .collect();
422
423        // Classify new stale-file entities
424        let mut truly_changed_ids: HashSet<String> = HashSet::new();
425        let mut content_clean_ids: HashSet<String> = HashSet::new();
426        for entity in &new_entities {
427            match cached_hashes.get(entity.id.as_str()) {
428                Some(old_hash) if *old_hash == entity.content_hash.as_str() => {
429                    content_clean_ids.insert(entity.id.clone());
430                }
431                _ => {
432                    // Hash differs or entity is new
433                    truly_changed_ids.insert(entity.id.clone());
434                }
435            }
436        }
437
438        // Detect deleted entities: in cached stale but not in new
439        let new_entity_ids: HashSet<&str> = new_entities.iter().map(|e| e.id.as_str()).collect();
440        let deleted_ids: HashSet<&str> = stale_file_cached_entities
441            .iter()
442            .filter(|e| !new_entity_ids.contains(e.id.as_str()))
443            .map(|e| e.id.as_str())
444            .collect();
445
446        // Merge: cached (clean) entities + new (stale) entities
447        let all_entities: Vec<SemanticEntity> = cached_entities
448            .into_iter()
449            .chain(new_entities.into_iter())
450            .collect();
451
452        // Find affected clean entities: only care about edges pointing to truly_changed/deleted
453        let mut affected_clean_ids: HashSet<String> = HashSet::new();
454        for edge in &cached_edges {
455            let to_truly_changed = truly_changed_ids.contains(&edge.to_entity)
456                || deleted_ids.contains(edge.to_entity.as_str());
457            if to_truly_changed && !stale_set.contains(
458                all_entities.iter()
459                    .find(|e| e.id == edge.from_entity)
460                    .map(|e| e.file_path.as_str())
461                    .unwrap_or("")
462            ) {
463                affected_clean_ids.insert(edge.from_entity.clone());
464            }
465        }
466
467        // Collect all stale entity IDs (for edge filtering)
468        let stale_entity_ids: HashSet<&str> = all_entities
469            .iter()
470            .filter(|e| stale_set.contains(e.file_path.as_str()))
471            .map(|e| e.id.as_str())
472            .collect();
473
474        // Keep edges where:
475        // - Both endpoints are clean files AND from_entity is not affected, OR
476        // - From a content_clean stale entity whose targets are also clean/content_clean
477        let kept_edges: Vec<EntityRef> = cached_edges
478            .into_iter()
479            .filter(|e| {
480                let from_stale = stale_entity_ids.contains(e.from_entity.as_str());
481                let to_stale = stale_entity_ids.contains(e.to_entity.as_str());
482
483                if !from_stale && !to_stale && !affected_clean_ids.contains(&e.from_entity) {
484                    // Both clean, from not affected
485                    return true;
486                }
487                if content_clean_ids.contains(&e.from_entity)
488                    && !truly_changed_ids.contains(&e.to_entity)
489                    && !deleted_ids.contains(e.to_entity.as_str())
490                    && !affected_clean_ids.contains(&e.from_entity)
491                {
492                    // From content_clean stale entity, target not truly changed
493                    return true;
494                }
495                false
496            })
497            .collect();
498
499        // Set of entity IDs that need resolution: truly_changed + affected clean
500        // (content_clean stale entities keep their cached edges)
501        let needs_resolution: HashSet<&str> = all_entities
502            .iter()
503            .filter(|e| {
504                truly_changed_ids.contains(&e.id)
505                    || affected_clean_ids.contains(&e.id)
506            })
507            .map(|e| e.id.as_str())
508            .collect();
509
510        // Now run the same resolution logic as build() but only for entities in needs_resolution.
511        // We still need the full context (symbol table, import table, etc.) from ALL entities.
512
513        // Build symbol table from all entities
514        let mut symbol_table: HashMap<String, Vec<String>> = HashMap::with_capacity(all_entities.len());
515        let mut entity_map: HashMap<String, EntityInfo> = HashMap::with_capacity(all_entities.len());
516
517        for entity in &all_entities {
518            symbol_table
519                .entry(entity.name.clone())
520                .or_default()
521                .push(entity.id.clone());
522            entity_map.insert(
523                entity.id.clone(),
524                EntityInfo {
525                    id: entity.id.clone(),
526                    name: entity.name.clone(),
527                    entity_type: entity.entity_type.clone(),
528                    file_path: entity.file_path.clone(),
529                    parent_id: entity.parent_id.clone(),
530                    start_line: entity.start_line,
531                    end_line: entity.end_line,
532                },
533            );
534        }
535
536        // Build parent-child set
537        let parent_child_pairs: HashSet<(&str, &str)> = all_entities
538            .iter()
539            .filter_map(|e| {
540                e.parent_id.as_ref().map(|pid| (pid.as_str(), e.id.as_str()))
541            })
542            .collect();
543
544        let class_child_names: HashSet<(&str, &str)> = all_entities
545            .iter()
546            .filter_map(|e| {
547                e.parent_id.as_ref().map(|pid| (pid.as_str(), e.name.as_str()))
548            })
549            .collect();
550
551        let class_entity_names: HashSet<&str> = all_entities
552            .iter()
553            .filter(|e| matches!(e.entity_type.as_str(), "class" | "struct" | "interface" | "class_type"))
554            .map(|e| e.name.as_str())
555            .collect();
556
557        let id_to_name: HashMap<&str, &str> = all_entities
558            .iter()
559            .map(|e| (e.id.as_str(), e.name.as_str()))
560            .collect();
561
562        let mut enclosing_class: HashMap<&str, &str> = HashMap::new();
563        let mut class_members: HashMap<&str, Vec<(&str, &str)>> = HashMap::new();
564
565        for entity in &all_entities {
566            if let Some(ref pid) = entity.parent_id {
567                if let Some(&parent_name) = id_to_name.get(pid.as_str()) {
568                    if class_entity_names.contains(parent_name) {
569                        enclosing_class.insert(entity.id.as_str(), parent_name);
570                        class_members
571                            .entry(parent_name)
572                            .or_default()
573                            .push((entity.name.as_str(), entity.id.as_str()));
574                    }
575                }
576            }
577        }
578
579        // Build import table from ALL files (imports may reference stale entities)
580        let import_table = build_import_table(root, all_file_paths, &symbol_table, &entity_map);
581
582        // Run scope-aware resolver only on files that need resolution
583        let resolve_file_paths: Vec<String> = all_file_paths
584            .iter()
585            .filter(|f| {
586                // Include file if any entity in needs_resolution belongs to it
587                stale_set.contains(f.as_str()) || all_entities.iter().any(|e| {
588                    e.file_path == **f && affected_clean_ids.contains(&e.id)
589                })
590            })
591            .cloned()
592            .collect();
593
594        let has_scope_lang = resolve_file_paths.iter().any(|f| {
595            let ext = f.rfind('.').map(|i| &f[i..]).unwrap_or("");
596            crate::parser::plugins::code::languages::get_language_config(ext)
597                .and_then(|c| c.scope_resolve)
598                .is_some()
599        });
600        let (scope_edges, scope_resolved_entities) = if has_scope_lang {
601            // Pass pre-parsed stale-file trees; scope_resolve reads affected clean files from disk
602            let resolve_set: HashSet<&str> = resolve_file_paths.iter().map(|s| s.as_str()).collect();
603            let relevant_parsed: Vec<(String, String, tree_sitter::Tree)> = parsed_files
604                .into_iter()
605                .filter(|(fp, _, _)| resolve_set.contains(fp.as_str()))
606                .collect();
607            let pre = if relevant_parsed.is_empty() { None } else { Some(relevant_parsed) };
608            let result = scope_resolve::resolve_with_scopes(root, &resolve_file_paths, &all_entities, &entity_map, pre);
609            let resolved_entity_ids: HashSet<String> = result.edges.iter()
610                .map(|(from, _, _)| from.clone())
611                .collect();
612            (result.edges, resolved_entity_ids)
613        } else {
614            (vec![], HashSet::new())
615        };
616
617        // Resolve references only for entities in needs_resolution
618        let resolved_refs: Vec<(String, String, RefType)> = all_entities
619            .par_iter()
620            .filter(|e| needs_resolution.contains(e.id.as_str()))
621            .flat_map(|entity| {
622                if scope_resolved_entities.contains(&entity.id) {
623                    return vec![];
624                }
625
626                let mut entity_edges = Vec::new();
627                let mut consumed_words: HashSet<String> = HashSet::new();
628
629                // Phase 1: Dot-chain resolution
630                let stripped = strip_comments_and_strings(&entity.content);
631                let dot_chains = extract_dot_chains(&stripped);
632
633                for (receiver, member) in &dot_chains {
634                    if *receiver == "self" || *receiver == "this" {
635                        if let Some(class_name) = enclosing_class.get(entity.id.as_str()) {
636                            if let Some(members) = class_members.get(class_name) {
637                                for (n, tid) in members {
638                                    if *n == *member && *tid != entity.id.as_str() {
639                                        entity_edges.push((
640                                            entity.id.clone(),
641                                            tid.to_string(),
642                                            RefType::Calls,
643                                        ));
644                                        consumed_words.insert(member.to_string());
645                                        break;
646                                    }
647                                }
648                            }
649                        }
650                    } else if class_entity_names.contains(*receiver) {
651                        if let Some(members) = class_members.get(*receiver) {
652                            for (n, tid) in members {
653                                if *n == *member {
654                                    entity_edges.push((
655                                        entity.id.clone(),
656                                        tid.to_string(),
657                                        RefType::Calls,
658                                    ));
659                                    consumed_words.insert(member.to_string());
660                                    consumed_words.insert(receiver.to_string());
661                                    break;
662                                }
663                            }
664                        }
665                    }
666                }
667
668                // Phase 2: Bag-of-words resolution
669                let refs = extract_references_from_content(&entity.content, &entity.name);
670                for ref_name in refs {
671                    if consumed_words.contains(ref_name) {
672                        continue;
673                    }
674                    if class_child_names.contains(&(entity.id.as_str(), ref_name)) {
675                        continue;
676                    }
677
678                    let import_key = (entity.file_path.clone(), ref_name.to_string());
679                    if let Some(import_target_id) = import_table.get(&import_key) {
680                        if import_target_id != &entity.id
681                            && !parent_child_pairs.contains(&(entity.id.as_str(), import_target_id.as_str()))
682                            && !parent_child_pairs.contains(&(import_target_id.as_str(), entity.id.as_str()))
683                        {
684                            let ref_type = infer_ref_type(&entity.content, &ref_name);
685                            entity_edges.push((
686                                entity.id.clone(),
687                                import_target_id.clone(),
688                                ref_type,
689                            ));
690                        }
691                        continue;
692                    }
693
694                    if let Some(target_ids) = symbol_table.get(ref_name) {
695                        let target = target_ids
696                            .iter()
697                            .find(|id| {
698                                *id != &entity.id
699                                    && entity_map
700                                        .get(*id)
701                                        .map_or(false, |e| e.file_path == entity.file_path)
702                            });
703
704                        if let Some(target_id) = target {
705                            if parent_child_pairs.contains(&(entity.id.as_str(), target_id.as_str()))
706                                || parent_child_pairs.contains(&(target_id.as_str(), entity.id.as_str()))
707                            {
708                                continue;
709                            }
710                            let ref_type = infer_ref_type(&entity.content, &ref_name);
711                            entity_edges.push((
712                                entity.id.clone(),
713                                target_id.clone(),
714                                ref_type,
715                            ));
716                        }
717                    }
718                }
719                entity_edges
720            })
721            .collect();
722
723        // Merge scope edges + bag-of-words edges + kept cached edges
724        let mut all_resolved: Vec<(String, String, RefType)> = scope_edges;
725        all_resolved.extend(resolved_refs);
726        let mut seen_edges: HashSet<(String, String)> = HashSet::new();
727        all_resolved.retain(|e| seen_edges.insert((e.0.clone(), e.1.clone())));
728
729        // Build final edge list: kept edges + newly resolved edges
730        let mut edges: Vec<EntityRef> = Vec::with_capacity(kept_edges.len() + all_resolved.len());
731        let mut dependents: HashMap<String, Vec<String>> = HashMap::new();
732        let mut dependencies: HashMap<String, Vec<String>> = HashMap::new();
733
734        // Track all edge pairs for dedup
735        let mut all_edge_pairs: HashSet<(String, String)> = HashSet::new();
736
737        // Add kept cached edges
738        for edge in kept_edges {
739            all_edge_pairs.insert((edge.from_entity.clone(), edge.to_entity.clone()));
740            dependents
741                .entry(edge.to_entity.clone())
742                .or_default()
743                .push(edge.from_entity.clone());
744            dependencies
745                .entry(edge.from_entity.clone())
746                .or_default()
747                .push(edge.to_entity.clone());
748            edges.push(edge);
749        }
750
751        // Add newly resolved edges, dedup against kept edges
752        for (from_entity, to_entity, ref_type) in all_resolved {
753            if !all_edge_pairs.insert((from_entity.clone(), to_entity.clone())) {
754                continue;
755            }
756            dependents
757                .entry(to_entity.clone())
758                .or_default()
759                .push(from_entity.clone());
760            dependencies
761                .entry(from_entity.clone())
762                .or_default()
763                .push(to_entity.clone());
764            edges.push(EntityRef {
765                from_entity,
766                to_entity,
767                ref_type,
768            });
769        }
770
771        let graph = EntityGraph {
772            entities: entity_map,
773            edges,
774            dependents,
775            dependencies,
776        };
777
778        (graph, all_entities)
779    }
780
781    /// Get entities that depend on the given entity (reverse deps).
782    pub fn get_dependents(&self, entity_id: &str) -> Vec<&EntityInfo> {
783        self.dependents
784            .get(entity_id)
785            .map(|ids| {
786                ids.iter()
787                    .filter_map(|id| self.entities.get(id))
788                    .collect()
789            })
790            .unwrap_or_default()
791    }
792
793    /// Get entities that the given entity depends on (forward deps).
794    pub fn get_dependencies(&self, entity_id: &str) -> Vec<&EntityInfo> {
795        self.dependencies
796            .get(entity_id)
797            .map(|ids| {
798                ids.iter()
799                    .filter_map(|id| self.entities.get(id))
800                    .collect()
801            })
802            .unwrap_or_default()
803    }
804
805    /// Impact analysis: if the given entity changes, what else might be affected?
806    /// Returns all transitive dependents (breadth-first), capped at 10k.
807    pub fn impact_analysis(&self, entity_id: &str) -> Vec<&EntityInfo> {
808        self.impact_analysis_capped(entity_id, 10_000)
809    }
810
811    /// Impact analysis with a cap on maximum nodes visited.
812    /// Returns transitive dependents up to the cap. Uses borrowed strings.
813    pub fn impact_analysis_capped(&self, entity_id: &str, max_visited: usize) -> Vec<&EntityInfo> {
814        let mut visited: HashSet<&str> = HashSet::new();
815        let mut queue: std::collections::VecDeque<&str> = std::collections::VecDeque::new();
816        let mut result = Vec::new();
817
818        let start_key = match self.entities.get_key_value(entity_id) {
819            Some((k, _)) => k.as_str(),
820            None => return result,
821        };
822
823        queue.push_back(start_key);
824        visited.insert(start_key);
825
826        while let Some(current) = queue.pop_front() {
827            if result.len() >= max_visited {
828                break;
829            }
830            if let Some(deps) = self.dependents.get(current) {
831                for dep in deps {
832                    if visited.insert(dep.as_str()) {
833                        if let Some(info) = self.entities.get(dep.as_str()) {
834                            result.push(info);
835                        }
836                        queue.push_back(dep.as_str());
837                        if result.len() >= max_visited {
838                            break;
839                        }
840                    }
841                }
842            }
843        }
844
845        result
846    }
847
848    /// Count transitive dependents without collecting them (faster for large graphs).
849    /// Uses borrowed strings to avoid allocation overhead.
850    pub fn impact_count(&self, entity_id: &str, max_count: usize) -> usize {
851        let mut visited: HashSet<&str> = HashSet::new();
852        let mut queue: std::collections::VecDeque<&str> = std::collections::VecDeque::new();
853        let mut count = 0;
854
855        // We need entity_id to live long enough; look it up in our entities map
856        let start_key = match self.entities.get_key_value(entity_id) {
857            Some((k, _)) => k.as_str(),
858            None => return 0,
859        };
860
861        queue.push_back(start_key);
862        visited.insert(start_key);
863
864        while let Some(current) = queue.pop_front() {
865            if count >= max_count {
866                break;
867            }
868            if let Some(deps) = self.dependents.get(current) {
869                for dep in deps {
870                    if visited.insert(dep.as_str()) {
871                        count += 1;
872                        queue.push_back(dep.as_str());
873                        if count >= max_count {
874                            break;
875                        }
876                    }
877                }
878            }
879        }
880
881        count
882    }
883
884    /// Filter entities to those that look like tests.
885    /// Uses name heuristics, file path patterns, and content patterns.
886    pub fn filter_test_entities(&self, entities: &[crate::model::entity::SemanticEntity]) -> HashSet<String> {
887        let mut test_ids = HashSet::new();
888        for entity in entities {
889            if is_test_entity(entity) {
890                test_ids.insert(entity.id.clone());
891            }
892        }
893        test_ids
894    }
895
896    /// Impact analysis filtered to test entities only.
897    /// Returns transitive dependents that are test functions/methods.
898    pub fn test_impact(
899        &self,
900        entity_id: &str,
901        all_entities: &[crate::model::entity::SemanticEntity],
902    ) -> Vec<&EntityInfo> {
903        let test_ids = self.filter_test_entities(all_entities);
904        let impact = self.impact_analysis(entity_id);
905        impact
906            .into_iter()
907            .filter(|info| test_ids.contains(&info.id))
908            .collect()
909    }
910
911    /// Incrementally update the graph from a set of changed files.
912    ///
913    /// Instead of rebuilding the entire graph, this only re-extracts entities
914    /// from changed files and re-resolves their references. This is faster
915    /// than a full rebuild when only a few files changed.
916    ///
917    /// For each changed file:
918    /// - Deleted: remove all entities from that file, prune edges
919    /// - Added/Modified: remove old entities, extract new ones, rebuild references
920    /// - Renamed: update file paths in entity info
921    pub fn update_from_changes(
922        &mut self,
923        changed_files: &[FileChange],
924        root: &Path,
925        registry: &ParserRegistry,
926    ) {
927        let mut affected_files: HashSet<String> = HashSet::new();
928        let mut new_entities: Vec<SemanticEntity> = Vec::new();
929
930        for change in changed_files {
931            affected_files.insert(change.file_path.clone());
932            if let Some(ref old_path) = change.old_file_path {
933                affected_files.insert(old_path.clone());
934            }
935
936            match change.status {
937                FileStatus::Deleted => {
938                    self.remove_entities_for_file(&change.file_path);
939                }
940                FileStatus::Renamed => {
941                    // Update file paths for renamed files
942                    if let Some(ref old_path) = change.old_file_path {
943                        self.remove_entities_for_file(old_path);
944                    }
945                    // Extract entities from the new file
946                    if let Some(entities) = self.extract_file_entities(
947                        &change.file_path,
948                        change.after_content.as_deref(),
949                        root,
950                        registry,
951                    ) {
952                        new_entities.extend(entities);
953                    }
954                }
955                FileStatus::Added | FileStatus::Modified => {
956                    // Remove old entities for this file
957                    self.remove_entities_for_file(&change.file_path);
958                    // Extract new entities
959                    if let Some(entities) = self.extract_file_entities(
960                        &change.file_path,
961                        change.after_content.as_deref(),
962                        root,
963                        registry,
964                    ) {
965                        new_entities.extend(entities);
966                    }
967                }
968            }
969        }
970
971        // Add new entities to the entity map
972        for entity in &new_entities {
973            self.entities.insert(
974                entity.id.clone(),
975                EntityInfo {
976                    id: entity.id.clone(),
977                    name: entity.name.clone(),
978                    entity_type: entity.entity_type.clone(),
979                    file_path: entity.file_path.clone(),
980                    parent_id: entity.parent_id.clone(),
981                    start_line: entity.start_line,
982                    end_line: entity.end_line,
983                },
984            );
985        }
986
987        // Rebuild the global symbol table from all current entities
988        let symbol_table = self.build_symbol_table();
989
990        // Re-resolve references for new entities
991        for entity in &new_entities {
992            self.resolve_entity_references(entity, &symbol_table);
993        }
994
995        // Also re-resolve references for entities in OTHER files that might
996        // reference entities in changed files (their targets may have changed)
997        let changed_entity_names: HashSet<String> = new_entities
998            .iter()
999            .map(|e| e.name.clone())
1000            .collect();
1001
1002        // Find entities in unchanged files that reference any changed entity name
1003        let entities_to_recheck: Vec<String> = self
1004            .entities
1005            .values()
1006            .filter(|e| !affected_files.contains(&e.file_path))
1007            .filter(|e| {
1008                self.dependencies
1009                    .get(&e.id)
1010                    .map_or(false, |deps| {
1011                        deps.iter().any(|dep_id| {
1012                            self.entities
1013                                .get(dep_id)
1014                                .map_or(false, |dep| changed_entity_names.contains(&dep.name))
1015                        })
1016                    })
1017            })
1018            .map(|e| e.id.clone())
1019            .collect();
1020
1021        // We don't have the full SemanticEntity for unchanged files, so we skip
1022        // deep re-resolution here. The forward/reverse indexes are already updated
1023        // by remove_entities_for_file and resolve_entity_references.
1024        // For entities that had dangling references (their target was deleted),
1025        // the edges were already pruned.
1026        let _ = entities_to_recheck; // acknowledge but don't act on for now
1027    }
1028
1029    /// Extract entities from a file, using provided content or reading from disk.
1030    fn extract_file_entities(
1031        &self,
1032        file_path: &str,
1033        content: Option<&str>,
1034        root: &Path,
1035        registry: &ParserRegistry,
1036    ) -> Option<Vec<SemanticEntity>> {
1037        let content = if let Some(c) = content {
1038            c.to_string()
1039        } else {
1040            let full_path = root.join(file_path);
1041            std::fs::read_to_string(&full_path).ok()?
1042        };
1043
1044        let plugin = registry.get_plugin_with_content(file_path, &content)?;
1045
1046        Some(plugin.extract_entities(&content, file_path))
1047    }
1048
1049    /// Remove all entities belonging to a specific file and prune their edges.
1050    fn remove_entities_for_file(&mut self, file_path: &str) {
1051        // Collect entity IDs to remove
1052        let ids_to_remove: Vec<String> = self
1053            .entities
1054            .values()
1055            .filter(|e| e.file_path == file_path)
1056            .map(|e| e.id.clone())
1057            .collect();
1058
1059        let id_set: HashSet<&str> = ids_to_remove.iter().map(|s| s.as_str()).collect();
1060
1061        // Remove from entity map
1062        for id in &ids_to_remove {
1063            self.entities.remove(id);
1064        }
1065
1066        // Remove edges involving these entities
1067        self.edges
1068            .retain(|e| !id_set.contains(e.from_entity.as_str()) && !id_set.contains(e.to_entity.as_str()));
1069
1070        // Clean up dependency/dependent indexes
1071        for id in &ids_to_remove {
1072            // Remove forward deps
1073            if let Some(deps) = self.dependencies.remove(id) {
1074                // Also remove from reverse index
1075                for dep in &deps {
1076                    if let Some(dependents) = self.dependents.get_mut(dep) {
1077                        dependents.retain(|d| d != id);
1078                    }
1079                }
1080            }
1081            // Remove reverse deps
1082            if let Some(deps) = self.dependents.remove(id) {
1083                // Also remove from forward index
1084                for dep in &deps {
1085                    if let Some(dependencies) = self.dependencies.get_mut(dep) {
1086                        dependencies.retain(|d| d != id);
1087                    }
1088                }
1089            }
1090        }
1091    }
1092
1093    /// Build a symbol table from all current entities.
1094    fn build_symbol_table(&self) -> HashMap<String, Vec<String>> {
1095        let mut symbol_table: HashMap<String, Vec<String>> = HashMap::new();
1096        for entity in self.entities.values() {
1097            symbol_table
1098                .entry(entity.name.clone())
1099                .or_default()
1100                .push(entity.id.clone());
1101        }
1102        symbol_table
1103    }
1104
1105    /// Resolve references for a single entity against the symbol table.
1106    fn resolve_entity_references(
1107        &mut self,
1108        entity: &SemanticEntity,
1109        symbol_table: &HashMap<String, Vec<String>>,
1110    ) {
1111        let refs = extract_references_from_content(&entity.content, &entity.name);
1112
1113        for ref_name in refs {
1114            if let Some(target_ids) = symbol_table.get(ref_name) {
1115                let target = target_ids
1116                    .iter()
1117                    .find(|id| {
1118                        *id != &entity.id
1119                            && self
1120                                .entities
1121                                .get(*id)
1122                                .map_or(false, |e| e.file_path == entity.file_path)
1123                    })
1124                    .or_else(|| target_ids.iter().find(|id| *id != &entity.id));
1125
1126                if let Some(target_id) = target {
1127                    let ref_type = infer_ref_type(&entity.content, &ref_name);
1128                    self.edges.push(EntityRef {
1129                        from_entity: entity.id.clone(),
1130                        to_entity: target_id.clone(),
1131                        ref_type,
1132                    });
1133                    self.dependents
1134                        .entry(target_id.clone())
1135                        .or_default()
1136                        .push(entity.id.clone());
1137                    self.dependencies
1138                        .entry(entity.id.clone())
1139                        .or_default()
1140                        .push(target_id.clone());
1141                }
1142            }
1143        }
1144    }
1145}
1146
1147/// Check if an entity looks like a test based on name, file path, and content patterns.
1148fn is_test_entity(entity: &crate::model::entity::SemanticEntity) -> bool {
1149    let name = &entity.name;
1150    let path = &entity.file_path;
1151    let content = &entity.content;
1152
1153    // Name patterns
1154    if name.starts_with("test_") || name.starts_with("Test") || name.ends_with("_test") || name.ends_with("Test") {
1155        return true;
1156    }
1157    if name.starts_with("it_") || name.starts_with("describe_") || name.starts_with("spec_") {
1158        return true;
1159    }
1160
1161    // File path patterns
1162    let path_lower = path.to_lowercase();
1163    let in_test_file = path_lower.contains("/test/")
1164        || path_lower.contains("/tests/")
1165        || path_lower.contains("/spec/")
1166        || path_lower.contains("_test.")
1167        || path_lower.contains(".test.")
1168        || path_lower.contains("_spec.")
1169        || path_lower.contains(".spec.");
1170
1171    // Content patterns (test annotations/decorators)
1172    let has_test_marker = content.contains("#[test]")
1173        || content.contains("#[cfg(test)]")
1174        || content.contains("@Test")
1175        || content.contains("@pytest")
1176        || content.contains("@test")
1177        || content.contains("describe(")
1178        || content.contains("it(")
1179        || content.contains("test(");
1180
1181    in_test_file && has_test_marker
1182}
1183
1184/// Build import table: maps (file_path, imported_name) → target entity ID.
1185///
1186/// Parses `from X import Y` / `import X` / `use X` style statements from entity content
1187/// and resolves Y to the entity it refers to in the symbol table.
1188fn build_import_table(
1189    root: &Path,
1190    file_paths: &[String],
1191    symbol_table: &HashMap<String, Vec<String>>,
1192    entity_map: &HashMap<String, EntityInfo>,
1193) -> HashMap<(String, String), String> {
1194    let mut import_table: HashMap<(String, String), String> = HashMap::new();
1195
1196    for file_path in file_paths {
1197        let full_path = root.join(file_path);
1198        let content = match std::fs::read_to_string(&full_path) {
1199            Ok(c) => c,
1200            Err(_) => continue,
1201        };
1202
1203        // Join multi-line imports into single logical lines
1204        // e.g. "from .cookies import (\n    foo,\n    bar,\n)" -> "from .cookies import foo, bar"
1205        let mut logical_lines: Vec<String> = Vec::new();
1206        let mut current_line = String::new();
1207        let mut in_parens = false;
1208
1209        for line in content.lines() {
1210            let trimmed = line.trim();
1211            if in_parens {
1212                // Strip parentheses and comments
1213                let clean = trimmed.trim_end_matches(|c: char| c == ')' || c == ',');
1214                let clean = clean.split('#').next().unwrap_or(clean).trim();
1215                if !clean.is_empty() && clean != "(" {
1216                    current_line.push_str(", ");
1217                    current_line.push_str(clean);
1218                }
1219                if trimmed.contains(')') {
1220                    in_parens = false;
1221                    logical_lines.push(std::mem::take(&mut current_line));
1222                }
1223            } else if trimmed.starts_with("from ") && trimmed.contains(" import ") {
1224                if trimmed.contains('(') && !trimmed.contains(')') {
1225                    // Multi-line import starts
1226                    in_parens = true;
1227                    // Take everything before the paren
1228                    let before_paren = trimmed.split('(').next().unwrap_or(trimmed);
1229                    current_line = before_paren.trim().to_string();
1230                    // Also grab anything after the paren on this line
1231                    if let Some(after) = trimmed.split('(').nth(1) {
1232                        let after = after.trim().trim_end_matches(')').trim();
1233                        if !after.is_empty() {
1234                            current_line.push(' ');
1235                            current_line.push_str(after);
1236                        }
1237                    }
1238                } else {
1239                    logical_lines.push(trimmed.to_string());
1240                }
1241            }
1242        }
1243
1244        for logical_line in &logical_lines {
1245            if let Some(rest) = logical_line.strip_prefix("from ") {
1246                // Find " import " or " import," (multi-line imports join with comma)
1247                let import_match = rest.find(" import ")
1248                    .map(|pos| (pos, 8))
1249                    .or_else(|| rest.find(" import,").map(|pos| (pos, 8)));
1250                if let Some((import_pos, skip)) = import_match {
1251                    let module_path = &rest[..import_pos];
1252                    let names_str = &rest[import_pos + skip..];
1253
1254                    let source_module = module_path
1255                        .trim_start_matches('.')
1256                        .rsplit('.')
1257                        .next()
1258                        .unwrap_or(module_path.trim_start_matches('.'));
1259
1260                    for name_part in names_str.split(',') {
1261                        let name_part = name_part.trim();
1262                        let imported_name = name_part.split_whitespace().next().unwrap_or(name_part);
1263                        // Strip trailing parens/punctuation
1264                        let imported_name = imported_name.trim_matches(|c: char| c == '(' || c == ')' || c == ',');
1265                        if imported_name.is_empty() {
1266                            continue;
1267                        }
1268
1269                        if let Some(target_ids) = symbol_table.get(imported_name) {
1270                            let target = target_ids.iter().find(|id| {
1271                                entity_map.get(*id).map_or(false, |e| {
1272                                    let stem = e.file_path.rsplit('/').next().unwrap_or(&e.file_path);
1273                                    let stem = stem.strip_suffix(".py")
1274                                        .or_else(|| stem.strip_suffix(".ts"))
1275                                        .or_else(|| stem.strip_suffix(".js"))
1276                                        .or_else(|| stem.strip_suffix(".rs"))
1277                                        .unwrap_or(stem);
1278                                    stem == source_module
1279                                })
1280                            });
1281                            if let Some(target_id) = target {
1282                                import_table.insert(
1283                                    (file_path.clone(), imported_name.to_string()),
1284                                    target_id.clone(),
1285                                );
1286                            }
1287                        }
1288                    }
1289                }
1290            }
1291        }
1292
1293        // JS/TS imports: import { foo, bar as baz } from './module'
1294        //                import Foo from './module'
1295        let is_js_ts = file_path.ends_with(".js") || file_path.ends_with(".ts")
1296            || file_path.ends_with(".jsx") || file_path.ends_with(".tsx");
1297
1298        if is_js_ts {
1299            static JS_NAMED_RE: LazyLock<Regex> = LazyLock::new(|| {
1300                Regex::new(r#"import\s*\{([^}]+)\}\s*from\s*['"]([^'"]+)['"]"#).unwrap()
1301            });
1302            static JS_DEFAULT_RE: LazyLock<Regex> = LazyLock::new(|| {
1303                Regex::new(r#"import\s+(?:type\s+)?([A-Za-z_]\w*)\s+from\s*['"]([^'"]+)['"]"#).unwrap()
1304            });
1305
1306            for cap in JS_NAMED_RE.captures_iter(&content) {
1307                let names_str = cap.get(1).unwrap().as_str();
1308                let module_path = cap.get(2).unwrap().as_str();
1309                let source_module = module_path.rsplit('/').next().unwrap_or(module_path);
1310                let source_module = strip_js_ext(source_module);
1311
1312                for name_part in names_str.split(',') {
1313                    let name_part = name_part.trim();
1314                    if name_part.is_empty() { continue; }
1315
1316                    // Handle "foo as bar" aliases and "type foo" prefixes
1317                    let (original_name, local_name) = if let Some(pos) = name_part.find(" as ") {
1318                        let orig = name_part[..pos].trim();
1319                        let local = name_part[pos + 4..].trim();
1320                        let orig = orig.strip_prefix("type ").unwrap_or(orig);
1321                        (orig, local)
1322                    } else {
1323                        let name = name_part.strip_prefix("type ").unwrap_or(name_part);
1324                        (name, name)
1325                    };
1326
1327                    if original_name.is_empty() || local_name.is_empty() { continue; }
1328
1329                    if let Some(target_ids) = symbol_table.get(original_name) {
1330                        let target = target_ids.iter().find(|id| {
1331                            entity_map.get(*id).map_or(false, |e| {
1332                                let stem = e.file_path.rsplit('/').next().unwrap_or(&e.file_path);
1333                                let stem = strip_file_ext(stem);
1334                                stem == source_module
1335                            })
1336                        });
1337                        if let Some(target_id) = target {
1338                            import_table.insert(
1339                                (file_path.clone(), local_name.to_string()),
1340                                target_id.clone(),
1341                            );
1342                        }
1343                    }
1344                }
1345            }
1346
1347            for cap in JS_DEFAULT_RE.captures_iter(&content) {
1348                let local_name = cap.get(1).unwrap().as_str();
1349                let module_path = cap.get(2).unwrap().as_str();
1350                let source_module = module_path.rsplit('/').next().unwrap_or(module_path);
1351                let source_module = strip_js_ext(source_module);
1352
1353                if let Some(target_ids) = symbol_table.get(local_name) {
1354                    let target = target_ids.iter().find(|id| {
1355                        entity_map.get(*id).map_or(false, |e| {
1356                            let stem = e.file_path.rsplit('/').next().unwrap_or(&e.file_path);
1357                            let stem = strip_file_ext(stem);
1358                            stem == source_module
1359                        })
1360                    });
1361                    if let Some(target_id) = target {
1362                        import_table.insert(
1363                            (file_path.clone(), local_name.to_string()),
1364                            target_id.clone(),
1365                        );
1366                    }
1367                }
1368            }
1369        }
1370
1371        // Rust imports: use crate::module::Name; / use crate::module::{A, B};
1372        // Also: use super::module::Name; / use self::module::Name;
1373        let is_rust = file_path.ends_with(".rs");
1374        if is_rust {
1375            static RUST_USE_SIMPLE_RE: LazyLock<Regex> = LazyLock::new(|| {
1376                // use crate::config::Config;
1377                // use super::types::Entity;
1378                // use config::Config;  (bare module path in binary crates)
1379                Regex::new(r"(?m)^\s*use\s+(?:(?:crate|super|self)::)?([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\s*;").unwrap()
1380            });
1381            static RUST_USE_GROUP_RE: LazyLock<Regex> = LazyLock::new(|| {
1382                // use crate::types::{Entity, ParseError};
1383                // use types::{Entity, ParseError};  (bare module path)
1384                Regex::new(r"(?m)^\s*use\s+(?:(?:crate|super|self)::)?([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)::\{([^}]+)\}\s*;").unwrap()
1385            });
1386
1387            // Build a map: module_name -> list of file paths whose stem matches
1388            // For "use crate::config::Config", module is "config", name is "Config"
1389            for cap in RUST_USE_SIMPLE_RE.captures_iter(&content) {
1390                let full_path_str = cap.get(1).unwrap().as_str();
1391                let parts: Vec<&str> = full_path_str.split("::").collect();
1392                if parts.is_empty() { continue; }
1393
1394                // Last part is the imported name, everything before is the module path
1395                let imported_name = parts[parts.len() - 1];
1396                // The module is the second-to-last part, or the first if only one part
1397                let source_module = if parts.len() >= 2 {
1398                    parts[parts.len() - 2]
1399                } else {
1400                    parts[0]
1401                };
1402
1403                resolve_rust_import(
1404                    file_path, imported_name, source_module,
1405                    symbol_table, entity_map, &mut import_table,
1406                );
1407            }
1408
1409            for cap in RUST_USE_GROUP_RE.captures_iter(&content) {
1410                let module_path = cap.get(1).unwrap().as_str();
1411                let names_str = cap.get(2).unwrap().as_str();
1412
1413                // source_module is the last segment of the module path
1414                let source_module = module_path.rsplit("::").next().unwrap_or(module_path);
1415
1416                for name_part in names_str.split(',') {
1417                    let name_part = name_part.trim();
1418                    // Handle "Name as Alias"
1419                    let (original, local) = if let Some(pos) = name_part.find(" as ") {
1420                        (&name_part[..pos], name_part[pos + 4..].trim())
1421                    } else {
1422                        (name_part, name_part)
1423                    };
1424                    let original = original.trim();
1425                    let local = local.trim();
1426                    if original.is_empty() || local.is_empty() { continue; }
1427
1428                    resolve_rust_import(
1429                        file_path, original, source_module,
1430                        symbol_table, entity_map, &mut import_table,
1431                    );
1432                    // If aliased, also map the local name
1433                    if local != original {
1434                        if let Some(target) = import_table.get(&(file_path.clone(), original.to_string())).cloned() {
1435                            import_table.insert(
1436                                (file_path.clone(), local.to_string()),
1437                                target,
1438                            );
1439                        }
1440                    }
1441                }
1442            }
1443        }
1444
1445        // Go imports: import "module/path" or import ( "module/path" )
1446        // Go uses the last path component as the package name
1447        let is_go = file_path.ends_with(".go");
1448        if is_go {
1449            static GO_IMPORT_RE: LazyLock<Regex> = LazyLock::new(|| {
1450                Regex::new(r#"(?m)"([^"]+)""#).unwrap()
1451            });
1452
1453            // Only look in import blocks
1454            let import_section = extract_go_import_section(&content);
1455            for cap in GO_IMPORT_RE.captures_iter(&import_section) {
1456                let import_path = cap.get(1).unwrap().as_str();
1457                let pkg_name = import_path.rsplit('/').next().unwrap_or(import_path);
1458
1459                // Map all entities from files matching this package name
1460                for (name, target_ids) in symbol_table.iter() {
1461                    for target_id in target_ids {
1462                        if let Some(entity) = entity_map.get(target_id) {
1463                            let stem = entity.file_path.rsplit('/').next().unwrap_or(&entity.file_path);
1464                            let stem = strip_file_ext(stem);
1465                            // Go: file stem or directory matches package name
1466                            if stem == pkg_name || entity.file_path.contains(&format!("{}/", pkg_name)) {
1467                                import_table.insert(
1468                                    (file_path.clone(), name.clone()),
1469                                    target_id.clone(),
1470                                );
1471                            }
1472                        }
1473                    }
1474                }
1475            }
1476        }
1477    }
1478
1479    import_table
1480}
1481
1482/// Resolve a Rust import: find the target entity in the symbol table
1483/// by matching the imported name against entities in files whose stem matches source_module.
1484fn resolve_rust_import(
1485    file_path: &str,
1486    imported_name: &str,
1487    source_module: &str,
1488    symbol_table: &HashMap<String, Vec<String>>,
1489    entity_map: &HashMap<String, EntityInfo>,
1490    import_table: &mut HashMap<(String, String), String>,
1491) {
1492    if let Some(target_ids) = symbol_table.get(imported_name) {
1493        let target = target_ids.iter().find(|id| {
1494            entity_map.get(*id).map_or(false, |e| {
1495                let stem = e.file_path.rsplit('/').next().unwrap_or(&e.file_path);
1496                let stem = strip_file_ext(stem);
1497                stem == source_module
1498            })
1499        });
1500        if let Some(target_id) = target {
1501            import_table.insert(
1502                (file_path.to_string(), imported_name.to_string()),
1503                target_id.clone(),
1504            );
1505        }
1506    }
1507}
1508
/// Extract the import declarations of a Go source file.
///
/// Returns one trimmed line per import spec, each terminated by `\n`:
/// - single-line declarations are kept whole, including aliased, blank and dot
///   imports (`import "fmt"`, `import f "fmt"`, `import _ "net/http/pprof"`);
/// - for grouped declarations (`import ( ... )`) every line between the opening
///   line and the closing parenthesis is kept, without the `import (` / `)` lines.
///
/// Everything outside import declarations is dropped, so quoted strings elsewhere
/// in the file are not mistaken for import paths by the caller.
fn extract_go_import_section(content: &str) -> String {
    let mut result = String::new();
    let mut in_import_block = false;
    for line in content.lines() {
        let trimmed = line.trim();

        if let Some(rest) = trimmed.strip_prefix("import") {
            let spec = rest.trim_start();
            if spec.starts_with('(') {
                in_import_block = true;
                continue;
            }
            // Only treat `import` as the keyword when whitespace separates it from
            // the spec; identifiers such as `importer` must fall through.
            if spec.len() < rest.len() {
                result.push_str(trimmed);
                result.push('\n');
                continue;
            }
        }

        if in_import_block {
            // The closing line may carry a trailing comment (`) // ...`), so match
            // on the leading parenthesis rather than on the whole line.
            if trimmed.starts_with(')') {
                in_import_block = false;
            } else {
                result.push_str(trimmed);
                result.push('\n');
            }
        }
    }
    result
}
1535
/// Remove a trailing JavaScript/TypeScript extension (`.js`, `.ts`, `.jsx`, `.tsx`)
/// from a module specifier. Anything else is returned unchanged.
fn strip_js_ext(s: &str) -> &str {
    const JS_EXTENSIONS: [&str; 4] = [".js", ".ts", ".jsx", ".tsx"];

    JS_EXTENSIONS
        .iter()
        .find_map(|ext| s.strip_suffix(*ext))
        .unwrap_or(s)
}
1544
/// Strip a known source-file extension from a file name, yielding its stem.
///
/// Recognised extensions: `.py`, `.ts`, `.js`, `.tsx`, `.jsx`, `.rs` and `.go`.
/// `.go` is required because Go import resolution compares the stripped stem
/// against the imported package name. Names with any other (or no) extension are
/// returned unchanged.
fn strip_file_ext(s: &str) -> &str {
    const SOURCE_EXTENSIONS: [&str; 7] = [".py", ".ts", ".js", ".tsx", ".jsx", ".rs", ".go"];

    SOURCE_EXTENSIONS
        .iter()
        .find_map(|ext| s.strip_suffix(*ext))
        .unwrap_or(s)
}
1555
/// Blank out comments and string literals so identifiers inside them are not
/// mistaken for code references.
///
/// The scan is byte-based and language-agnostic. Bytes of ordinary code are copied
/// through; every byte belonging to one of the following becomes a space:
/// triple-quoted strings (`"""` / `'''`), double- and single-quoted strings (with
/// backslash escapes), `#` and `//` line comments (up to, not including, the
/// newline) and `/* ... */` block comments.
///
/// The returned string has the same byte length as `content`.
fn strip_comments_and_strings(content: &str) -> String {
    /// Index just past the closing delimiter of a triple-quoted string opened at `open`.
    fn end_of_triple_quoted(src: &[u8], open: usize, delim: &[u8]) -> usize {
        let body_start = open + 3;
        match src[body_start..].windows(3).position(|window| window == delim) {
            Some(offset) => body_start + offset + 3,
            // Unterminated: scanning resumes two bytes before the end of input.
            None => body_start.max(src.len() - 2),
        }
    }

    /// Index just past the closing `quote` of a string opened at `open`.
    fn end_of_quoted(src: &[u8], open: usize, quote: u8) -> usize {
        let mut at = open + 1;
        while at < src.len() {
            match src[at] {
                // A backslash hides the byte that follows it.
                b'\\' => at += 2,
                byte if byte == quote => return at + 1,
                _ => at += 1,
            }
        }
        at
    }

    /// Index of the next newline at or after `from`, or the end of input.
    fn end_of_line(src: &[u8], from: usize) -> usize {
        src[from..]
            .iter()
            .position(|&byte| byte == b'\n')
            .map_or(src.len(), |offset| from + offset)
    }

    /// Index just past the `*/` closing a block comment opened at `open`.
    fn end_of_block_comment(src: &[u8], open: usize) -> usize {
        let body_start = open + 2;
        match src[body_start..].windows(2).position(|window| window == b"*/") {
            Some(offset) => body_start + offset + 2,
            // Unterminated: scanning resumes at the last byte of input.
            None => body_start.max(src.len() - 1),
        }
    }

    let src = content.as_bytes();
    let mut cleaned = vec![b' '; src.len()];
    let mut cursor = 0;

    while cursor < src.len() {
        let rest = &src[cursor..];
        cursor = match rest[0] {
            b'"' if rest.starts_with(b"\"\"\"") => end_of_triple_quoted(src, cursor, b"\"\"\""),
            b'\'' if rest.starts_with(b"'''") => end_of_triple_quoted(src, cursor, b"'''"),
            quote @ (b'"' | b'\'') => end_of_quoted(src, cursor, quote),
            b'#' => end_of_line(src, cursor),
            b'/' if rest.starts_with(b"//") => end_of_line(src, cursor),
            b'/' if rest.starts_with(b"/*") => end_of_block_comment(src, cursor),
            code => {
                cleaned[cursor] = code;
                cursor + 1
            }
        };
    }

    String::from_utf8_lossy(&cleaned).into_owned()
}
1634
1635/// Extract dot-chains (receiver.member) from content for precise resolution.
1636/// Returns unique (receiver, member) pairs found in the content.
1637fn extract_dot_chains<'a>(content: &'a str) -> Vec<(&'a str, &'a str)> {
1638    static DOT_CHAIN_RE: LazyLock<Regex> = LazyLock::new(|| {
1639        Regex::new(r"\b([A-Za-z_]\w*)\.([A-Za-z_]\w*)").unwrap()
1640    });
1641
1642    let mut chains = Vec::new();
1643    let mut seen: HashSet<(&str, &str)> = HashSet::new();
1644    for cap in DOT_CHAIN_RE.captures_iter(content) {
1645        let receiver = cap.get(1).unwrap().as_str();
1646        let member = cap.get(2).unwrap().as_str();
1647        if seen.insert((receiver, member)) {
1648            chains.push((receiver, member));
1649        }
1650    }
1651    chains
1652}
1653
1654/// Extract identifier references from entity content using simple token analysis.
1655/// Strips comments and strings first to avoid false positives from docstrings.
1656/// Returns borrowed slices from the stripped content.
1657fn extract_references_from_content<'a>(content: &'a str, own_name: &str) -> Vec<&'a str> {
1658    // We need to figure out which words appear only in comments/strings vs real code.
1659    // Strategy: strip comments/strings, then only accept words that appear in the stripped version.
1660    let stripped = strip_comments_and_strings(content);
1661    let stripped_words: HashSet<&str> = stripped
1662        .split(|c: char| !c.is_alphanumeric() && c != '_')
1663        .filter(|w| !w.is_empty())
1664        .collect();
1665
1666    let mut refs = Vec::new();
1667    let mut seen: HashSet<&str> = HashSet::new();
1668
1669    for word in content.split(|c: char| !c.is_alphanumeric() && c != '_') {
1670        if word.is_empty() || word == own_name {
1671            continue;
1672        }
1673        if is_keyword(word) || word.len() < 2 {
1674            continue;
1675        }
1676        // Skip very short lowercase identifiers (likely local vars: i, x, a, ok, id, etc.)
1677        if word.starts_with(|c: char| c.is_lowercase()) && word.len() < 3 {
1678            continue;
1679        }
1680        if !word.starts_with(|c: char| c.is_alphabetic() || c == '_') {
1681            continue;
1682        }
1683        // Skip common local variable names that create false graph edges
1684        if is_common_local_name(word) {
1685            continue;
1686        }
1687        // Skip words that only appear in comments/strings
1688        if !stripped_words.contains(word) {
1689            continue;
1690        }
1691        if seen.insert(word) {
1692            refs.push(word);
1693        }
1694    }
1695
1696    refs
1697}
1698
/// Identifiers that are overwhelmingly used as local variable names.
static COMMON_LOCAL_NAMES: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
    HashSet::from([
        "result", "results", "data", "config", "value", "values",
        "item", "items", "input", "output", "args", "opts",
        "name", "path", "file", "line", "count", "index",
        "temp", "prev", "next", "curr", "current", "node",
        "left", "right", "root", "head", "tail", "body",
        "text", "content", "source", "target", "entry",
        "error", "errors", "message", "response", "request",
        "context", "state", "props", "event", "handler",
        "callback", "options", "params", "query", "list",
        "base", "info", "meta", "kind", "mode", "flag",
        "size", "length", "width", "height", "start", "stop",
        "begin", "done", "found", "status", "code",
    ])
});

/// Whether `word` is a name that is almost always a local variable rather than a
/// reference to another entity. Filtering these out avoids large numbers of
/// false-positive edges in the dependency graph. The match is exact and
/// case-sensitive.
fn is_common_local_name(word: &str) -> bool {
    COMMON_LOCAL_NAMES.contains(word)
}
1721
1722/// Infer reference type from context using word-boundary-aware matching.
1723fn infer_ref_type(content: &str, ref_name: &str) -> RefType {
1724    // Check if it's a function call: ref_name followed by ( with word boundary before.
1725    // Avoids format! allocation by finding ref_name and checking the next char.
1726    let bytes = content.as_bytes();
1727    let name_bytes = ref_name.as_bytes();
1728    let mut search_start = 0;
1729    while let Some(rel_pos) = content[search_start..].find(ref_name) {
1730        let pos = search_start + rel_pos;
1731        let after = pos + name_bytes.len();
1732        // Check next char is '('
1733        if after < bytes.len() && bytes[after] == b'(' {
1734            // Verify word boundary before
1735            let is_boundary = pos == 0 || {
1736                let prev = bytes[pos - 1];
1737                !prev.is_ascii_alphanumeric() && prev != b'_'
1738            };
1739            if is_boundary {
1740                return RefType::Calls;
1741            }
1742        }
1743        // Advance past pos to the next char boundary to avoid slicing inside a multi-byte UTF-8 char.
1744        search_start = pos + 1;
1745        while search_start < content.len() && !content.is_char_boundary(search_start) {
1746            search_start += 1;
1747        }
1748    }
1749
1750    // Check if it's in an import/use statement (line-level, not substring)
1751    for line in content.lines() {
1752        let trimmed = line.trim();
1753        if (trimmed.starts_with("import ") || trimmed.starts_with("use ")
1754            || trimmed.starts_with("from ") || trimmed.starts_with("require("))
1755            && trimmed.contains(ref_name)
1756        {
1757            return RefType::Imports;
1758        }
1759    }
1760
1761    // Default to type reference
1762    RefType::TypeRef
1763}
1764
/// Language keywords, builtins and primitive/standard type names that are never
/// treated as references to project entities.
static KEYWORDS: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
    HashSet::from([
        // Common across languages
        "if", "else", "for", "while", "do", "switch", "case", "break",
        "continue", "return", "try", "catch", "finally", "throw",
        "new", "delete", "typeof", "instanceof", "in", "of",
        "true", "false", "null", "undefined", "void", "this",
        "super", "class", "extends", "implements", "interface",
        "enum", "const", "let", "var", "function", "async",
        "await", "yield", "import", "export", "default", "from",
        "as", "static", "public", "private", "protected",
        "abstract", "final", "override",
        // Rust
        "fn", "pub", "mod", "use", "struct", "impl", "trait",
        "where", "type", "self", "Self", "mut", "ref", "match",
        "loop", "move", "unsafe", "extern", "crate", "dyn",
        // Python
        "def", "elif", "except", "raise", "with",
        "pass", "lambda", "nonlocal", "global", "assert",
        "True", "False", "and", "or", "not", "is",
        // Go
        "func", "package", "range", "select", "chan", "go",
        "defer", "map", "make", "append", "len", "cap",
        // C/C++
        "auto", "register", "volatile", "sizeof", "typedef",
        "template", "typename", "namespace", "virtual", "inline",
        "constexpr", "nullptr", "noexcept", "explicit", "friend",
        "operator", "using", "cout", "endl", "cerr", "cin",
        "printf", "scanf", "malloc", "free", "NULL", "include",
        "ifdef", "ifndef", "endif", "define", "pragma",
        // Ruby
        "end", "then", "elsif", "unless", "until",
        "begin", "rescue", "ensure", "when", "require",
        "attr_accessor", "attr_reader", "attr_writer",
        "puts", "nil", "module", "defined",
        // C#
        "internal", "sealed", "readonly",
        "partial", "delegate", "event", "params", "out",
        "object", "decimal", "sbyte", "ushort", "uint",
        "ulong", "nint", "nuint", "dynamic",
        "get", "set", "value", "init", "record",
        // Types (primitives)
        "string", "number", "boolean", "int", "float", "double",
        "bool", "char", "byte", "i8", "i16", "i32", "i64",
        "u8", "u16", "u32", "u64", "f32", "f64", "usize",
        "isize", "str", "String", "Vec", "Option", "Result",
        "Box", "Arc", "Rc", "HashMap", "HashSet", "Some",
        "Ok", "Err",
    ])
});

/// Whether `word` is in [`KEYWORDS`]. The match is exact and case-sensitive.
fn is_keyword(word: &str) -> bool {
    KEYWORDS.contains(word)
}
1819
#[cfg(test)]
mod tests {
    use super::*;
    use crate::git::types::{FileChange, FileStatus};
    use std::io::Write;
    use tempfile::TempDir;

    /// Fresh temporary directory together with the default parser registry.
    fn create_test_repo() -> (TempDir, ParserRegistry) {
        (
            TempDir::new().unwrap(),
            crate::parser::plugins::create_default_registry(),
        )
    }

    /// Write `content` to `dir/name`, creating missing parent directories first.
    fn write_file(dir: &Path, name: &str, content: &str) {
        let target = dir.join(name);
        if let Some(parent_dir) = target.parent() {
            std::fs::create_dir_all(parent_dir).unwrap();
        }
        std::fs::File::create(target)
            .and_then(|mut file| file.write_all(content.as_bytes()))
            .unwrap();
    }

    /// Adding a file incrementally registers its entities and their edges.
    #[test]
    fn test_incremental_add_file() {
        let (tmp, registry) = create_test_repo();
        let repo = tmp.path();

        // Initial graph: foo (a.ts) calls bar (b.ts).
        write_file(repo, "a.ts", "export function foo() { return bar(); }\n");
        write_file(repo, "b.ts", "export function bar() { return 1; }\n");
        let (mut graph, _) = EntityGraph::build(repo, &["a.ts".into(), "b.ts".into()], &registry);
        assert_eq!(graph.entities.len(), 2);

        // A third file appears; no content is supplied, so it is read from disk.
        write_file(repo, "c.ts", "export function baz() { return foo(); }\n");
        let added = FileChange {
            file_path: "c.ts".into(),
            status: FileStatus::Added,
            old_file_path: None,
            before_content: None,
            after_content: None,
        };
        graph.update_from_changes(&[added], repo, &registry);

        assert_eq!(graph.entities.len(), 3);
        assert!(graph.entities.contains_key("c.ts::function::baz"));

        // baz calls foo, so foo must be among its dependencies.
        let deps = graph.get_dependencies("c.ts::function::baz");
        let dep_names: Vec<&str> = deps.iter().map(|d| d.name.as_str()).collect();
        assert!(
            dep_names.contains(&"foo"),
            "baz should depend on foo. Deps: {:?}",
            dep_names
        );
    }

    /// Deleting a file removes its entities and prunes edges pointing at them.
    #[test]
    fn test_incremental_delete_file() {
        let (tmp, registry) = create_test_repo();
        let repo = tmp.path();

        write_file(repo, "a.ts", "export function foo() { return bar(); }\n");
        write_file(repo, "b.ts", "export function bar() { return 1; }\n");
        let (mut graph, _) = EntityGraph::build(repo, &["a.ts".into(), "b.ts".into()], &registry);
        assert_eq!(graph.entities.len(), 2);

        let deleted = FileChange {
            file_path: "b.ts".into(),
            status: FileStatus::Deleted,
            old_file_path: None,
            before_content: None,
            after_content: None,
        };
        graph.update_from_changes(&[deleted], repo, &registry);

        assert_eq!(graph.entities.len(), 1);
        assert!(!graph.entities.contains_key("b.ts::function::bar"));

        // With bar gone, foo has nothing left to depend on.
        let deps = graph.get_dependencies("a.ts::function::foo");
        let dep_names: Vec<&str> = deps.iter().map(|d| d.name.as_str()).collect();
        assert!(
            deps.is_empty(),
            "foo's deps should be empty after bar deleted. Deps: {:?}",
            dep_names
        );
    }

    /// Modifying a file replaces the old edges of its entities with the new ones.
    #[test]
    fn test_incremental_modify_file() {
        let (tmp, registry) = create_test_repo();
        let repo = tmp.path();

        write_file(repo, "a.ts", "export function foo() { return bar(); }\n");
        write_file(repo, "b.ts", "export function bar() { return 1; }\nexport function baz() { return 2; }\n");
        let (mut graph, _) = EntityGraph::build(repo, &["a.ts".into(), "b.ts".into()], &registry);
        assert_eq!(graph.entities.len(), 3);

        // foo now calls baz instead of bar.
        write_file(repo, "a.ts", "export function foo() { return baz(); }\n");
        let modified = FileChange {
            file_path: "a.ts".into(),
            status: FileStatus::Modified,
            old_file_path: None,
            before_content: None,
            after_content: None,
        };
        graph.update_from_changes(&[modified], repo, &registry);

        assert_eq!(graph.entities.len(), 3);

        let deps = graph.get_dependencies("a.ts::function::foo");
        let depends_on = |wanted: &str| deps.iter().any(|d| d.name == wanted);
        let dep_names = || deps.iter().map(|d| d.name.as_str()).collect::<Vec<&str>>();
        assert!(
            depends_on("baz"),
            "foo should depend on baz after modification. Deps: {:?}",
            dep_names()
        );
        assert!(
            !depends_on("bar"),
            "foo should no longer depend on bar. Deps: {:?}",
            dep_names()
        );
    }

    /// Content carried by the change itself is used without touching the disk.
    #[test]
    fn test_incremental_with_content() {
        let (tmp, registry) = create_test_repo();
        let repo = tmp.path();

        write_file(repo, "a.ts", "export function foo() { return 1; }\n");
        let (mut graph, _) = EntityGraph::build(repo, &["a.ts".into()], &registry);
        assert_eq!(graph.entities.len(), 1);

        // b.ts is never written to disk; its source travels inside the change.
        let added_inline = FileChange {
            file_path: "b.ts".into(),
            status: FileStatus::Added,
            old_file_path: None,
            before_content: None,
            after_content: Some("export function bar() { return foo(); }\n".into()),
        };
        graph.update_from_changes(&[added_inline], repo, &registry);

        assert_eq!(graph.entities.len(), 2);
        let deps = graph.get_dependencies("b.ts::function::bar");
        assert!(deps.iter().any(|d| d.name == "foo"));
    }

    /// Called functions are reported; the entity's own name is not.
    #[test]
    fn test_extract_references() {
        let source = "function processData(input) {\n  const result = validateInput(input);\n  return transform(result);\n}";
        let refs = extract_references_from_content(source, "processData");
        for expected in ["validateInput", "transform"] {
            assert!(refs.contains(&expected));
        }
        // The entity's own name is excluded.
        assert!(!refs.contains(&"processData"));
    }

    /// Language keywords never show up as references.
    #[test]
    fn test_extract_references_skips_keywords() {
        let source = "function foo() { if (true) { return false; } }";
        let refs = extract_references_from_content(source, "foo");
        for keyword in ["if", "true", "return", "false"] {
            assert!(!refs.contains(&keyword));
        }
    }

    /// A name directly followed by `(` is classified as a call.
    #[test]
    fn test_infer_ref_type_call() {
        let inferred = infer_ref_type("validateInput(data)", "validateInput");
        assert_eq!(inferred, RefType::Calls);
    }

    /// A name that is neither called nor imported defaults to a type reference.
    #[test]
    fn test_infer_ref_type_type() {
        let inferred = infer_ref_type("let x: MyType = something", "MyType");
        assert_eq!(inferred, RefType::TypeRef);
    }

    /// Multi-byte UTF-8 content must not cause a panic while scanning.
    #[test]
    fn test_infer_ref_type_multibyte_utf8() {
        let cases = [
            ("let café = foo(x)", "foo", RefType::Calls),
            (
                "class HandicapfrPublicationFieldsEnum:\n    É = 1\n    bar()",
                "bar",
                RefType::Calls,
            ),
            // No match at all must not panic either.
            ("// 日本語コメント\nlet x = 1", "missing", RefType::TypeRef),
        ];
        for (content, name, expected) in cases {
            assert_eq!(infer_ref_type(content, name), expected);
        }
    }

    /// `self.validate()` in Python links to the sibling method.
    #[test]
    fn test_dot_chain_self_resolution() {
        let (tmp, registry) = create_test_repo();
        let repo = tmp.path();

        let source = "\
class MyService:
    def process(self):
        return self.validate()

    def validate(self):
        return True
";
        write_file(repo, "service.py", source);

        let (graph, _) = EntityGraph::build(repo, &["service.py".into()], &registry);

        let process_id = graph
            .entities
            .keys()
            .find(|key| key.contains("process"))
            .expect("process entity should exist");
        let deps = graph.get_dependencies(process_id);
        let dep_names: Vec<&str> = deps.iter().map(|d| d.name.as_str()).collect();
        assert!(
            dep_names.contains(&"validate"),
            "process should depend on validate via self.validate(). Deps: {:?}",
            dep_names
        );
    }

    /// `this.validate()` in TypeScript links to the sibling method.
    #[test]
    fn test_dot_chain_this_resolution() {
        let (tmp, registry) = create_test_repo();
        let repo = tmp.path();

        let source = "\
class UserService {
    process() {
        return this.validate();
    }
    validate() {
        return true;
    }
}
";
        write_file(repo, "service.ts", source);

        let (graph, _) = EntityGraph::build(repo, &["service.ts".into()], &registry);

        let process_id = graph
            .entities
            .keys()
            .find(|key| key.contains("process"))
            .expect("process entity should exist");
        let deps = graph.get_dependencies(process_id);
        let dep_names: Vec<&str> = deps.iter().map(|d| d.name.as_str()).collect();
        assert!(
            dep_names.contains(&"validate"),
            "process should depend on validate via this.validate(). Deps: {:?}",
            dep_names
        );
    }

    /// `MathUtils.compute()` links the caller to the static method.
    #[test]
    fn test_dot_chain_class_static() {
        let (tmp, registry) = create_test_repo();
        let repo = tmp.path();

        let source = "\
class MathUtils {
    static compute() { return 1; }
}
function caller() { return MathUtils.compute(); }
";
        write_file(repo, "utils.ts", source);

        let (graph, _) = EntityGraph::build(repo, &["utils.ts".into()], &registry);

        let caller_id = graph
            .entities
            .keys()
            .find(|key| key.contains("caller"))
            .expect("caller entity should exist");
        let deps = graph.get_dependencies(caller_id);
        let dep_names: Vec<&str> = deps.iter().map(|d| d.name.as_str()).collect();
        assert!(
            dep_names.contains(&"compute"),
            "caller should depend on compute via MathUtils.compute(). Deps: {:?}",
            dep_names
        );
    }

    /// A named JS/TS import yields an edge to the imported function.
    #[test]
    fn test_js_ts_import_resolution() {
        let (tmp, registry) = create_test_repo();
        let repo = tmp.path();

        let helper_source = "\
export function helper() { return 1; }
";
        let main_source = "\
import { helper } from './helper';
export function main() { return helper(); }
";
        write_file(repo, "helper.ts", helper_source);
        write_file(repo, "main.ts", main_source);

        let (graph, _) = EntityGraph::build(
            repo,
            &["helper.ts".into(), "main.ts".into()],
            &registry,
        );

        let main_id = graph
            .entities
            .keys()
            .find(|key| key.contains("main"))
            .expect("main entity should exist");
        let deps = graph.get_dependencies(main_id);
        let dep_names: Vec<&str> = deps.iter().map(|d| d.name.as_str()).collect();
        assert!(
            dep_names.contains(&"helper"),
            "main should depend on helper via JS import. Deps: {:?}",
            dep_names
        );
    }

    /// `self.process()` in ClassA must not link to the same-named method of ClassB.
    #[test]
    fn test_dot_chain_no_false_edges() {
        let (tmp, registry) = create_test_repo();
        let repo = tmp.path();

        // Two classes in different files share the method name "process".
        let class_a = "\
class ClassA:
    def run(self):
        return self.process()

    def process(self):
        return 1
";
        let class_b = "\
class ClassB:
    def process(self):
        return 2
";
        write_file(repo, "a.py", class_a);
        write_file(repo, "b.py", class_b);

        let (graph, _) = EntityGraph::build(
            repo,
            &["a.py".into(), "b.py".into()],
            &registry,
        );

        let run_id = graph
            .entities
            .keys()
            .find(|key| key.contains("run"))
            .expect("run entity should exist");
        let deps = graph.get_dependencies(run_id);

        // Every "process" dependency of run has to live in a.py.
        deps.iter()
            .filter(|dep| dep.name == "process")
            .for_each(|dep| {
                assert!(
                    dep.file_path == "a.py",
                    "run's process dep should be in a.py, not {}",
                    dep.file_path
                );
            });
    }

    /// Plain calls without a dot-chain still resolve through bag-of-words matching.
    #[test]
    fn test_dot_chain_fallback() {
        let (tmp, registry) = create_test_repo();
        let repo = tmp.path();

        // caller invokes helper directly, with no receiver to resolve.
        let source = "\
export function helper() { return 1; }
export function caller() {
    const val = helper();
    return val;
}
";
        write_file(repo, "app.ts", source);

        let (graph, _) = EntityGraph::build(repo, &["app.ts".into()], &registry);

        let caller_id = graph
            .entities
            .keys()
            .find(|key| key.contains("caller"))
            .expect("caller entity should exist");
        let deps = graph.get_dependencies(caller_id);
        let dep_names: Vec<&str> = deps.iter().map(|d| d.name.as_str()).collect();
        assert!(
            dep_names.contains(&"helper"),
            "caller should still resolve helper via bag-of-words. Deps: {:?}",
            dep_names
        );
    }

}