Skip to main content

sem_core/parser/
graph.rs

1//! Entity dependency graph — cross-file reference extraction.
2//!
3//! Implements a two-pass approach inspired by arXiv:2601.08773 (Reliable Graph-RAG):
4//! Pass 1: Extract all entities, build a symbol table (name → entity ID).
5//! Pass 2: For each entity, extract identifier references from its AST subtree,
6//!         resolve them against the symbol table to create edges.
7//!
8//! This enables impact analysis: "if I change entity X, what else is affected?"
9
10use std::collections::{HashMap, HashSet};
11use std::path::Path;
12use std::sync::LazyLock;
13
14use rayon::prelude::*;
15use regex::Regex;
16use serde::{Deserialize, Serialize};
17
18use crate::git::types::{FileChange, FileStatus};
19use crate::model::entity::SemanticEntity;
20use crate::parser::registry::ParserRegistry;
21use crate::parser::scope_resolve;
22
/// A directed reference edge from one entity to another.
///
/// Serialized with camelCase field names (`fromEntity`, `toEntity`, `refType`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct EntityRef {
    /// ID of the entity whose content contains the reference.
    pub from_entity: String,
    /// ID of the entity being referenced.
    pub to_entity: String,
    /// Kind of reference this edge represents.
    pub ref_type: RefType,
}
31
32/// Type of reference between entities.
33#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
34#[serde(rename_all = "lowercase")]
35pub enum RefType {
36    /// Function/method call
37    Calls,
38    /// Type reference (extends, implements, field type)
39    TypeRef,
40    /// Import/use statement reference
41    Imports,
42}
43
/// A complete entity dependency graph for a set of files.
///
/// `dependents` and `dependencies` are lookup indexes over `edges`; the
/// constructors in this file fill all three from the same edge list.
#[derive(Debug)]
pub struct EntityGraph {
    /// All entities indexed by ID
    pub entities: HashMap<String, EntityInfo>,
    /// Flat list of directed edges (`from_entity` → `to_entity`, tagged with a `ref_type`)
    pub edges: Vec<EntityRef>,
    /// Reverse index: entity_id → IDs of entities that reference it
    pub dependents: HashMap<String, Vec<String>>,
    /// Forward index: entity_id → IDs of entities it references
    pub dependencies: HashMap<String, Vec<String>>,
}
56
/// Minimal entity info stored in the graph.
///
/// A trimmed-down copy of a `SemanticEntity` (no source content), serialized
/// with camelCase field names.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct EntityInfo {
    /// Unique entity ID; also the key under which this entry is stored in the graph.
    pub id: String,
    /// Entity name as used for symbol-table lookups (not necessarily unique).
    pub name: String,
    /// Entity kind as reported by the parser plugin (e.g. `"class"`, `"struct"`, `"interface"`).
    pub entity_type: String,
    /// Path of the file that defines the entity, as passed to the parser plugin.
    pub file_path: String,
    /// ID of the enclosing entity, if any; omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub parent_id: Option<String>,
    /// First line of the entity, copied from the parser output.
    // NOTE(review): presumably 1-based like editor line numbers — confirm against the plugins.
    pub start_line: usize,
    /// Last line of the entity, copied from the parser output.
    pub end_line: usize,
}
70
71impl EntityGraph {
72    /// Reconstruct an EntityGraph from pre-loaded parts (e.g. from a cache).
73    pub fn from_parts(entities: HashMap<String, EntityInfo>, edges: Vec<EntityRef>) -> Self {
74        let mut dependents: HashMap<String, Vec<String>> = HashMap::new();
75        let mut dependencies: HashMap<String, Vec<String>> = HashMap::new();
76        for edge in &edges {
77            dependents
78                .entry(edge.to_entity.clone())
79                .or_default()
80                .push(edge.from_entity.clone());
81            dependencies
82                .entry(edge.from_entity.clone())
83                .or_default()
84                .push(edge.to_entity.clone());
85        }
86        EntityGraph {
87            entities,
88            edges,
89            dependents,
90            dependencies,
91        }
92    }
93
    /// Build an entity graph from a set of files.
    ///
    /// Pass 1: Extract all entities from all files using the parser registry.
    /// Pass 2: For each entity, find identifier tokens and resolve them against
    ///         the symbol table to create reference edges.
    ///
    /// # Parameters
    /// - `root`: directory each entry of `file_paths` is joined onto before reading.
    /// - `file_paths`: files to analyse. A file that cannot be read as UTF-8 text, or
    ///   for which the registry has no plugin, is skipped without an error.
    /// - `registry`: source of the per-file parser plugins.
    ///
    /// # Returns
    /// A graph whose edges are unique per `(from, to)` pair. When the same pair is
    /// produced more than once, the first occurrence wins; scope-resolver edges come
    /// first, so their `ref_type` takes precedence.
    pub fn build(
        root: &Path,
        file_paths: &[String],
        registry: &ParserRegistry,
    ) -> Self {
        // Pass 1: Extract all entities in parallel (file I/O + tree-sitter parsing)
        let all_entities: Vec<SemanticEntity> = file_paths
            .par_iter()
            .filter_map(|file_path| {
                let full_path = root.join(file_path);
                let content = std::fs::read_to_string(&full_path).ok()?;
                let plugin = registry.get_plugin_with_content(file_path, &content)?;
                Some(plugin.extract_entities(&content, file_path))
            })
            .flatten()
            .collect();

        // Build symbol table: name → entity IDs (can be multiple with same name)
        let mut symbol_table: HashMap<String, Vec<String>> = HashMap::with_capacity(all_entities.len());
        let mut entity_map: HashMap<String, EntityInfo> = HashMap::with_capacity(all_entities.len());

        for entity in &all_entities {
            symbol_table
                .entry(entity.name.clone())
                .or_default()
                .push(entity.id.clone());

            entity_map.insert(
                entity.id.clone(),
                EntityInfo {
                    id: entity.id.clone(),
                    name: entity.name.clone(),
                    entity_type: entity.entity_type.clone(),
                    file_path: entity.file_path.clone(),
                    parent_id: entity.parent_id.clone(),
                    start_line: entity.start_line,
                    end_line: entity.end_line,
                },
            );
        }

        // Build parent-child set, stored as (parent_id, child_id), for skipping
        // class→method self-edges
        let parent_child_pairs: HashSet<(&str, &str)> = all_entities
            .iter()
            .filter_map(|e| {
                e.parent_id.as_ref().map(|pid| (pid.as_str(), e.id.as_str()))
            })
            .collect();

        // Build set of (class_id, child_method_name) so classes skip refs to their own methods
        let class_child_names: HashSet<(&str, &str)> = all_entities
            .iter()
            .filter_map(|e| {
                e.parent_id.as_ref().map(|pid| (pid.as_str(), e.name.as_str()))
            })
            .collect();

        // Build class-related maps for dot-chain resolution
        // class_entity_names: all class/struct/interface entity names
        let class_entity_names: HashSet<&str> = all_entities
            .iter()
            .filter(|e| matches!(e.entity_type.as_str(), "class" | "struct" | "interface" | "class_type"))
            .map(|e| e.name.as_str())
            .collect();

        // id_to_name: quick lookup for parent name resolution
        let id_to_name: HashMap<&str, &str> = all_entities
            .iter()
            .map(|e| (e.id.as_str(), e.name.as_str()))
            .collect();

        // enclosing_class: entity_id → class_name (for self/this resolution)
        // class_members: class_name → [(member_name, member_entity_id)]
        // Both maps are keyed by class *name*, so same-named classes in different
        // files share one member list.
        let mut enclosing_class: HashMap<&str, &str> = HashMap::new();
        let mut class_members: HashMap<&str, Vec<(&str, &str)>> = HashMap::new();

        for entity in &all_entities {
            if let Some(ref pid) = entity.parent_id {
                if let Some(&parent_name) = id_to_name.get(pid.as_str()) {
                    if class_entity_names.contains(parent_name) {
                        enclosing_class.insert(entity.id.as_str(), parent_name);
                        class_members
                            .entry(parent_name)
                            .or_default()
                            .push((entity.name.as_str(), entity.id.as_str()));
                    }
                }
            }
        }

        // Build import table: (file_path, imported_name) → target entity ID
        // e.g. ("io_handler.py", "validate") → "core.py::function::validate"
        let import_table = build_import_table(root, file_paths, &symbol_table, &entity_map);

        // Run scope-aware resolver for supported languages
        // (only invoked when at least one path has one of the extensions below)
        let has_scope_lang = file_paths.iter().any(|f| {
            f.ends_with(".py") || f.ends_with(".ts") || f.ends_with(".tsx")
                || f.ends_with(".js") || f.ends_with(".jsx")
                || f.ends_with(".rs") || f.ends_with(".go")
        });
        let (scope_edges, scope_resolved_entities) = if has_scope_lang {
            let result = scope_resolve::resolve_with_scopes(root, file_paths, &all_entities, &entity_map);
            // An entity counts as scope-resolved once it is the source of at least one scope edge.
            let resolved_entity_ids: HashSet<String> = result.edges.iter()
                .map(|(from, _, _)| from.clone())
                .collect();
            (result.edges, resolved_entity_ids)
        } else {
            (vec![], HashSet::new())
        };

        // Pass 2: Extract references in parallel, then resolve against symbol table
        // Phase 1: Dot-chain resolution (precise self.X, this.X, ClassName.X)
        // Phase 2: Bag-of-words resolution (existing logic, skipping consumed words)
        // Entities that already received edges from the scope resolver are skipped entirely.
        let resolved_refs: Vec<(String, String, RefType)> = all_entities
            .par_iter()
            .flat_map(|entity| {
                // Skip entities already resolved by scope resolver
                if scope_resolved_entities.contains(&entity.id) {
                    return vec![];
                }

                let mut entity_edges = Vec::new();
                // Member/receiver names already turned into edges by phase 1
                let mut consumed_words: HashSet<String> = HashSet::new();

                // Phase 1: Dot-chain resolution
                let stripped = strip_comments_and_strings(&entity.content);
                let dot_chains = extract_dot_chains(&stripped);

                for (receiver, member) in &dot_chains {
                    if *receiver == "self" || *receiver == "this" {
                        // self.B / this.B: resolve to sibling method in enclosing class
                        if let Some(class_name) = enclosing_class.get(entity.id.as_str()) {
                            if let Some(members) = class_members.get(class_name) {
                                for (n, tid) in members {
                                    if *n == *member && *tid != entity.id.as_str() {
                                        entity_edges.push((
                                            entity.id.clone(),
                                            tid.to_string(),
                                            RefType::Calls,
                                        ));
                                        consumed_words.insert(member.to_string());
                                        break;
                                    }
                                }
                            }
                        }
                    } else if class_entity_names.contains(*receiver) {
                        // ClassName.B: resolve to class member
                        if let Some(members) = class_members.get(*receiver) {
                            for (n, tid) in members {
                                if *n == *member {
                                    entity_edges.push((
                                        entity.id.clone(),
                                        tid.to_string(),
                                        RefType::Calls,
                                    ));
                                    consumed_words.insert(member.to_string());
                                    consumed_words.insert(receiver.to_string());
                                    break;
                                }
                            }
                        }
                    }
                    // Unresolved chains fall through to bag-of-words below
                }

                // Phase 2: Bag-of-words resolution (skip words consumed by dot-chains)
                let refs = extract_references_from_content(&entity.content, &entity.name);
                for ref_name in refs {
                    if consumed_words.contains(ref_name) {
                        continue;
                    }

                    // Skip references to names that are this class's own methods
                    if class_child_names.contains(&(entity.id.as_str(), ref_name)) {
                        continue;
                    }

                    // Check import table first: if this file imports this name,
                    // resolve to the import target instead of global symbol table
                    let import_key = (entity.file_path.clone(), ref_name.to_string());
                    if let Some(import_target_id) = import_table.get(&import_key) {
                        if import_target_id != &entity.id
                            && !parent_child_pairs.contains(&(entity.id.as_str(), import_target_id.as_str()))
                            && !parent_child_pairs.contains(&(import_target_id.as_str(), entity.id.as_str()))
                        {
                            let ref_type = infer_ref_type(&entity.content, &ref_name);
                            entity_edges.push((
                                entity.id.clone(),
                                import_target_id.clone(),
                                ref_type,
                            ));
                        }
                        // An imported name never falls back to the same-file lookup below.
                        continue;
                    }

                    if let Some(target_ids) = symbol_table.get(ref_name) {
                        // Without an import, only resolve to entities in the same file.
                        // Cross-file resolution is handled by the import table above.
                        // Only the first same-file candidate is considered.
                        let target = target_ids
                            .iter()
                            .find(|id| {
                                *id != &entity.id
                                    && entity_map
                                        .get(*id)
                                        .map_or(false, |e| e.file_path == entity.file_path)
                            });

                        if let Some(target_id) = target {
                            // Skip parent-child edges (class -> own method)
                            if parent_child_pairs.contains(&(entity.id.as_str(), target_id.as_str()))
                                || parent_child_pairs.contains(&(target_id.as_str(), entity.id.as_str()))
                            {
                                continue;
                            }
                            let ref_type = infer_ref_type(&entity.content, &ref_name);
                            entity_edges.push((
                                entity.id.clone(),
                                target_id.clone(),
                                ref_type,
                            ));
                        }
                    }
                }
                entity_edges
            })
            .collect();

        // Merge scope edges with bag-of-words edges, deduplicating
        // on the (from, to) pair; the first occurrence is kept.
        let mut all_resolved: Vec<(String, String, RefType)> = scope_edges;
        all_resolved.extend(resolved_refs);
        let mut seen_edges: HashSet<(String, String)> = HashSet::new();
        all_resolved.retain(|e| seen_edges.insert((e.0.clone(), e.1.clone())));

        // Build edge indexes from resolved references
        let mut edges: Vec<EntityRef> = Vec::with_capacity(all_resolved.len());
        let mut dependents: HashMap<String, Vec<String>> = HashMap::new();
        let mut dependencies: HashMap<String, Vec<String>> = HashMap::new();

        for (from_entity, to_entity, ref_type) in all_resolved {
            dependents
                .entry(to_entity.clone())
                .or_default()
                .push(from_entity.clone());
            dependencies
                .entry(from_entity.clone())
                .or_default()
                .push(to_entity.clone());
            edges.push(EntityRef {
                from_entity,
                to_entity,
                ref_type,
            });
        }

        EntityGraph {
            entities: entity_map,
            edges,
            dependents,
            dependencies,
        }
    }
362
363    /// Incrementally build an entity graph: reparse only stale files, reuse cached data for clean files.
364    ///
365    /// Uses the same full 3-phase resolution (scope + dot-chain + bag-of-words) as `build()`,
366    /// but only runs it for entities in stale files + clean entities whose cached edges
367    /// pointed into stale files (they need re-resolution since their targets may have changed).
368    pub fn build_incremental(
369        root: &Path,
370        stale_files: &[String],
371        all_file_paths: &[String],
372        cached_entities: Vec<SemanticEntity>,
373        cached_edges: Vec<EntityRef>,
374        registry: &ParserRegistry,
375    ) -> (Self, Vec<SemanticEntity>) {
376        // Build set of stale file paths for quick lookup
377        let stale_set: HashSet<&str> = stale_files.iter().map(|s| s.as_str()).collect();
378
379        // Parse stale files in parallel to get new entities
380        let new_entities: Vec<SemanticEntity> = stale_files
381            .par_iter()
382            .filter_map(|file_path| {
383                let full_path = root.join(file_path);
384                let content = std::fs::read_to_string(&full_path).ok()?;
385                let plugin = registry.get_plugin_with_content(file_path, &content)?;
386                Some(plugin.extract_entities(&content, file_path))
387            })
388            .flatten()
389            .collect();
390
391        // Merge: cached (clean) entities + new (stale) entities
392        let all_entities: Vec<SemanticEntity> = cached_entities
393            .into_iter()
394            .chain(new_entities.into_iter())
395            .collect();
396
397        // Collect stale entity IDs
398        let stale_entity_ids: HashSet<&str> = all_entities
399            .iter()
400            .filter(|e| stale_set.contains(e.file_path.as_str()))
401            .map(|e| e.id.as_str())
402            .collect();
403
404        // Find affected clean entities: those with cached edges pointing to/from stale entities
405        let mut affected_clean_ids: HashSet<String> = HashSet::new();
406        for edge in &cached_edges {
407            if stale_entity_ids.contains(edge.to_entity.as_str()) {
408                if !stale_entity_ids.contains(edge.from_entity.as_str()) {
409                    affected_clean_ids.insert(edge.from_entity.clone());
410                }
411            }
412        }
413
414        // Keep only edges where both endpoints are clean AND from_entity is not affected
415        let kept_edges: Vec<EntityRef> = cached_edges
416            .into_iter()
417            .filter(|e| {
418                !stale_entity_ids.contains(e.from_entity.as_str())
419                    && !stale_entity_ids.contains(e.to_entity.as_str())
420                    && !affected_clean_ids.contains(&e.from_entity)
421            })
422            .collect();
423
424        // Set of entity IDs that need resolution
425        let needs_resolution: HashSet<&str> = all_entities
426            .iter()
427            .filter(|e| {
428                stale_entity_ids.contains(e.id.as_str())
429                    || affected_clean_ids.contains(&e.id)
430            })
431            .map(|e| e.id.as_str())
432            .collect();
433
434        // Now run the same resolution logic as build() but only for entities in needs_resolution.
435        // We still need the full context (symbol table, import table, etc.) from ALL entities.
436
437        // Build symbol table from all entities
438        let mut symbol_table: HashMap<String, Vec<String>> = HashMap::with_capacity(all_entities.len());
439        let mut entity_map: HashMap<String, EntityInfo> = HashMap::with_capacity(all_entities.len());
440
441        for entity in &all_entities {
442            symbol_table
443                .entry(entity.name.clone())
444                .or_default()
445                .push(entity.id.clone());
446            entity_map.insert(
447                entity.id.clone(),
448                EntityInfo {
449                    id: entity.id.clone(),
450                    name: entity.name.clone(),
451                    entity_type: entity.entity_type.clone(),
452                    file_path: entity.file_path.clone(),
453                    parent_id: entity.parent_id.clone(),
454                    start_line: entity.start_line,
455                    end_line: entity.end_line,
456                },
457            );
458        }
459
460        // Build parent-child set
461        let parent_child_pairs: HashSet<(&str, &str)> = all_entities
462            .iter()
463            .filter_map(|e| {
464                e.parent_id.as_ref().map(|pid| (pid.as_str(), e.id.as_str()))
465            })
466            .collect();
467
468        let class_child_names: HashSet<(&str, &str)> = all_entities
469            .iter()
470            .filter_map(|e| {
471                e.parent_id.as_ref().map(|pid| (pid.as_str(), e.name.as_str()))
472            })
473            .collect();
474
475        let class_entity_names: HashSet<&str> = all_entities
476            .iter()
477            .filter(|e| matches!(e.entity_type.as_str(), "class" | "struct" | "interface" | "class_type"))
478            .map(|e| e.name.as_str())
479            .collect();
480
481        let id_to_name: HashMap<&str, &str> = all_entities
482            .iter()
483            .map(|e| (e.id.as_str(), e.name.as_str()))
484            .collect();
485
486        let mut enclosing_class: HashMap<&str, &str> = HashMap::new();
487        let mut class_members: HashMap<&str, Vec<(&str, &str)>> = HashMap::new();
488
489        for entity in &all_entities {
490            if let Some(ref pid) = entity.parent_id {
491                if let Some(&parent_name) = id_to_name.get(pid.as_str()) {
492                    if class_entity_names.contains(parent_name) {
493                        enclosing_class.insert(entity.id.as_str(), parent_name);
494                        class_members
495                            .entry(parent_name)
496                            .or_default()
497                            .push((entity.name.as_str(), entity.id.as_str()));
498                    }
499                }
500            }
501        }
502
503        // Build import table from ALL files (imports may reference stale entities)
504        let import_table = build_import_table(root, all_file_paths, &symbol_table, &entity_map);
505
506        // Run scope-aware resolver only on files that need resolution
507        let resolve_file_paths: Vec<String> = all_file_paths
508            .iter()
509            .filter(|f| {
510                // Include file if any entity in needs_resolution belongs to it
511                stale_set.contains(f.as_str()) || all_entities.iter().any(|e| {
512                    e.file_path == **f && affected_clean_ids.contains(&e.id)
513                })
514            })
515            .cloned()
516            .collect();
517
518        let has_scope_lang = resolve_file_paths.iter().any(|f| {
519            f.ends_with(".py") || f.ends_with(".ts") || f.ends_with(".tsx")
520                || f.ends_with(".js") || f.ends_with(".jsx")
521                || f.ends_with(".rs") || f.ends_with(".go")
522        });
523        let (scope_edges, scope_resolved_entities) = if has_scope_lang {
524            let result = scope_resolve::resolve_with_scopes(root, &resolve_file_paths, &all_entities, &entity_map);
525            let resolved_entity_ids: HashSet<String> = result.edges.iter()
526                .map(|(from, _, _)| from.clone())
527                .collect();
528            (result.edges, resolved_entity_ids)
529        } else {
530            (vec![], HashSet::new())
531        };
532
533        // Resolve references only for entities in needs_resolution
534        let resolved_refs: Vec<(String, String, RefType)> = all_entities
535            .par_iter()
536            .filter(|e| needs_resolution.contains(e.id.as_str()))
537            .flat_map(|entity| {
538                if scope_resolved_entities.contains(&entity.id) {
539                    return vec![];
540                }
541
542                let mut entity_edges = Vec::new();
543                let mut consumed_words: HashSet<String> = HashSet::new();
544
545                // Phase 1: Dot-chain resolution
546                let stripped = strip_comments_and_strings(&entity.content);
547                let dot_chains = extract_dot_chains(&stripped);
548
549                for (receiver, member) in &dot_chains {
550                    if *receiver == "self" || *receiver == "this" {
551                        if let Some(class_name) = enclosing_class.get(entity.id.as_str()) {
552                            if let Some(members) = class_members.get(class_name) {
553                                for (n, tid) in members {
554                                    if *n == *member && *tid != entity.id.as_str() {
555                                        entity_edges.push((
556                                            entity.id.clone(),
557                                            tid.to_string(),
558                                            RefType::Calls,
559                                        ));
560                                        consumed_words.insert(member.to_string());
561                                        break;
562                                    }
563                                }
564                            }
565                        }
566                    } else if class_entity_names.contains(*receiver) {
567                        if let Some(members) = class_members.get(*receiver) {
568                            for (n, tid) in members {
569                                if *n == *member {
570                                    entity_edges.push((
571                                        entity.id.clone(),
572                                        tid.to_string(),
573                                        RefType::Calls,
574                                    ));
575                                    consumed_words.insert(member.to_string());
576                                    consumed_words.insert(receiver.to_string());
577                                    break;
578                                }
579                            }
580                        }
581                    }
582                }
583
584                // Phase 2: Bag-of-words resolution
585                let refs = extract_references_from_content(&entity.content, &entity.name);
586                for ref_name in refs {
587                    if consumed_words.contains(ref_name) {
588                        continue;
589                    }
590                    if class_child_names.contains(&(entity.id.as_str(), ref_name)) {
591                        continue;
592                    }
593
594                    let import_key = (entity.file_path.clone(), ref_name.to_string());
595                    if let Some(import_target_id) = import_table.get(&import_key) {
596                        if import_target_id != &entity.id
597                            && !parent_child_pairs.contains(&(entity.id.as_str(), import_target_id.as_str()))
598                            && !parent_child_pairs.contains(&(import_target_id.as_str(), entity.id.as_str()))
599                        {
600                            let ref_type = infer_ref_type(&entity.content, &ref_name);
601                            entity_edges.push((
602                                entity.id.clone(),
603                                import_target_id.clone(),
604                                ref_type,
605                            ));
606                        }
607                        continue;
608                    }
609
610                    if let Some(target_ids) = symbol_table.get(ref_name) {
611                        let target = target_ids
612                            .iter()
613                            .find(|id| {
614                                *id != &entity.id
615                                    && entity_map
616                                        .get(*id)
617                                        .map_or(false, |e| e.file_path == entity.file_path)
618                            });
619
620                        if let Some(target_id) = target {
621                            if parent_child_pairs.contains(&(entity.id.as_str(), target_id.as_str()))
622                                || parent_child_pairs.contains(&(target_id.as_str(), entity.id.as_str()))
623                            {
624                                continue;
625                            }
626                            let ref_type = infer_ref_type(&entity.content, &ref_name);
627                            entity_edges.push((
628                                entity.id.clone(),
629                                target_id.clone(),
630                                ref_type,
631                            ));
632                        }
633                    }
634                }
635                entity_edges
636            })
637            .collect();
638
639        // Merge scope edges + bag-of-words edges + kept cached edges
640        let mut all_resolved: Vec<(String, String, RefType)> = scope_edges;
641        all_resolved.extend(resolved_refs);
642        let mut seen_edges: HashSet<(String, String)> = HashSet::new();
643        all_resolved.retain(|e| seen_edges.insert((e.0.clone(), e.1.clone())));
644
645        // Build final edge list: kept edges + newly resolved edges
646        let mut edges: Vec<EntityRef> = Vec::with_capacity(kept_edges.len() + all_resolved.len());
647        let mut dependents: HashMap<String, Vec<String>> = HashMap::new();
648        let mut dependencies: HashMap<String, Vec<String>> = HashMap::new();
649
650        // Track all edge pairs for dedup
651        let mut all_edge_pairs: HashSet<(String, String)> = HashSet::new();
652
653        // Add kept cached edges
654        for edge in kept_edges {
655            all_edge_pairs.insert((edge.from_entity.clone(), edge.to_entity.clone()));
656            dependents
657                .entry(edge.to_entity.clone())
658                .or_default()
659                .push(edge.from_entity.clone());
660            dependencies
661                .entry(edge.from_entity.clone())
662                .or_default()
663                .push(edge.to_entity.clone());
664            edges.push(edge);
665        }
666
667        // Add newly resolved edges, dedup against kept edges
668        for (from_entity, to_entity, ref_type) in all_resolved {
669            if !all_edge_pairs.insert((from_entity.clone(), to_entity.clone())) {
670                continue;
671            }
672            dependents
673                .entry(to_entity.clone())
674                .or_default()
675                .push(from_entity.clone());
676            dependencies
677                .entry(from_entity.clone())
678                .or_default()
679                .push(to_entity.clone());
680            edges.push(EntityRef {
681                from_entity,
682                to_entity,
683                ref_type,
684            });
685        }
686
687        let graph = EntityGraph {
688            entities: entity_map,
689            edges,
690            dependents,
691            dependencies,
692        };
693
694        (graph, all_entities)
695    }
696
697    /// Get entities that depend on the given entity (reverse deps).
698    pub fn get_dependents(&self, entity_id: &str) -> Vec<&EntityInfo> {
699        self.dependents
700            .get(entity_id)
701            .map(|ids| {
702                ids.iter()
703                    .filter_map(|id| self.entities.get(id))
704                    .collect()
705            })
706            .unwrap_or_default()
707    }
708
709    /// Get entities that the given entity depends on (forward deps).
710    pub fn get_dependencies(&self, entity_id: &str) -> Vec<&EntityInfo> {
711        self.dependencies
712            .get(entity_id)
713            .map(|ids| {
714                ids.iter()
715                    .filter_map(|id| self.entities.get(id))
716                    .collect()
717            })
718            .unwrap_or_default()
719    }
720
721    /// Impact analysis: if the given entity changes, what else might be affected?
722    /// Returns all transitive dependents (breadth-first), capped at 10k.
723    pub fn impact_analysis(&self, entity_id: &str) -> Vec<&EntityInfo> {
724        self.impact_analysis_capped(entity_id, 10_000)
725    }
726
727    /// Impact analysis with a cap on maximum nodes visited.
728    /// Returns transitive dependents up to the cap. Uses borrowed strings.
729    pub fn impact_analysis_capped(&self, entity_id: &str, max_visited: usize) -> Vec<&EntityInfo> {
730        let mut visited: HashSet<&str> = HashSet::new();
731        let mut queue: std::collections::VecDeque<&str> = std::collections::VecDeque::new();
732        let mut result = Vec::new();
733
734        let start_key = match self.entities.get_key_value(entity_id) {
735            Some((k, _)) => k.as_str(),
736            None => return result,
737        };
738
739        queue.push_back(start_key);
740        visited.insert(start_key);
741
742        while let Some(current) = queue.pop_front() {
743            if result.len() >= max_visited {
744                break;
745            }
746            if let Some(deps) = self.dependents.get(current) {
747                for dep in deps {
748                    if visited.insert(dep.as_str()) {
749                        if let Some(info) = self.entities.get(dep.as_str()) {
750                            result.push(info);
751                        }
752                        queue.push_back(dep.as_str());
753                        if result.len() >= max_visited {
754                            break;
755                        }
756                    }
757                }
758            }
759        }
760
761        result
762    }
763
764    /// Count transitive dependents without collecting them (faster for large graphs).
765    /// Uses borrowed strings to avoid allocation overhead.
766    pub fn impact_count(&self, entity_id: &str, max_count: usize) -> usize {
767        let mut visited: HashSet<&str> = HashSet::new();
768        let mut queue: std::collections::VecDeque<&str> = std::collections::VecDeque::new();
769        let mut count = 0;
770
771        // We need entity_id to live long enough; look it up in our entities map
772        let start_key = match self.entities.get_key_value(entity_id) {
773            Some((k, _)) => k.as_str(),
774            None => return 0,
775        };
776
777        queue.push_back(start_key);
778        visited.insert(start_key);
779
780        while let Some(current) = queue.pop_front() {
781            if count >= max_count {
782                break;
783            }
784            if let Some(deps) = self.dependents.get(current) {
785                for dep in deps {
786                    if visited.insert(dep.as_str()) {
787                        count += 1;
788                        queue.push_back(dep.as_str());
789                        if count >= max_count {
790                            break;
791                        }
792                    }
793                }
794            }
795        }
796
797        count
798    }
799
800    /// Filter entities to those that look like tests.
801    /// Uses name heuristics, file path patterns, and content patterns.
802    pub fn filter_test_entities(&self, entities: &[crate::model::entity::SemanticEntity]) -> HashSet<String> {
803        let mut test_ids = HashSet::new();
804        for entity in entities {
805            if is_test_entity(entity) {
806                test_ids.insert(entity.id.clone());
807            }
808        }
809        test_ids
810    }
811
812    /// Impact analysis filtered to test entities only.
813    /// Returns transitive dependents that are test functions/methods.
814    pub fn test_impact(
815        &self,
816        entity_id: &str,
817        all_entities: &[crate::model::entity::SemanticEntity],
818    ) -> Vec<&EntityInfo> {
819        let test_ids = self.filter_test_entities(all_entities);
820        let impact = self.impact_analysis(entity_id);
821        impact
822            .into_iter()
823            .filter(|info| test_ids.contains(&info.id))
824            .collect()
825    }
826
827    /// Incrementally update the graph from a set of changed files.
828    ///
829    /// Instead of rebuilding the entire graph, this only re-extracts entities
830    /// from changed files and re-resolves their references. This is faster
831    /// than a full rebuild when only a few files changed.
832    ///
833    /// For each changed file:
834    /// - Deleted: remove all entities from that file, prune edges
835    /// - Added/Modified: remove old entities, extract new ones, rebuild references
836    /// - Renamed: update file paths in entity info
837    pub fn update_from_changes(
838        &mut self,
839        changed_files: &[FileChange],
840        root: &Path,
841        registry: &ParserRegistry,
842    ) {
843        let mut affected_files: HashSet<String> = HashSet::new();
844        let mut new_entities: Vec<SemanticEntity> = Vec::new();
845
846        for change in changed_files {
847            affected_files.insert(change.file_path.clone());
848            if let Some(ref old_path) = change.old_file_path {
849                affected_files.insert(old_path.clone());
850            }
851
852            match change.status {
853                FileStatus::Deleted => {
854                    self.remove_entities_for_file(&change.file_path);
855                }
856                FileStatus::Renamed => {
857                    // Update file paths for renamed files
858                    if let Some(ref old_path) = change.old_file_path {
859                        self.remove_entities_for_file(old_path);
860                    }
861                    // Extract entities from the new file
862                    if let Some(entities) = self.extract_file_entities(
863                        &change.file_path,
864                        change.after_content.as_deref(),
865                        root,
866                        registry,
867                    ) {
868                        new_entities.extend(entities);
869                    }
870                }
871                FileStatus::Added | FileStatus::Modified => {
872                    // Remove old entities for this file
873                    self.remove_entities_for_file(&change.file_path);
874                    // Extract new entities
875                    if let Some(entities) = self.extract_file_entities(
876                        &change.file_path,
877                        change.after_content.as_deref(),
878                        root,
879                        registry,
880                    ) {
881                        new_entities.extend(entities);
882                    }
883                }
884            }
885        }
886
887        // Add new entities to the entity map
888        for entity in &new_entities {
889            self.entities.insert(
890                entity.id.clone(),
891                EntityInfo {
892                    id: entity.id.clone(),
893                    name: entity.name.clone(),
894                    entity_type: entity.entity_type.clone(),
895                    file_path: entity.file_path.clone(),
896                    parent_id: entity.parent_id.clone(),
897                    start_line: entity.start_line,
898                    end_line: entity.end_line,
899                },
900            );
901        }
902
903        // Rebuild the global symbol table from all current entities
904        let symbol_table = self.build_symbol_table();
905
906        // Re-resolve references for new entities
907        for entity in &new_entities {
908            self.resolve_entity_references(entity, &symbol_table);
909        }
910
911        // Also re-resolve references for entities in OTHER files that might
912        // reference entities in changed files (their targets may have changed)
913        let changed_entity_names: HashSet<String> = new_entities
914            .iter()
915            .map(|e| e.name.clone())
916            .collect();
917
918        // Find entities in unchanged files that reference any changed entity name
919        let entities_to_recheck: Vec<String> = self
920            .entities
921            .values()
922            .filter(|e| !affected_files.contains(&e.file_path))
923            .filter(|e| {
924                self.dependencies
925                    .get(&e.id)
926                    .map_or(false, |deps| {
927                        deps.iter().any(|dep_id| {
928                            self.entities
929                                .get(dep_id)
930                                .map_or(false, |dep| changed_entity_names.contains(&dep.name))
931                        })
932                    })
933            })
934            .map(|e| e.id.clone())
935            .collect();
936
937        // We don't have the full SemanticEntity for unchanged files, so we skip
938        // deep re-resolution here. The forward/reverse indexes are already updated
939        // by remove_entities_for_file and resolve_entity_references.
940        // For entities that had dangling references (their target was deleted),
941        // the edges were already pruned.
942        let _ = entities_to_recheck; // acknowledge but don't act on for now
943    }
944
945    /// Extract entities from a file, using provided content or reading from disk.
946    fn extract_file_entities(
947        &self,
948        file_path: &str,
949        content: Option<&str>,
950        root: &Path,
951        registry: &ParserRegistry,
952    ) -> Option<Vec<SemanticEntity>> {
953        let content = if let Some(c) = content {
954            c.to_string()
955        } else {
956            let full_path = root.join(file_path);
957            std::fs::read_to_string(&full_path).ok()?
958        };
959
960        let plugin = registry.get_plugin_with_content(file_path, &content)?;
961
962        Some(plugin.extract_entities(&content, file_path))
963    }
964
965    /// Remove all entities belonging to a specific file and prune their edges.
966    fn remove_entities_for_file(&mut self, file_path: &str) {
967        // Collect entity IDs to remove
968        let ids_to_remove: Vec<String> = self
969            .entities
970            .values()
971            .filter(|e| e.file_path == file_path)
972            .map(|e| e.id.clone())
973            .collect();
974
975        let id_set: HashSet<&str> = ids_to_remove.iter().map(|s| s.as_str()).collect();
976
977        // Remove from entity map
978        for id in &ids_to_remove {
979            self.entities.remove(id);
980        }
981
982        // Remove edges involving these entities
983        self.edges
984            .retain(|e| !id_set.contains(e.from_entity.as_str()) && !id_set.contains(e.to_entity.as_str()));
985
986        // Clean up dependency/dependent indexes
987        for id in &ids_to_remove {
988            // Remove forward deps
989            if let Some(deps) = self.dependencies.remove(id) {
990                // Also remove from reverse index
991                for dep in &deps {
992                    if let Some(dependents) = self.dependents.get_mut(dep) {
993                        dependents.retain(|d| d != id);
994                    }
995                }
996            }
997            // Remove reverse deps
998            if let Some(deps) = self.dependents.remove(id) {
999                // Also remove from forward index
1000                for dep in &deps {
1001                    if let Some(dependencies) = self.dependencies.get_mut(dep) {
1002                        dependencies.retain(|d| d != id);
1003                    }
1004                }
1005            }
1006        }
1007    }
1008
1009    /// Build a symbol table from all current entities.
1010    fn build_symbol_table(&self) -> HashMap<String, Vec<String>> {
1011        let mut symbol_table: HashMap<String, Vec<String>> = HashMap::new();
1012        for entity in self.entities.values() {
1013            symbol_table
1014                .entry(entity.name.clone())
1015                .or_default()
1016                .push(entity.id.clone());
1017        }
1018        symbol_table
1019    }
1020
1021    /// Resolve references for a single entity against the symbol table.
1022    fn resolve_entity_references(
1023        &mut self,
1024        entity: &SemanticEntity,
1025        symbol_table: &HashMap<String, Vec<String>>,
1026    ) {
1027        let refs = extract_references_from_content(&entity.content, &entity.name);
1028
1029        for ref_name in refs {
1030            if let Some(target_ids) = symbol_table.get(ref_name) {
1031                let target = target_ids
1032                    .iter()
1033                    .find(|id| {
1034                        *id != &entity.id
1035                            && self
1036                                .entities
1037                                .get(*id)
1038                                .map_or(false, |e| e.file_path == entity.file_path)
1039                    })
1040                    .or_else(|| target_ids.iter().find(|id| *id != &entity.id));
1041
1042                if let Some(target_id) = target {
1043                    let ref_type = infer_ref_type(&entity.content, &ref_name);
1044                    self.edges.push(EntityRef {
1045                        from_entity: entity.id.clone(),
1046                        to_entity: target_id.clone(),
1047                        ref_type,
1048                    });
1049                    self.dependents
1050                        .entry(target_id.clone())
1051                        .or_default()
1052                        .push(entity.id.clone());
1053                    self.dependencies
1054                        .entry(entity.id.clone())
1055                        .or_default()
1056                        .push(target_id.clone());
1057                }
1058            }
1059        }
1060    }
1061}
1062
1063/// Check if an entity looks like a test based on name, file path, and content patterns.
1064fn is_test_entity(entity: &crate::model::entity::SemanticEntity) -> bool {
1065    let name = &entity.name;
1066    let path = &entity.file_path;
1067    let content = &entity.content;
1068
1069    // Name patterns
1070    if name.starts_with("test_") || name.starts_with("Test") || name.ends_with("_test") || name.ends_with("Test") {
1071        return true;
1072    }
1073    if name.starts_with("it_") || name.starts_with("describe_") || name.starts_with("spec_") {
1074        return true;
1075    }
1076
1077    // File path patterns
1078    let path_lower = path.to_lowercase();
1079    let in_test_file = path_lower.contains("/test/")
1080        || path_lower.contains("/tests/")
1081        || path_lower.contains("/spec/")
1082        || path_lower.contains("_test.")
1083        || path_lower.contains(".test.")
1084        || path_lower.contains("_spec.")
1085        || path_lower.contains(".spec.");
1086
1087    // Content patterns (test annotations/decorators)
1088    let has_test_marker = content.contains("#[test]")
1089        || content.contains("#[cfg(test)]")
1090        || content.contains("@Test")
1091        || content.contains("@pytest")
1092        || content.contains("@test")
1093        || content.contains("describe(")
1094        || content.contains("it(")
1095        || content.contains("test(");
1096
1097    in_test_file && has_test_marker
1098}
1099
1100/// Build import table: maps (file_path, imported_name) → target entity ID.
1101///
1102/// Parses `from X import Y` / `import X` / `use X` style statements from entity content
1103/// and resolves Y to the entity it refers to in the symbol table.
1104fn build_import_table(
1105    root: &Path,
1106    file_paths: &[String],
1107    symbol_table: &HashMap<String, Vec<String>>,
1108    entity_map: &HashMap<String, EntityInfo>,
1109) -> HashMap<(String, String), String> {
1110    let mut import_table: HashMap<(String, String), String> = HashMap::new();
1111
1112    for file_path in file_paths {
1113        let full_path = root.join(file_path);
1114        let content = match std::fs::read_to_string(&full_path) {
1115            Ok(c) => c,
1116            Err(_) => continue,
1117        };
1118
1119        // Join multi-line imports into single logical lines
1120        // e.g. "from .cookies import (\n    foo,\n    bar,\n)" -> "from .cookies import foo, bar"
1121        let mut logical_lines: Vec<String> = Vec::new();
1122        let mut current_line = String::new();
1123        let mut in_parens = false;
1124
1125        for line in content.lines() {
1126            let trimmed = line.trim();
1127            if in_parens {
1128                // Strip parentheses and comments
1129                let clean = trimmed.trim_end_matches(|c: char| c == ')' || c == ',');
1130                let clean = clean.split('#').next().unwrap_or(clean).trim();
1131                if !clean.is_empty() && clean != "(" {
1132                    current_line.push_str(", ");
1133                    current_line.push_str(clean);
1134                }
1135                if trimmed.contains(')') {
1136                    in_parens = false;
1137                    logical_lines.push(std::mem::take(&mut current_line));
1138                }
1139            } else if trimmed.starts_with("from ") && trimmed.contains(" import ") {
1140                if trimmed.contains('(') && !trimmed.contains(')') {
1141                    // Multi-line import starts
1142                    in_parens = true;
1143                    // Take everything before the paren
1144                    let before_paren = trimmed.split('(').next().unwrap_or(trimmed);
1145                    current_line = before_paren.trim().to_string();
1146                    // Also grab anything after the paren on this line
1147                    if let Some(after) = trimmed.split('(').nth(1) {
1148                        let after = after.trim().trim_end_matches(')').trim();
1149                        if !after.is_empty() {
1150                            current_line.push(' ');
1151                            current_line.push_str(after);
1152                        }
1153                    }
1154                } else {
1155                    logical_lines.push(trimmed.to_string());
1156                }
1157            }
1158        }
1159
1160        for logical_line in &logical_lines {
1161            if let Some(rest) = logical_line.strip_prefix("from ") {
1162                // Find " import " or " import," (multi-line imports join with comma)
1163                let import_match = rest.find(" import ")
1164                    .map(|pos| (pos, 8))
1165                    .or_else(|| rest.find(" import,").map(|pos| (pos, 8)));
1166                if let Some((import_pos, skip)) = import_match {
1167                    let module_path = &rest[..import_pos];
1168                    let names_str = &rest[import_pos + skip..];
1169
1170                    let source_module = module_path
1171                        .trim_start_matches('.')
1172                        .rsplit('.')
1173                        .next()
1174                        .unwrap_or(module_path.trim_start_matches('.'));
1175
1176                    for name_part in names_str.split(',') {
1177                        let name_part = name_part.trim();
1178                        let imported_name = name_part.split_whitespace().next().unwrap_or(name_part);
1179                        // Strip trailing parens/punctuation
1180                        let imported_name = imported_name.trim_matches(|c: char| c == '(' || c == ')' || c == ',');
1181                        if imported_name.is_empty() {
1182                            continue;
1183                        }
1184
1185                        if let Some(target_ids) = symbol_table.get(imported_name) {
1186                            let target = target_ids.iter().find(|id| {
1187                                entity_map.get(*id).map_or(false, |e| {
1188                                    let stem = e.file_path.rsplit('/').next().unwrap_or(&e.file_path);
1189                                    let stem = stem.strip_suffix(".py")
1190                                        .or_else(|| stem.strip_suffix(".ts"))
1191                                        .or_else(|| stem.strip_suffix(".js"))
1192                                        .or_else(|| stem.strip_suffix(".rs"))
1193                                        .unwrap_or(stem);
1194                                    stem == source_module
1195                                })
1196                            });
1197                            if let Some(target_id) = target {
1198                                import_table.insert(
1199                                    (file_path.clone(), imported_name.to_string()),
1200                                    target_id.clone(),
1201                                );
1202                            }
1203                        }
1204                    }
1205                }
1206            }
1207        }
1208
1209        // JS/TS imports: import { foo, bar as baz } from './module'
1210        //                import Foo from './module'
1211        let is_js_ts = file_path.ends_with(".js") || file_path.ends_with(".ts")
1212            || file_path.ends_with(".jsx") || file_path.ends_with(".tsx");
1213
1214        if is_js_ts {
1215            static JS_NAMED_RE: LazyLock<Regex> = LazyLock::new(|| {
1216                Regex::new(r#"import\s*\{([^}]+)\}\s*from\s*['"]([^'"]+)['"]"#).unwrap()
1217            });
1218            static JS_DEFAULT_RE: LazyLock<Regex> = LazyLock::new(|| {
1219                Regex::new(r#"import\s+(?:type\s+)?([A-Za-z_]\w*)\s+from\s*['"]([^'"]+)['"]"#).unwrap()
1220            });
1221
1222            for cap in JS_NAMED_RE.captures_iter(&content) {
1223                let names_str = cap.get(1).unwrap().as_str();
1224                let module_path = cap.get(2).unwrap().as_str();
1225                let source_module = module_path.rsplit('/').next().unwrap_or(module_path);
1226                let source_module = strip_js_ext(source_module);
1227
1228                for name_part in names_str.split(',') {
1229                    let name_part = name_part.trim();
1230                    if name_part.is_empty() { continue; }
1231
1232                    // Handle "foo as bar" aliases and "type foo" prefixes
1233                    let (original_name, local_name) = if let Some(pos) = name_part.find(" as ") {
1234                        let orig = name_part[..pos].trim();
1235                        let local = name_part[pos + 4..].trim();
1236                        let orig = orig.strip_prefix("type ").unwrap_or(orig);
1237                        (orig, local)
1238                    } else {
1239                        let name = name_part.strip_prefix("type ").unwrap_or(name_part);
1240                        (name, name)
1241                    };
1242
1243                    if original_name.is_empty() || local_name.is_empty() { continue; }
1244
1245                    if let Some(target_ids) = symbol_table.get(original_name) {
1246                        let target = target_ids.iter().find(|id| {
1247                            entity_map.get(*id).map_or(false, |e| {
1248                                let stem = e.file_path.rsplit('/').next().unwrap_or(&e.file_path);
1249                                let stem = strip_file_ext(stem);
1250                                stem == source_module
1251                            })
1252                        });
1253                        if let Some(target_id) = target {
1254                            import_table.insert(
1255                                (file_path.clone(), local_name.to_string()),
1256                                target_id.clone(),
1257                            );
1258                        }
1259                    }
1260                }
1261            }
1262
1263            for cap in JS_DEFAULT_RE.captures_iter(&content) {
1264                let local_name = cap.get(1).unwrap().as_str();
1265                let module_path = cap.get(2).unwrap().as_str();
1266                let source_module = module_path.rsplit('/').next().unwrap_or(module_path);
1267                let source_module = strip_js_ext(source_module);
1268
1269                if let Some(target_ids) = symbol_table.get(local_name) {
1270                    let target = target_ids.iter().find(|id| {
1271                        entity_map.get(*id).map_or(false, |e| {
1272                            let stem = e.file_path.rsplit('/').next().unwrap_or(&e.file_path);
1273                            let stem = strip_file_ext(stem);
1274                            stem == source_module
1275                        })
1276                    });
1277                    if let Some(target_id) = target {
1278                        import_table.insert(
1279                            (file_path.clone(), local_name.to_string()),
1280                            target_id.clone(),
1281                        );
1282                    }
1283                }
1284            }
1285        }
1286
1287        // Rust imports: use crate::module::Name; / use crate::module::{A, B};
1288        // Also: use super::module::Name; / use self::module::Name;
1289        let is_rust = file_path.ends_with(".rs");
1290        if is_rust {
1291            static RUST_USE_SIMPLE_RE: LazyLock<Regex> = LazyLock::new(|| {
1292                // use crate::config::Config;
1293                // use super::types::Entity;
1294                // use config::Config;  (bare module path in binary crates)
1295                Regex::new(r"(?m)^\s*use\s+(?:(?:crate|super|self)::)?([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\s*;").unwrap()
1296            });
1297            static RUST_USE_GROUP_RE: LazyLock<Regex> = LazyLock::new(|| {
1298                // use crate::types::{Entity, ParseError};
1299                // use types::{Entity, ParseError};  (bare module path)
1300                Regex::new(r"(?m)^\s*use\s+(?:(?:crate|super|self)::)?([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)::\{([^}]+)\}\s*;").unwrap()
1301            });
1302
1303            // Build a map: module_name -> list of file paths whose stem matches
1304            // For "use crate::config::Config", module is "config", name is "Config"
1305            for cap in RUST_USE_SIMPLE_RE.captures_iter(&content) {
1306                let full_path_str = cap.get(1).unwrap().as_str();
1307                let parts: Vec<&str> = full_path_str.split("::").collect();
1308                if parts.is_empty() { continue; }
1309
1310                // Last part is the imported name, everything before is the module path
1311                let imported_name = parts[parts.len() - 1];
1312                // The module is the second-to-last part, or the first if only one part
1313                let source_module = if parts.len() >= 2 {
1314                    parts[parts.len() - 2]
1315                } else {
1316                    parts[0]
1317                };
1318
1319                resolve_rust_import(
1320                    file_path, imported_name, source_module,
1321                    symbol_table, entity_map, &mut import_table,
1322                );
1323            }
1324
1325            for cap in RUST_USE_GROUP_RE.captures_iter(&content) {
1326                let module_path = cap.get(1).unwrap().as_str();
1327                let names_str = cap.get(2).unwrap().as_str();
1328
1329                // source_module is the last segment of the module path
1330                let source_module = module_path.rsplit("::").next().unwrap_or(module_path);
1331
1332                for name_part in names_str.split(',') {
1333                    let name_part = name_part.trim();
1334                    // Handle "Name as Alias"
1335                    let (original, local) = if let Some(pos) = name_part.find(" as ") {
1336                        (&name_part[..pos], name_part[pos + 4..].trim())
1337                    } else {
1338                        (name_part, name_part)
1339                    };
1340                    let original = original.trim();
1341                    let local = local.trim();
1342                    if original.is_empty() || local.is_empty() { continue; }
1343
1344                    resolve_rust_import(
1345                        file_path, original, source_module,
1346                        symbol_table, entity_map, &mut import_table,
1347                    );
1348                    // If aliased, also map the local name
1349                    if local != original {
1350                        if let Some(target) = import_table.get(&(file_path.clone(), original.to_string())).cloned() {
1351                            import_table.insert(
1352                                (file_path.clone(), local.to_string()),
1353                                target,
1354                            );
1355                        }
1356                    }
1357                }
1358            }
1359        }
1360
1361        // Go imports: import "module/path" or import ( "module/path" )
1362        // Go uses the last path component as the package name
1363        let is_go = file_path.ends_with(".go");
1364        if is_go {
1365            static GO_IMPORT_RE: LazyLock<Regex> = LazyLock::new(|| {
1366                Regex::new(r#"(?m)"([^"]+)""#).unwrap()
1367            });
1368
1369            // Only look in import blocks
1370            let import_section = extract_go_import_section(&content);
1371            for cap in GO_IMPORT_RE.captures_iter(&import_section) {
1372                let import_path = cap.get(1).unwrap().as_str();
1373                let pkg_name = import_path.rsplit('/').next().unwrap_or(import_path);
1374
1375                // Map all entities from files matching this package name
1376                for (name, target_ids) in symbol_table.iter() {
1377                    for target_id in target_ids {
1378                        if let Some(entity) = entity_map.get(target_id) {
1379                            let stem = entity.file_path.rsplit('/').next().unwrap_or(&entity.file_path);
1380                            let stem = strip_file_ext(stem);
1381                            // Go: file stem or directory matches package name
1382                            if stem == pkg_name || entity.file_path.contains(&format!("{}/", pkg_name)) {
1383                                import_table.insert(
1384                                    (file_path.clone(), name.clone()),
1385                                    target_id.clone(),
1386                                );
1387                            }
1388                        }
1389                    }
1390                }
1391            }
1392        }
1393    }
1394
1395    import_table
1396}
1397
1398/// Resolve a Rust import: find the target entity in the symbol table
1399/// by matching the imported name against entities in files whose stem matches source_module.
1400fn resolve_rust_import(
1401    file_path: &str,
1402    imported_name: &str,
1403    source_module: &str,
1404    symbol_table: &HashMap<String, Vec<String>>,
1405    entity_map: &HashMap<String, EntityInfo>,
1406    import_table: &mut HashMap<(String, String), String>,
1407) {
1408    if let Some(target_ids) = symbol_table.get(imported_name) {
1409        let target = target_ids.iter().find(|id| {
1410            entity_map.get(*id).map_or(false, |e| {
1411                let stem = e.file_path.rsplit('/').next().unwrap_or(&e.file_path);
1412                let stem = strip_file_ext(stem);
1413                stem == source_module
1414            })
1415        });
1416        if let Some(target_id) = target {
1417            import_table.insert(
1418                (file_path.to_string(), imported_name.to_string()),
1419                target_id.clone(),
1420            );
1421        }
1422    }
1423}
1424
/// Extract the import specs of a Go source file, one per output line.
///
/// Recognised forms:
/// - single-line imports, with or without an alias: `import "fmt"`,
///   `import f "fmt"`, `import _ "embed"`, `import . "math"` — emitted as the whole
///   declaration;
/// - parenthesised blocks (`import (` … `)`) — each spec line inside is emitted;
/// - one-line groups: `import ("fmt"; "os")` — the text between the parentheses is emitted.
///
/// `//` comments are removed before a line is inspected, so commented-out imports are
/// not reported and a closing `) // note` still terminates the block. Blank lines are
/// skipped. Every emitted line is trimmed and terminated by `\n`.
fn extract_go_import_section(content: &str) -> String {
    /// Appends one trimmed import spec plus a newline; blank specs are ignored.
    fn push_spec(section: &mut String, spec: &str) {
        let spec = spec.trim();
        if !spec.is_empty() {
            section.push_str(spec);
            section.push('\n');
        }
    }

    let mut section = String::new();
    let mut in_import_block = false;

    for line in content.lines() {
        // Go import paths do not contain `//`, so on import lines everything from the
        // first `//` onwards is a comment. Other lines are discarded below anyway.
        let code = line.split("//").next().unwrap_or("").trim();
        if code.is_empty() {
            continue;
        }

        if in_import_block {
            // No import spec starts with `)`, so a leading `)` always closes the block.
            if code.starts_with(')') {
                in_import_block = false;
            } else {
                push_spec(&mut section, code);
            }
            continue;
        }

        let rest = match code.strip_prefix("import") {
            Some(rest) => rest,
            None => continue,
        };
        let spec = rest.trim_start();
        // The keyword must be followed by whitespace, `(` or a quote; otherwise the
        // line merely starts with an identifier such as `importer`.
        let is_declaration = spec.len() < rest.len()
            || spec.starts_with(|c: char| matches!(c, '(' | '"' | '`'));
        if !is_declaration {
            continue;
        }

        if let Some(group) = spec.strip_prefix('(') {
            match group.find(')') {
                // Group opened and closed on the same line.
                Some(close) => push_spec(&mut section, &group[..close]),
                None => {
                    in_import_block = true;
                    // Keep any spec written directly after the opening parenthesis.
                    push_spec(&mut section, group);
                }
            }
        } else if spec.contains('"') || spec.contains('`') {
            push_spec(&mut section, code);
        }
    }

    section
}
1451
/// Remove one trailing JavaScript/TypeScript extension (`.js`, `.ts`, `.jsx`, `.tsx`)
/// from a module specifier. Input without such a suffix is returned unchanged.
fn strip_js_ext(s: &str) -> &str {
    const JS_EXTENSIONS: [&str; 4] = [".js", ".ts", ".jsx", ".tsx"];

    JS_EXTENSIONS
        .iter()
        .find_map(|ext| s.strip_suffix(*ext))
        .unwrap_or(s)
}
1460
/// Strip one known source-file extension from a file name.
///
/// Recognised suffixes: `.py`, `.ts`, `.js`, `.tsx`, `.jsx`, `.rs` and `.go`.
/// `.go` is included because Go import resolution compares the stripped file stem
/// against the imported package name; without it a stem like `utils.go` can never
/// equal `utils`. Only a single suffix is removed (`a.d.ts` -> `a.d`), and names with
/// any other extension are returned unchanged.
fn strip_file_ext(s: &str) -> &str {
    const SOURCE_EXTENSIONS: [&str; 7] = [".py", ".ts", ".js", ".tsx", ".jsx", ".rs", ".go"];

    SOURCE_EXTENSIONS
        .iter()
        .find_map(|ext| s.strip_suffix(*ext))
        .unwrap_or(s)
}
1471
/// Blank out comments and string literals so they cannot produce false references.
///
/// The scan is language-agnostic and purely lexical. The following spans are replaced
/// by spaces, byte for byte, so offsets in the result line up with `content`:
/// - triple-quoted strings (`"""…"""`, `'''…'''`);
/// - double-quoted strings (backslash escapes honoured, may span lines);
/// - single-quoted literals (backslash escapes honoured, limited to one line);
/// - `#` and `//` line comments (the terminating newline is kept);
/// - `/* … */` block comments.
///
/// Unterminated triple-quoted strings and block comments are blanked through the end
/// of the input. A single-quoted literal stops at the end of its line when no closing
/// quote is found, so a lone apostrophe (for instance a Rust lifetime such as `'a`)
/// cannot swallow the code on the following lines.
fn strip_comments_and_strings(content: &str) -> String {
    /// Index of the first occurrence of `needle` in `bytes` at or after `from`.
    fn find_from(bytes: &[u8], from: usize, needle: &[u8]) -> Option<usize> {
        bytes
            .get(from..)?
            .windows(needle.len())
            .position(|window| window == needle)
            .map(|offset| from + offset)
    }

    /// Index just past the quoted literal whose body starts at `start`.
    ///
    /// A backslash skips the byte that follows it. With `single_line` set, an
    /// unescaped newline ends the literal and is left in place for the caller.
    fn skip_quoted(bytes: &[u8], start: usize, quote: u8, single_line: bool) -> usize {
        let mut i = start;
        while i < bytes.len() {
            match bytes[i] {
                b'\\' => i += 2,
                b if b == quote => return i + 1,
                b'\n' if single_line => return i,
                _ => i += 1,
            }
        }
        // Unterminated literal (or an escape ran past the end): consume everything.
        bytes.len()
    }

    let bytes = content.as_bytes();
    let len = bytes.len();
    // Start from all spaces; only bytes classified as code are copied over.
    let mut result = vec![b' '; len];
    let mut i = 0;

    while i < len {
        let rest = &bytes[i..];

        // Triple-quoted strings (Python docstrings) must be tested before plain quotes.
        if rest.starts_with(b"\"\"\"") || rest.starts_with(b"'''") {
            let delimiter = &rest[..3];
            i = find_from(bytes, i + 3, delimiter).map_or(len, |close| close + 3);
            continue;
        }
        // Double-quoted strings.
        if bytes[i] == b'"' {
            i = skip_quoted(bytes, i + 1, b'"', false);
            continue;
        }
        // Single-quoted strings and character literals.
        if bytes[i] == b'\'' {
            i = skip_quoted(bytes, i + 1, b'\'', true);
            continue;
        }
        // `#` (Python/Ruby) and `//` (C-style) line comments run up to, not through,
        // the newline, so line structure is preserved.
        if bytes[i] == b'#' || rest.starts_with(b"//") {
            i = find_from(bytes, i, b"\n").unwrap_or(len);
            continue;
        }
        // C-style block comments.
        if rest.starts_with(b"/*") {
            i = find_from(bytes, i + 2, b"*/").map_or(len, |close| close + 2);
            continue;
        }
        // Regular code: copy through.
        result[i] = bytes[i];
        i += 1;
    }

    String::from_utf8_lossy(&result).into_owned()
}
1550
1551/// Extract dot-chains (receiver.member) from content for precise resolution.
1552/// Returns unique (receiver, member) pairs found in the content.
1553fn extract_dot_chains<'a>(content: &'a str) -> Vec<(&'a str, &'a str)> {
1554    static DOT_CHAIN_RE: LazyLock<Regex> = LazyLock::new(|| {
1555        Regex::new(r"\b([A-Za-z_]\w*)\.([A-Za-z_]\w*)").unwrap()
1556    });
1557
1558    let mut chains = Vec::new();
1559    let mut seen: HashSet<(&str, &str)> = HashSet::new();
1560    for cap in DOT_CHAIN_RE.captures_iter(content) {
1561        let receiver = cap.get(1).unwrap().as_str();
1562        let member = cap.get(2).unwrap().as_str();
1563        if seen.insert((receiver, member)) {
1564            chains.push((receiver, member));
1565        }
1566    }
1567    chains
1568}
1569
1570/// Extract identifier references from entity content using simple token analysis.
1571/// Strips comments and strings first to avoid false positives from docstrings.
1572/// Returns borrowed slices from the stripped content.
1573fn extract_references_from_content<'a>(content: &'a str, own_name: &str) -> Vec<&'a str> {
1574    // We need to figure out which words appear only in comments/strings vs real code.
1575    // Strategy: strip comments/strings, then only accept words that appear in the stripped version.
1576    let stripped = strip_comments_and_strings(content);
1577    let stripped_words: HashSet<&str> = stripped
1578        .split(|c: char| !c.is_alphanumeric() && c != '_')
1579        .filter(|w| !w.is_empty())
1580        .collect();
1581
1582    let mut refs = Vec::new();
1583    let mut seen: HashSet<&str> = HashSet::new();
1584
1585    for word in content.split(|c: char| !c.is_alphanumeric() && c != '_') {
1586        if word.is_empty() || word == own_name {
1587            continue;
1588        }
1589        if is_keyword(word) || word.len() < 2 {
1590            continue;
1591        }
1592        // Skip very short lowercase identifiers (likely local vars: i, x, a, ok, id, etc.)
1593        if word.starts_with(|c: char| c.is_lowercase()) && word.len() < 3 {
1594            continue;
1595        }
1596        if !word.starts_with(|c: char| c.is_alphabetic() || c == '_') {
1597            continue;
1598        }
1599        // Skip common local variable names that create false graph edges
1600        if is_common_local_name(word) {
1601            continue;
1602        }
1603        // Skip words that only appear in comments/strings
1604        if !stripped_words.contains(word) {
1605            continue;
1606        }
1607        if seen.insert(word) {
1608            refs.push(word);
1609        }
1610    }
1611
1612    refs
1613}
1614
/// Identifiers that are overwhelmingly local variables rather than entity names.
/// Built once on first use.
static COMMON_LOCAL_NAMES: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
    const NAMES: &[&str] = &[
        "result", "results", "data", "config",
        "value", "values", "item", "items",
        "input", "output", "args", "opts",
        "name", "path", "file", "line",
        "count", "index", "temp", "prev",
        "next", "curr", "current", "node",
        "left", "right", "root", "head",
        "tail", "body", "text", "content",
        "source", "target", "entry", "error",
        "errors", "message", "response", "request",
        "context", "state", "props", "event",
        "handler", "callback", "options", "params",
        "query", "list", "base", "info",
        "meta", "kind", "mode", "flag",
        "size", "length", "width", "height",
        "start", "stop", "begin", "done",
        "found", "status", "code", "test",
    ];
    NAMES.iter().copied().collect()
});

/// Returns `true` when `word` is one of the generic local-variable names that would
/// otherwise create large numbers of false-positive edges in the dependency graph.
/// The comparison is exact and case-sensitive.
fn is_common_local_name(word: &str) -> bool {
    COMMON_LOCAL_NAMES.contains(word)
}
1637
1638/// Infer reference type from context using word-boundary-aware matching.
1639fn infer_ref_type(content: &str, ref_name: &str) -> RefType {
1640    // Check if it's a function call: ref_name followed by ( with word boundary before.
1641    // Avoids format! allocation by finding ref_name and checking the next char.
1642    let bytes = content.as_bytes();
1643    let name_bytes = ref_name.as_bytes();
1644    let mut search_start = 0;
1645    while let Some(rel_pos) = content[search_start..].find(ref_name) {
1646        let pos = search_start + rel_pos;
1647        let after = pos + name_bytes.len();
1648        // Check next char is '('
1649        if after < bytes.len() && bytes[after] == b'(' {
1650            // Verify word boundary before
1651            let is_boundary = pos == 0 || {
1652                let prev = bytes[pos - 1];
1653                !prev.is_ascii_alphanumeric() && prev != b'_'
1654            };
1655            if is_boundary {
1656                return RefType::Calls;
1657            }
1658        }
1659        // Advance past pos to the next char boundary to avoid slicing inside a multi-byte UTF-8 char.
1660        search_start = pos + 1;
1661        while search_start < content.len() && !content.is_char_boundary(search_start) {
1662            search_start += 1;
1663        }
1664    }
1665
1666    // Check if it's in an import/use statement (line-level, not substring)
1667    for line in content.lines() {
1668        let trimmed = line.trim();
1669        if (trimmed.starts_with("import ") || trimmed.starts_with("use ")
1670            || trimmed.starts_with("from ") || trimmed.starts_with("require("))
1671            && trimmed.contains(ref_name)
1672        {
1673            return RefType::Imports;
1674        }
1675    }
1676
1677    // Default to type reference
1678    RefType::TypeRef
1679}
1680
/// Words never treated as entity references: language keywords, ubiquitous built-in
/// functions and primitive/standard type names across the supported languages.
/// Built once on first use.
static KEYWORDS: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
    // Common across languages
    const SHARED: &[&str] = &[
        "if", "else", "for", "while", "do", "switch", "case", "break", "continue", "return",
        "try", "catch", "finally", "throw",
        "new", "delete", "typeof", "instanceof", "in", "of",
        "true", "false", "null", "undefined", "void", "this", "super",
        "class", "extends", "implements", "interface", "enum",
        "const", "let", "var", "function", "async", "await", "yield",
        "import", "export", "default", "from", "as",
        "static", "public", "private", "protected", "abstract", "final", "override",
    ];
    const RUST: &[&str] = &[
        "fn", "pub", "mod", "use", "struct", "impl", "trait", "where", "type",
        "self", "Self", "mut", "ref", "match", "loop", "move",
        "unsafe", "extern", "crate", "dyn",
    ];
    const PYTHON: &[&str] = &[
        "def", "elif", "except", "raise", "with", "pass", "lambda",
        "nonlocal", "global", "assert",
        "True", "False", "and", "or", "not", "is",
    ];
    const GO: &[&str] = &[
        "func", "package", "range", "select", "chan", "go", "defer",
        "map", "make", "append", "len", "cap",
    ];
    // C/C++
    const C_FAMILY: &[&str] = &[
        "auto", "register", "volatile", "sizeof", "typedef",
        "template", "typename", "namespace", "virtual", "inline",
        "constexpr", "nullptr", "noexcept", "explicit", "friend", "operator", "using",
        "cout", "endl", "cerr", "cin", "printf", "scanf", "malloc", "free", "NULL",
        "include", "ifdef", "ifndef", "endif", "define", "pragma",
    ];
    const RUBY: &[&str] = &[
        "end", "then", "elsif", "unless", "until", "begin", "rescue", "ensure", "when",
        "require", "attr_accessor", "attr_reader", "attr_writer",
        "puts", "nil", "module", "defined",
    ];
    // C#
    const CSHARP: &[&str] = &[
        "internal", "sealed", "readonly", "partial", "delegate",
        "event", "params", "out", "object",
        "decimal", "sbyte", "ushort", "uint", "ulong", "nint", "nuint", "dynamic",
        "get", "set", "value", "init", "record",
    ];
    // Types (primitives)
    const BUILTIN_TYPES: &[&str] = &[
        "string", "number", "boolean", "int", "float", "double", "bool", "char", "byte",
        "i8", "i16", "i32", "i64", "u8", "u16", "u32", "u64", "f32", "f64",
        "usize", "isize", "str",
        "String", "Vec", "Option", "Result", "Box", "Arc", "Rc", "HashMap", "HashSet",
        "Some", "Ok", "Err",
    ];

    let groups: [&[&str]; 8] = [
        SHARED, RUST, PYTHON, GO, C_FAMILY, RUBY, CSHARP, BUILTIN_TYPES,
    ];
    groups
        .iter()
        .flat_map(|group| group.iter().copied())
        .collect()
});

/// Returns `true` when `word` is in the keyword/built-in table. The comparison is exact
/// and case-sensitive (`self` and `Self` are separate entries).
fn is_keyword(word: &str) -> bool {
    KEYWORDS.contains(word)
}
1735
#[cfg(test)]
mod tests {
    use super::*;
    use crate::git::types::{FileChange, FileStatus};
    use std::io::Write;
    use tempfile::TempDir;

    /// Fresh temporary directory plus the default parser registry.
    fn create_test_repo() -> (TempDir, ParserRegistry) {
        let scratch = TempDir::new().unwrap();
        (scratch, crate::parser::plugins::create_default_registry())
    }

    /// Writes `content` to `dir/name`, creating missing parent directories first.
    fn write_file(dir: &Path, name: &str, content: &str) {
        let target = dir.join(name);
        if let Some(parent) = target.parent() {
            std::fs::create_dir_all(parent).unwrap();
        }
        std::fs::File::create(target)
            .unwrap()
            .write_all(content.as_bytes())
            .unwrap();
    }

    /// Builds a `FileChange` for `path` that carries only a status and, optionally,
    /// the new file content.
    fn change(path: &str, status: FileStatus, after_content: Option<&str>) -> FileChange {
        FileChange {
            file_path: path.into(),
            status,
            old_file_path: None,
            before_content: None,
            after_content: after_content.map(Into::into),
        }
    }

    /// ID of an entity whose ID contains `needle`; panics when there is none.
    fn entity_id_containing(graph: &EntityGraph, needle: &str) -> String {
        graph
            .entities
            .keys()
            .find(|id| id.contains(needle))
            .cloned()
            .unwrap_or_else(|| panic!("{} entity should exist", needle))
    }

    /// Names of all entities that `entity_id` depends on.
    fn dependency_names(graph: &EntityGraph, entity_id: &str) -> Vec<String> {
        graph
            .get_dependencies(entity_id)
            .iter()
            .map(|dep| dep.name.clone())
            .collect()
    }

    /// Asserts that `entity_id` has a dependency named `dependency`; `why` is the
    /// sentence that prefixes the dependency list in the failure message.
    #[track_caller]
    fn assert_depends_on(graph: &EntityGraph, entity_id: &str, dependency: &str, why: &str) {
        let names = dependency_names(graph, entity_id);
        assert!(
            names.iter().any(|name| name == dependency),
            "{} Deps: {:?}",
            why,
            names
        );
    }

    #[test]
    fn test_incremental_add_file() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        // Initial graph: foo (a.ts) calls bar (b.ts).
        write_file(root, "a.ts", "export function foo() { return bar(); }\n");
        write_file(root, "b.ts", "export function bar() { return 1; }\n");
        let mut graph = EntityGraph::build(root, &["a.ts".into(), "b.ts".into()], &registry);
        assert_eq!(graph.entities.len(), 2);

        // c.ts exists on disk only; the change itself carries no content.
        write_file(root, "c.ts", "export function baz() { return foo(); }\n");
        graph.update_from_changes(&[change("c.ts", FileStatus::Added, None)], root, &registry);

        assert_eq!(graph.entities.len(), 3);
        assert!(graph.entities.contains_key("c.ts::function::baz"));
        assert_depends_on(
            &graph,
            "c.ts::function::baz",
            "foo",
            "baz should depend on foo.",
        );
    }

    #[test]
    fn test_incremental_delete_file() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "a.ts", "export function foo() { return bar(); }\n");
        write_file(root, "b.ts", "export function bar() { return 1; }\n");
        let mut graph = EntityGraph::build(root, &["a.ts".into(), "b.ts".into()], &registry);
        assert_eq!(graph.entities.len(), 2);

        // Report b.ts as deleted.
        graph.update_from_changes(&[change("b.ts", FileStatus::Deleted, None)], root, &registry);

        assert_eq!(graph.entities.len(), 1);
        assert!(!graph.entities.contains_key("b.ts::function::bar"));

        // The edge foo -> bar must disappear together with bar.
        let remaining = dependency_names(&graph, "a.ts::function::foo");
        assert!(
            remaining.is_empty(),
            "foo's deps should be empty after bar deleted. Deps: {:?}",
            remaining
        );
    }

    #[test]
    fn test_incremental_modify_file() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "a.ts", "export function foo() { return bar(); }\n");
        write_file(root, "b.ts", "export function bar() { return 1; }\nexport function baz() { return 2; }\n");
        let mut graph = EntityGraph::build(root, &["a.ts".into(), "b.ts".into()], &registry);
        assert_eq!(graph.entities.len(), 3);

        // Rewrite a.ts so that foo calls baz instead of bar.
        write_file(root, "a.ts", "export function foo() { return baz(); }\n");
        graph.update_from_changes(&[change("a.ts", FileStatus::Modified, None)], root, &registry);

        assert_eq!(graph.entities.len(), 3);
        let dep_names = dependency_names(&graph, "a.ts::function::foo");
        let depends_on = |name: &str| dep_names.iter().any(|dep| dep == name);
        assert!(depends_on("baz"), "foo should depend on baz after modification. Deps: {:?}", dep_names);
        assert!(!depends_on("bar"), "foo should no longer depend on bar. Deps: {:?}", dep_names);
    }

    #[test]
    fn test_incremental_with_content() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "a.ts", "export function foo() { return 1; }\n");
        let mut graph = EntityGraph::build(root, &["a.ts".into()], &registry);
        assert_eq!(graph.entities.len(), 1);

        // b.ts is never written to disk: its content travels inside the change.
        let added = change(
            "b.ts",
            FileStatus::Added,
            Some("export function bar() { return foo(); }\n"),
        );
        graph.update_from_changes(&[added], root, &registry);

        assert_eq!(graph.entities.len(), 2);
        assert!(dependency_names(&graph, "b.ts::function::bar")
            .iter()
            .any(|name| name == "foo"));
    }

    #[test]
    fn test_extract_references() {
        let content = "function processData(input) {\n  const result = validateInput(input);\n  return transform(result);\n}";
        let refs = extract_references_from_content(content, "processData");
        for expected in ["validateInput", "transform"].iter() {
            assert!(refs.contains(expected));
        }
        // The entity's own name is never reported.
        assert!(!refs.contains(&"processData"));
    }

    #[test]
    fn test_extract_references_skips_keywords() {
        let content = "function foo() { if (true) { return false; } }";
        let refs = extract_references_from_content(content, "foo");
        for keyword in ["if", "true", "return", "false"].iter() {
            assert!(!refs.contains(keyword));
        }
    }

    #[test]
    fn test_infer_ref_type_call() {
        let inferred = infer_ref_type("validateInput(data)", "validateInput");
        assert_eq!(inferred, RefType::Calls);
    }

    #[test]
    fn test_infer_ref_type_type() {
        let inferred = infer_ref_type("let x: MyType = something", "MyType");
        assert_eq!(inferred, RefType::TypeRef);
    }

    #[test]
    fn test_infer_ref_type_multibyte_utf8() {
        // Multi-byte UTF-8 characters in the content must not cause a panic.
        assert_eq!(infer_ref_type("let café = foo(x)", "foo"), RefType::Calls);
        assert_eq!(
            infer_ref_type("class HandicapfrPublicationFieldsEnum:\n    É = 1\n    bar()", "bar"),
            RefType::Calls,
        );
        // Same when the name does not occur at all.
        assert_eq!(
            infer_ref_type("// 日本語コメント\nlet x = 1", "missing"),
            RefType::TypeRef,
        );
    }

    #[test]
    fn test_dot_chain_self_resolution() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "service.py", "\
class MyService:
    def process(self):
        return self.validate()

    def validate(self):
        return True
");

        let graph = EntityGraph::build(root, &["service.py".into()], &registry);

        // `self.validate()` inside process must yield an edge process -> validate.
        let process_id = entity_id_containing(&graph, "process");
        assert_depends_on(
            &graph,
            &process_id,
            "validate",
            "process should depend on validate via self.validate().",
        );
    }

    #[test]
    fn test_dot_chain_this_resolution() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "service.ts", "\
class UserService {
    process() {
        return this.validate();
    }
    validate() {
        return true;
    }
}
");

        let graph = EntityGraph::build(root, &["service.ts".into()], &registry);

        let process_id = entity_id_containing(&graph, "process");
        assert_depends_on(
            &graph,
            &process_id,
            "validate",
            "process should depend on validate via this.validate().",
        );
    }

    #[test]
    fn test_dot_chain_class_static() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "utils.ts", "\
class MathUtils {
    static compute() { return 1; }
}
function caller() { return MathUtils.compute(); }
");

        let graph = EntityGraph::build(root, &["utils.ts".into()], &registry);

        let caller_id = entity_id_containing(&graph, "caller");
        assert_depends_on(
            &graph,
            &caller_id,
            "compute",
            "caller should depend on compute via MathUtils.compute().",
        );
    }

    #[test]
    fn test_js_ts_import_resolution() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "helper.ts", "\
export function helper() { return 1; }
");
        write_file(root, "main.ts", "\
import { helper } from './helper';
export function main() { return helper(); }
");

        let files = ["helper.ts".into(), "main.ts".into()];
        let graph = EntityGraph::build(root, &files, &registry);

        let main_id = entity_id_containing(&graph, "main");
        assert_depends_on(
            &graph,
            &main_id,
            "helper",
            "main should depend on helper via JS import.",
        );
    }

    #[test]
    fn test_dot_chain_no_false_edges() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        // Both classes define `process`. `self.process()` inside ClassA must not be
        // linked to ClassB::process.
        write_file(root, "a.py", "\
class ClassA:
    def run(self):
        return self.process()

    def process(self):
        return 1
");
        write_file(root, "b.py", "\
class ClassB:
    def process(self):
        return 2
");

        let files = ["a.py".into(), "b.py".into()];
        let graph = EntityGraph::build(root, &files, &registry);

        let run_id = entity_id_containing(&graph, "run");
        // Every `process` dependency of run has to live in a.py.
        for dep in graph
            .get_dependencies(&run_id)
            .iter()
            .filter(|dep| dep.name == "process")
        {
            assert!(
                dep.file_path == "a.py",
                "run's process dep should be in a.py, not {}",
                dep.file_path
            );
        }
    }

    #[test]
    fn test_dot_chain_fallback() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        // The fixture has only a plain `helper()` call, with no receiver to resolve,
        // so the edge has to come from bag-of-words matching.
        write_file(root, "app.ts", "\
export function helper() { return 1; }
export function caller() {
    const val = helper();
    return val;
}
");

        let graph = EntityGraph::build(root, &["app.ts".into()], &registry);

        let caller_id = entity_id_containing(&graph, "caller");
        assert_depends_on(
            &graph,
            &caller_id,
            "helper",
            "caller should still resolve helper via bag-of-words.",
        );
    }
}