Skip to main content

sem_core/parser/
graph.rs

1//! Entity dependency graph — cross-file reference extraction.
2//!
3//! Implements a two-pass approach inspired by arXiv:2601.08773 (Reliable Graph-RAG):
4//! Pass 1: Extract all entities, build a symbol table (name → entity ID).
5//! Pass 2: For each entity, extract identifier references from its AST subtree,
6//!         resolve them against the symbol table to create edges.
7//!
8//! This enables impact analysis: "if I change entity X, what else is affected?"
9
10use std::collections::{HashMap, HashSet};
11use std::path::Path;
12use std::sync::{Arc, LazyLock};
13
14#[cfg(feature = "parallel")]
15use rayon::prelude::*;
16use regex::Regex;
17use serde::{Deserialize, Serialize};
18
/// Picks the iteration strategy for a collection at compile time.
///
/// Expands to `<expr>.par_iter()` (rayon) when the `parallel` feature is
/// enabled, and to the ordinary sequential `<expr>.iter()` otherwise.
/// Exactly one of the two inner blocks survives cfg-stripping and becomes
/// the value of the expansion.
macro_rules! maybe_par_iter {
    ($items:expr) => {{
        #[cfg(not(feature = "parallel"))]
        {
            $items.iter()
        }
        #[cfg(feature = "parallel")]
        {
            $items.par_iter()
        }
    }};
}
28
29use crate::git::types::{FileChange, FileStatus};
30use crate::model::entity::SemanticEntity;
31use crate::parser::registry::ParserRegistry;
32use crate::parser::scope_resolve;
33
/// A directed reference from one entity to another (one edge of the graph).
///
/// Field names are serialized in camelCase (`fromEntity`, `toEntity`, `refType`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct EntityRef {
    /// ID of the entity that contains the reference (edge source).
    pub from_entity: String,
    /// ID of the entity being referenced (edge target).
    pub to_entity: String,
    /// Kind of reference this edge represents.
    pub ref_type: RefType,
}
42
/// Type of reference between entities.
///
/// Variants are serialized as their lowercased names
/// (`calls`, `typeref`, `imports`).
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum RefType {
    /// Function/method call
    Calls,
    /// Type reference (extends, implements, field type)
    TypeRef,
    /// Import/use statement reference
    Imports,
}
54
/// A complete entity dependency graph for a set of files.
///
/// `dependents` and `dependencies` are derived indexes over `edges`; the
/// constructors in this file populate them from the same edge list, so the
/// three fields are expected to stay consistent with each other.
#[derive(Debug)]
pub struct EntityGraph {
    /// All entities indexed by ID
    pub entities: HashMap<String, EntityInfo>,
    /// Flat list of directed edges (`from_entity` → `to_entity`, with a `ref_type`)
    pub edges: Vec<EntityRef>,
    /// Reverse index: entity_id → IDs of entities that reference it
    pub dependents: HashMap<String, Vec<String>>,
    /// Forward index: entity_id → IDs of entities it references
    pub dependencies: HashMap<String, Vec<String>>,
}
67
/// Minimal entity info stored in the graph.
///
/// A trimmed-down copy of a `SemanticEntity`: it carries identity and
/// location but not the entity's source content. Field names are serialized
/// in camelCase.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct EntityInfo {
    /// Entity ID; also used as the key in `EntityGraph::entities`.
    pub id: String,
    /// Entity name, as used for symbol-table lookups.
    pub name: String,
    /// Entity kind as a string (this file matches on values such as
    /// `"class"`, `"struct"`, `"interface"`, `"impl"`, `"method"`).
    pub entity_type: String,
    /// Path of the file that defines the entity.
    pub file_path: String,
    /// ID of the enclosing (parent) entity, if any; omitted from the
    /// serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub parent_id: Option<String>,
    /// First line of the entity, copied from the source `SemanticEntity`.
    pub start_line: usize,
    /// Last line of the entity, copied from the source `SemanticEntity`.
    pub end_line: usize,
}
81
82impl EntityGraph {
83    /// Reconstruct an EntityGraph from pre-loaded parts (e.g. from a cache).
84    pub fn from_parts(entities: HashMap<String, EntityInfo>, edges: Vec<EntityRef>) -> Self {
85        let mut dependents: HashMap<String, Vec<String>> = HashMap::new();
86        let mut dependencies: HashMap<String, Vec<String>> = HashMap::new();
87        for edge in &edges {
88            dependents
89                .entry(edge.to_entity.clone())
90                .or_default()
91                .push(edge.from_entity.clone());
92            dependencies
93                .entry(edge.from_entity.clone())
94                .or_default()
95                .push(edge.to_entity.clone());
96        }
97        EntityGraph {
98            entities,
99            edges,
100            dependents,
101            dependencies,
102        }
103    }
104
    /// Build an entity graph from a set of files.
    ///
    /// Steps, in order:
    /// 1. Read and parse every path in `file_paths` (joined onto `root`) through
    ///    `registry`, collecting entities and any parse trees. Files that cannot
    ///    be read, or for which the registry returns `None`, are silently skipped.
    /// 2. Build the lookup structures (symbol table, entity map, parent/child
    ///    sets, class membership, per-file entity ranges, import table, and a
    ///    Go package index when any `.go` file is present).
    /// 3. If any file's language config enables scope resolution, run the
    ///    scope-aware resolver.
    /// 4. For every entity that did not produce a scope-resolver edge and whose
    ///    file extension has a language config, resolve references by
    ///    dot-chain matching, then bag-of-words matching.
    /// 5. Merge the edges, deduplicate by `(from, to)`, and build the forward
    ///    and reverse indexes.
    ///
    /// Returns the graph together with all extracted entities.
    pub fn build(
        root: &Path,
        file_paths: &[String],
        registry: &ParserRegistry,
    ) -> (Self, Vec<SemanticEntity>) {
        // Pass 1: Extract all entities (file I/O + parsing); runs in parallel
        // when the `parallel` feature is enabled.
        // Also collect (file_path, content, tree) for scope_resolve reuse
        let per_file: Vec<(Vec<SemanticEntity>, Option<(String, String, tree_sitter::Tree)>)> = maybe_par_iter!(file_paths)
            .filter_map(|file_path| {
                let full_path = root.join(file_path);
                // Unreadable files and files the registry declines are dropped via `?`.
                let content = std::fs::read_to_string(&full_path).ok()?;
                let (entities, tree) = registry.extract_entities_with_tree(file_path, &content)?;
                let parsed = tree.map(|t| (file_path.clone(), content, t));
                Some((entities, parsed))
            })
            .collect();

        // Flatten the per-file results into one entity list and one list of parsed files.
        let mut all_entities: Vec<SemanticEntity> = Vec::new();
        let mut parsed_files: Vec<(String, String, tree_sitter::Tree)> = Vec::new();
        for (entities, parsed) in per_file {
            all_entities.extend(entities);
            if let Some(p) = parsed {
                parsed_files.push(p);
            }
        }

        // Pass A: Build all lookup structures in a single pass over all_entities.
        // This merges what was previously 6 separate O(E) iterations.
        //   symbol_table        name → IDs of every entity with that name
        //   entity_map          ID → EntityInfo
        //   parent_child_pairs  (parent ID, child ID)
        //   class_child_names   (parent ID, child name)
        //   class_entity_names  names of class-like entities
        //   id_to_name          ID → name
        //   scope_entity_ranges file path → (start_line, end_line, ID)
        let mut symbol_table: HashMap<String, Vec<String>> = HashMap::with_capacity(all_entities.len());
        let mut entity_map: HashMap<String, EntityInfo> = HashMap::with_capacity(all_entities.len());
        let mut parent_child_pairs: HashSet<(&str, &str)> = HashSet::new();
        let mut class_child_names: HashSet<(&str, &str)> = HashSet::new();
        let mut class_entity_names: HashSet<&str> = HashSet::new();
        let mut id_to_name: HashMap<&str, &str> = HashMap::with_capacity(all_entities.len());
        let mut scope_entity_ranges: HashMap<String, Vec<(usize, usize, String)>> = HashMap::new();

        for entity in &all_entities {
            symbol_table
                .entry(entity.name.clone())
                .or_default()
                .push(entity.id.clone());

            entity_map.insert(
                entity.id.clone(),
                EntityInfo {
                    id: entity.id.clone(),
                    name: entity.name.clone(),
                    entity_type: entity.entity_type.clone(),
                    file_path: entity.file_path.clone(),
                    parent_id: entity.parent_id.clone(),
                    start_line: entity.start_line,
                    end_line: entity.end_line,
                },
            );

            if let Some(ref pid) = entity.parent_id {
                parent_child_pairs.insert((pid.as_str(), entity.id.as_str()));
                class_child_names.insert((pid.as_str(), entity.name.as_str()));
            }

            if matches!(entity.entity_type.as_str(), "class" | "struct" | "interface" | "class_type") {
                class_entity_names.insert(entity.name.as_str());
            }

            id_to_name.insert(entity.id.as_str(), entity.name.as_str());

            scope_entity_ranges.entry(entity.file_path.clone()).or_default()
                .push((entity.start_line, entity.end_line, entity.id.clone()));
        }

        // Pass B: Build enclosing_class, class_members, and scope_class_members
        // (depends on id_to_name, class_entity_names, and entity_map from Pass A)
        //   enclosing_class      child ID → name of its class-like parent
        //   class_members        class name → (member name, member ID)
        //   scope_class_members  owned variant handed to the scope resolver
        let mut enclosing_class: HashMap<&str, &str> = HashMap::new();
        let mut class_members: HashMap<&str, Vec<(&str, &str)>> = HashMap::new();
        let mut scope_class_members: HashMap<String, Vec<(String, String)>> = HashMap::new();

        for entity in &all_entities {
            if let Some(ref pid) = entity.parent_id {
                if let Some(&parent_name) = id_to_name.get(pid.as_str()) {
                    // Keyed by parent *name*: same-named classes share one member list.
                    if class_entity_names.contains(parent_name) {
                        enclosing_class.insert(entity.id.as_str(), parent_name);
                        class_members
                            .entry(parent_name)
                            .or_default()
                            .push((entity.name.as_str(), entity.id.as_str()));
                    }
                }
                // scope_class_members for scope resolver (checks entity_type of parent)
                // Note: this set of parent types includes "impl" and omits "class_type",
                // unlike the class_entity_names check in Pass A.
                if let Some(parent) = entity_map.get(pid.as_str()) {
                    if matches!(parent.entity_type.as_str(), "class" | "struct" | "interface" | "impl") {
                        scope_class_members.entry(parent.name.clone()).or_default()
                            .push((entity.name.clone(), entity.id.clone()));
                    }
                }
            }
            // Go receiver-based methods: attach the method to the receiver type
            // extracted from its source text (no parent_id is required here).
            if entity.entity_type == "method" && entity.file_path.ends_with(".go") {
                if let Some(struct_name) = scope_resolve::extract_go_receiver_type(&entity.content) {
                    scope_class_members.entry(struct_name).or_default()
                        .push((entity.name.clone(), entity.id.clone()));
                }
            }
        }

        // Build import table: (file_path, imported_name) → target entity ID
        // e.g. ("io_handler.py", "validate") → "core.py::function::validate"
        let import_table = build_import_table(root, file_paths, &symbol_table, &entity_map, Some(&parsed_files));
        // Build owned Go package index for scope resolver (only when a .go file is present).
        // Each entity is indexed under its file name with the extension stripped and,
        // when it differs, under the name of its immediate parent directory.
        let owned_go_pkg_index: HashMap<String, Vec<(String, String)>> = if file_paths.iter().any(|f| f.ends_with(".go")) {
            let mut idx: HashMap<String, Vec<(String, String)>> = HashMap::new();
            for (name, target_ids) in symbol_table.iter() {
                for target_id in target_ids {
                    if let Some(entity) = entity_map.get(target_id) {
                        // Last path component, then drop the extension.
                        let file_stem = entity.file_path.rsplit('/').next().unwrap_or(&entity.file_path);
                        let file_stem = strip_file_ext(file_stem);
                        idx.entry(file_stem.to_string())
                            .or_default()
                            .push((name.clone(), target_id.clone()));
                        if let Some(parent_start) = entity.file_path.rfind('/') {
                            let parent_path = &entity.file_path[..parent_start];
                            if let Some(dir_name_start) = parent_path.rfind('/') {
                                // Nested path: use only the innermost directory name.
                                let dir_name = &parent_path[dir_name_start + 1..];
                                if dir_name != file_stem {
                                    idx.entry(dir_name.to_string())
                                        .or_default()
                                        .push((name.clone(), target_id.clone()));
                                }
                            } else if !parent_path.is_empty() && parent_path != file_stem {
                                // Single-level path: the parent path is itself the directory name.
                                idx.entry(parent_path.to_string())
                                    .or_default()
                                    .push((name.clone(), target_id.clone()));
                            }
                        }
                    }
                }
            }
            idx
        } else {
            HashMap::new()
        };

        // Wrap symbol_table in Arc so it can be shared with the scope resolver
        // without an expensive deep clone (621K entries)
        let symbol_table = Arc::new(symbol_table);

        let pre_built = scope_resolve::PreBuiltLookups {
            symbol_table: Arc::clone(&symbol_table),
            class_members: scope_class_members,
            entity_ranges: scope_entity_ranges,
            go_pkg_index: owned_go_pkg_index,
        };

        // Run scope-aware resolver for supported languages (reuse pre-parsed trees)
        let has_scope_lang = file_paths.iter().any(|f| {
            // Extension is everything from the last '.' (dot included); "" if there is none.
            let ext = f.rfind('.').map(|i| &f[i..]).unwrap_or("");
            crate::parser::plugins::code::languages::get_language_config(ext)
                .and_then(|c| c.scope_resolve)
                .is_some()
        });
        let (scope_edges, scope_resolved_entities) = if has_scope_lang {
            let result = scope_resolve::resolve_with_scopes_full(root, file_paths, &all_entities, &entity_map, Some(parsed_files), Some(pre_built));
            // An entity counts as "scope-resolved" if it is the source of at least one scope edge.
            let resolved_entity_ids: HashSet<String> = result.edges.iter()
                .map(|(from, _, _)| from.clone())
                .collect();
            (result.edges, resolved_entity_ids)
        } else {
            (vec![], HashSet::new())
        };

        // Pass 2: Extract references (in parallel when the `parallel` feature is
        // enabled), then resolve against symbol table
        // Phase 1: Dot-chain resolution (precise self.X, this.X, ClassName.X)
        // Phase 2: Bag-of-words resolution (existing logic, skipping consumed words)
        // Skip entities already resolved by the scope resolver
        // Skip entities from non-code file types (JSON, SQL, etc.) that can't produce edges
        let resolved_refs: Vec<(String, String, RefType)> = maybe_par_iter!(all_entities)
            .flat_map(|entity| {
                // Skip entities already resolved by scope resolver
                if scope_resolved_entities.contains(&entity.id) {
                    return vec![];
                }

                // Skip entities from file types that don't have language configs
                // (JSON, SQL, YAML, etc. — they extract entities but never produce reference edges)
                let ext = entity.file_path.rfind('.').map(|i| &entity.file_path[i..]).unwrap_or("");
                if crate::parser::plugins::code::languages::get_language_config(ext).is_none() {
                    return vec![];
                }

                let mut entity_edges = Vec::new();
                // Names already turned into edges by Phase 1; Phase 2 must not re-resolve them.
                let mut consumed_words: HashSet<String> = HashSet::new();

                // Strip comments/strings once, reuse for both dot-chain and bag-of-words
                let stripped = strip_comments_and_strings(&entity.content);

                // Phase 1: Dot-chain resolution
                let dot_chains = extract_dot_chains(&stripped);

                for (receiver, member) in &dot_chains {
                    if *receiver == "self" || *receiver == "this" {
                        // self.B / this.B: resolve to sibling method in enclosing class
                        if let Some(class_name) = enclosing_class.get(entity.id.as_str()) {
                            if let Some(members) = class_members.get(class_name) {
                                for (n, tid) in members {
                                    // First matching member other than the entity itself wins.
                                    if *n == *member && *tid != entity.id.as_str() {
                                        entity_edges.push((
                                            entity.id.clone(),
                                            tid.to_string(),
                                            RefType::Calls,
                                        ));
                                        consumed_words.insert(member.to_string());
                                        break;
                                    }
                                }
                            }
                        }
                    } else if class_entity_names.contains(*receiver) {
                        // ClassName.B: resolve to class member
                        if let Some(members) = class_members.get(*receiver) {
                            for (n, tid) in members {
                                if *n == *member {
                                    entity_edges.push((
                                        entity.id.clone(),
                                        tid.to_string(),
                                        RefType::Calls,
                                    ));
                                    // Both halves of the chain are consumed, so no separate
                                    // edge to the class itself is produced in Phase 2.
                                    consumed_words.insert(member.to_string());
                                    consumed_words.insert(receiver.to_string());
                                    break;
                                }
                            }
                        }
                    }
                    // Unresolved chains fall through to bag-of-words below
                }

                // Phase 2: Bag-of-words resolution (skip words consumed by dot-chains)
                // Reuse the stripped content to avoid stripping twice
                let refs = extract_references_with_stripped(&entity.content, &entity.name, &stripped);
                for ref_name in refs {
                    if consumed_words.contains(ref_name) {
                        continue;
                    }

                    // Skip references to names that are this class's own methods
                    if class_child_names.contains(&(entity.id.as_str(), ref_name)) {
                        continue;
                    }

                    // Check import table first: if this file imports this name,
                    // resolve to the import target instead of global symbol table
                    let import_key = (entity.file_path.clone(), ref_name.to_string());
                    if let Some(import_target_id) = import_table.get(&import_key) {
                        // No self-edges and no parent<->child edges in either direction.
                        if import_target_id != &entity.id
                            && !parent_child_pairs.contains(&(entity.id.as_str(), import_target_id.as_str()))
                            && !parent_child_pairs.contains(&(import_target_id.as_str(), entity.id.as_str()))
                        {
                            let ref_type = infer_ref_type(&entity.content, &ref_name);
                            entity_edges.push((
                                entity.id.clone(),
                                import_target_id.clone(),
                                ref_type,
                            ));
                        }
                        // An imported name never falls back to the symbol table,
                        // even when the edge above was rejected.
                        continue;
                    }

                    if let Some(target_ids) = symbol_table.get(ref_name) {
                        // Without an import, only resolve to entities in the same file.
                        // Cross-file resolution is handled by the import table above.
                        // The first same-file candidate (other than the entity itself) is used.
                        let target = target_ids
                            .iter()
                            .find(|id| {
                                *id != &entity.id
                                    && entity_map
                                        .get(*id)
                                        .map_or(false, |e| e.file_path == entity.file_path)
                            });

                        if let Some(target_id) = target {
                            // Skip parent-child edges (class -> own method)
                            if parent_child_pairs.contains(&(entity.id.as_str(), target_id.as_str()))
                                || parent_child_pairs.contains(&(target_id.as_str(), entity.id.as_str()))
                            {
                                continue;
                            }
                            let ref_type = infer_ref_type(&entity.content, &ref_name);
                            entity_edges.push((
                                entity.id.clone(),
                                target_id.clone(),
                                ref_type,
                            ));
                        }
                    }
                }
                entity_edges
            })
            .collect();

        // Merge scope edges with bag-of-words edges, deduplicating on (from, to).
        // The first occurrence wins and ref_type is not part of the key, so a scope
        // edge takes precedence over a later edge with the same endpoints.
        let mut combined: Vec<(String, String, RefType)> = scope_edges;
        combined.extend(resolved_refs);
        let mut seen_edges: HashSet<(String, String)> = HashSet::with_capacity(combined.len());
        let mut all_resolved: Vec<(String, String, RefType)> = Vec::with_capacity(combined.len());
        for edge in combined {
            if seen_edges.insert((edge.0.clone(), edge.1.clone())) {
                all_resolved.push(edge);
            }
        }

        // Build edge indexes from resolved references
        let mut edges: Vec<EntityRef> = Vec::with_capacity(all_resolved.len());
        let mut dependents: HashMap<String, Vec<String>> = HashMap::new();
        let mut dependencies: HashMap<String, Vec<String>> = HashMap::new();

        for (from_entity, to_entity, ref_type) in all_resolved {
            dependents
                .entry(to_entity.clone())
                .or_default()
                .push(from_entity.clone());
            dependencies
                .entry(from_entity.clone())
                .or_default()
                .push(to_entity.clone());
            edges.push(EntityRef {
                from_entity,
                to_entity,
                ref_type,
            });
        }

        let graph = EntityGraph {
            entities: entity_map,
            edges,
            dependents,
            dependencies,
        };

        (graph, all_entities)
    }
448
449    /// Incrementally build an entity graph: reparse only stale files, reuse cached data for clean files.
450    ///
451    /// Uses the same full 3-phase resolution (scope + dot-chain + bag-of-words) as `build()`,
452    /// but only runs it for entities in stale files + clean entities whose cached edges
453    /// pointed into stale files (they need re-resolution since their targets may have changed).
454    pub fn build_incremental(
455        root: &Path,
456        stale_files: &[String],
457        all_file_paths: &[String],
458        cached_entities: Vec<SemanticEntity>,
459        cached_edges: Vec<EntityRef>,
460        stale_file_cached_entities: Vec<SemanticEntity>,
461        registry: &ParserRegistry,
462    ) -> (Self, Vec<SemanticEntity>) {
463        // Build set of stale file paths for quick lookup
464        let stale_set: HashSet<&str> = stale_files.iter().map(|s| s.as_str()).collect();
465
466        // Parse stale files in parallel to get new entities + trees
467        let per_file: Vec<(Vec<SemanticEntity>, Option<(String, String, tree_sitter::Tree)>)> = maybe_par_iter!(stale_files)
468            .filter_map(|file_path| {
469                let full_path = root.join(file_path);
470                let content = std::fs::read_to_string(&full_path).ok()?;
471                let (entities, tree) = registry.extract_entities_with_tree(file_path, &content)?;
472                let parsed = tree.map(|t| (file_path.clone(), content, t));
473                Some((entities, parsed))
474            })
475            .collect();
476
477        let mut new_entities: Vec<SemanticEntity> = Vec::new();
478        let mut parsed_files: Vec<(String, String, tree_sitter::Tree)> = Vec::new();
479        for (entities, parsed) in per_file {
480            new_entities.extend(entities);
481            if let Some(p) = parsed {
482                parsed_files.push(p);
483            }
484        }
485
486        // Entity-level diffing: compare new stale-file entities against cached versions
487        // Build content_hash lookup from cached stale-file entities
488        let cached_hashes: HashMap<&str, &str> = stale_file_cached_entities
489            .iter()
490            .map(|e| (e.id.as_str(), e.content_hash.as_str()))
491            .collect();
492
493        // Classify new stale-file entities
494        let mut truly_changed_ids: HashSet<String> = HashSet::new();
495        let mut content_clean_ids: HashSet<String> = HashSet::new();
496        for entity in &new_entities {
497            match cached_hashes.get(entity.id.as_str()) {
498                Some(old_hash) if *old_hash == entity.content_hash.as_str() => {
499                    content_clean_ids.insert(entity.id.clone());
500                }
501                _ => {
502                    // Hash differs or entity is new
503                    truly_changed_ids.insert(entity.id.clone());
504                }
505            }
506        }
507
508        // Detect deleted entities: in cached stale but not in new
509        let new_entity_ids: HashSet<&str> = new_entities.iter().map(|e| e.id.as_str()).collect();
510        let deleted_ids: HashSet<&str> = stale_file_cached_entities
511            .iter()
512            .filter(|e| !new_entity_ids.contains(e.id.as_str()))
513            .map(|e| e.id.as_str())
514            .collect();
515
516        // Merge: cached (clean) entities + new (stale) entities
517        let all_entities: Vec<SemanticEntity> = cached_entities
518            .into_iter()
519            .chain(new_entities.into_iter())
520            .collect();
521
522        // Find affected clean entities: only care about edges pointing to truly_changed/deleted
523        let mut affected_clean_ids: HashSet<String> = HashSet::new();
524        for edge in &cached_edges {
525            let to_truly_changed = truly_changed_ids.contains(&edge.to_entity)
526                || deleted_ids.contains(edge.to_entity.as_str());
527            if to_truly_changed && !stale_set.contains(
528                all_entities.iter()
529                    .find(|e| e.id == edge.from_entity)
530                    .map(|e| e.file_path.as_str())
531                    .unwrap_or("")
532            ) {
533                affected_clean_ids.insert(edge.from_entity.clone());
534            }
535        }
536
537        // Collect all stale entity IDs (for edge filtering)
538        let stale_entity_ids: HashSet<&str> = all_entities
539            .iter()
540            .filter(|e| stale_set.contains(e.file_path.as_str()))
541            .map(|e| e.id.as_str())
542            .collect();
543
544        // Keep edges where both endpoints are in clean (non-stale) files and from_entity
545        // is not affected by target changes. Drop ALL cached edges from stale-file entities
546        // (even content_clean ones) because import/scope context may have changed even when
547        // entity content didn't. See: https://github.com/Ataraxy-Labs/sem/issues/116
548        let kept_edges: Vec<EntityRef> = cached_edges
549            .into_iter()
550            .filter(|e| {
551                let from_stale = stale_entity_ids.contains(e.from_entity.as_str());
552                let to_stale = stale_entity_ids.contains(e.to_entity.as_str());
553
554                if !from_stale && !to_stale && !affected_clean_ids.contains(&e.from_entity) {
555                    // Both endpoints in clean files, from not affected
556                    return true;
557                }
558                false
559            })
560            .collect();
561
562        // Set of entity IDs that need resolution: all stale-file entities + affected clean.
563        // Content-clean stale entities must be re-resolved because import/scope context
564        // may have changed even if entity body content is identical.
565        let needs_resolution: HashSet<&str> = all_entities
566            .iter()
567            .filter(|e| {
568                truly_changed_ids.contains(&e.id)
569                    || content_clean_ids.contains(&e.id)
570                    || affected_clean_ids.contains(&e.id)
571            })
572            .map(|e| e.id.as_str())
573            .collect();
574
575        // Now run the same resolution logic as build() but only for entities in needs_resolution.
576        // We still need the full context (symbol table, import table, etc.) from ALL entities.
577
578        // Build symbol table from all entities
579        let mut symbol_table: HashMap<String, Vec<String>> = HashMap::with_capacity(all_entities.len());
580        let mut entity_map: HashMap<String, EntityInfo> = HashMap::with_capacity(all_entities.len());
581
582        for entity in &all_entities {
583            symbol_table
584                .entry(entity.name.clone())
585                .or_default()
586                .push(entity.id.clone());
587            entity_map.insert(
588                entity.id.clone(),
589                EntityInfo {
590                    id: entity.id.clone(),
591                    name: entity.name.clone(),
592                    entity_type: entity.entity_type.clone(),
593                    file_path: entity.file_path.clone(),
594                    parent_id: entity.parent_id.clone(),
595                    start_line: entity.start_line,
596                    end_line: entity.end_line,
597                },
598            );
599        }
600
601        // Build parent-child set
602        let parent_child_pairs: HashSet<(&str, &str)> = all_entities
603            .iter()
604            .filter_map(|e| {
605                e.parent_id.as_ref().map(|pid| (pid.as_str(), e.id.as_str()))
606            })
607            .collect();
608
609        let class_child_names: HashSet<(&str, &str)> = all_entities
610            .iter()
611            .filter_map(|e| {
612                e.parent_id.as_ref().map(|pid| (pid.as_str(), e.name.as_str()))
613            })
614            .collect();
615
616        let class_entity_names: HashSet<&str> = all_entities
617            .iter()
618            .filter(|e| matches!(e.entity_type.as_str(), "class" | "struct" | "interface" | "class_type"))
619            .map(|e| e.name.as_str())
620            .collect();
621
622        let id_to_name: HashMap<&str, &str> = all_entities
623            .iter()
624            .map(|e| (e.id.as_str(), e.name.as_str()))
625            .collect();
626
627        let mut enclosing_class: HashMap<&str, &str> = HashMap::new();
628        let mut class_members: HashMap<&str, Vec<(&str, &str)>> = HashMap::new();
629
630        for entity in &all_entities {
631            if let Some(ref pid) = entity.parent_id {
632                if let Some(&parent_name) = id_to_name.get(pid.as_str()) {
633                    if class_entity_names.contains(parent_name) {
634                        enclosing_class.insert(entity.id.as_str(), parent_name);
635                        class_members
636                            .entry(parent_name)
637                            .or_default()
638                            .push((entity.name.as_str(), entity.id.as_str()));
639                    }
640                }
641            }
642        }
643
644        // Build import table from ALL files (imports may reference stale entities)
645        let import_table = build_import_table(root, all_file_paths, &symbol_table, &entity_map, Some(&parsed_files));
646
647        // Run scope-aware resolver only on files that need resolution
648        let resolve_file_paths: Vec<String> = all_file_paths
649            .iter()
650            .filter(|f| {
651                // Include file if any entity in needs_resolution belongs to it
652                stale_set.contains(f.as_str()) || all_entities.iter().any(|e| {
653                    e.file_path == **f && affected_clean_ids.contains(&e.id)
654                })
655            })
656            .cloned()
657            .collect();
658
659        let has_scope_lang = resolve_file_paths.iter().any(|f| {
660            let ext = f.rfind('.').map(|i| &f[i..]).unwrap_or("");
661            crate::parser::plugins::code::languages::get_language_config(ext)
662                .and_then(|c| c.scope_resolve)
663                .is_some()
664        });
665        let (scope_edges, scope_resolved_entities) = if has_scope_lang {
666            // Pass pre-parsed stale-file trees; scope_resolve reads affected clean files from disk
667            let resolve_set: HashSet<&str> = resolve_file_paths.iter().map(|s| s.as_str()).collect();
668            let relevant_parsed: Vec<(String, String, tree_sitter::Tree)> = parsed_files
669                .into_iter()
670                .filter(|(fp, _, _)| resolve_set.contains(fp.as_str()))
671                .collect();
672            let pre = if relevant_parsed.is_empty() { None } else { Some(relevant_parsed) };
673            let result = scope_resolve::resolve_with_scopes_full(root, &resolve_file_paths, &all_entities, &entity_map, pre, None);
674            let resolved_entity_ids: HashSet<String> = result.edges.iter()
675                .map(|(from, _, _)| from.clone())
676                .collect();
677            (result.edges, resolved_entity_ids)
678        } else {
679            (vec![], HashSet::new())
680        };
681
682        // Resolve references only for entities in needs_resolution
683        let resolved_refs: Vec<(String, String, RefType)> = maybe_par_iter!(all_entities)
684            .filter(|e| needs_resolution.contains(e.id.as_str()))
685            .flat_map(|entity| {
686                if scope_resolved_entities.contains(&entity.id) {
687                    return vec![];
688                }
689
690                // Skip entities from non-code file types (JSON, SQL, etc.)
691                let ext = entity.file_path.rfind('.').map(|i| &entity.file_path[i..]).unwrap_or("");
692                if crate::parser::plugins::code::languages::get_language_config(ext).is_none() {
693                    return vec![];
694                }
695
696                let mut entity_edges = Vec::new();
697                let mut consumed_words: HashSet<String> = HashSet::new();
698
699                // Strip comments/strings once, reuse for both dot-chain and bag-of-words
700                let stripped = strip_comments_and_strings(&entity.content);
701
702                // Phase 1: Dot-chain resolution
703                let dot_chains = extract_dot_chains(&stripped);
704
705                for (receiver, member) in &dot_chains {
706                    if *receiver == "self" || *receiver == "this" {
707                        if let Some(class_name) = enclosing_class.get(entity.id.as_str()) {
708                            if let Some(members) = class_members.get(class_name) {
709                                for (n, tid) in members {
710                                    if *n == *member && *tid != entity.id.as_str() {
711                                        entity_edges.push((
712                                            entity.id.clone(),
713                                            tid.to_string(),
714                                            RefType::Calls,
715                                        ));
716                                        consumed_words.insert(member.to_string());
717                                        break;
718                                    }
719                                }
720                            }
721                        }
722                    } else if class_entity_names.contains(*receiver) {
723                        if let Some(members) = class_members.get(*receiver) {
724                            for (n, tid) in members {
725                                if *n == *member {
726                                    entity_edges.push((
727                                        entity.id.clone(),
728                                        tid.to_string(),
729                                        RefType::Calls,
730                                    ));
731                                    consumed_words.insert(member.to_string());
732                                    consumed_words.insert(receiver.to_string());
733                                    break;
734                                }
735                            }
736                        }
737                    }
738                }
739
740                // Phase 2: Bag-of-words resolution (reuse stripped content)
741                let refs = extract_references_with_stripped(&entity.content, &entity.name, &stripped);
742                for ref_name in refs {
743                    if consumed_words.contains(ref_name) {
744                        continue;
745                    }
746                    if class_child_names.contains(&(entity.id.as_str(), ref_name)) {
747                        continue;
748                    }
749
750                    let import_key = (entity.file_path.clone(), ref_name.to_string());
751                    if let Some(import_target_id) = import_table.get(&import_key) {
752                        if import_target_id != &entity.id
753                            && !parent_child_pairs.contains(&(entity.id.as_str(), import_target_id.as_str()))
754                            && !parent_child_pairs.contains(&(import_target_id.as_str(), entity.id.as_str()))
755                        {
756                            let ref_type = infer_ref_type(&entity.content, &ref_name);
757                            entity_edges.push((
758                                entity.id.clone(),
759                                import_target_id.clone(),
760                                ref_type,
761                            ));
762                        }
763                        continue;
764                    }
765
766                    if let Some(target_ids) = symbol_table.get(ref_name) {
767                        let target = target_ids
768                            .iter()
769                            .find(|id| {
770                                *id != &entity.id
771                                    && entity_map
772                                        .get(*id)
773                                        .map_or(false, |e| e.file_path == entity.file_path)
774                            });
775
776                        if let Some(target_id) = target {
777                            if parent_child_pairs.contains(&(entity.id.as_str(), target_id.as_str()))
778                                || parent_child_pairs.contains(&(target_id.as_str(), entity.id.as_str()))
779                            {
780                                continue;
781                            }
782                            let ref_type = infer_ref_type(&entity.content, &ref_name);
783                            entity_edges.push((
784                                entity.id.clone(),
785                                target_id.clone(),
786                                ref_type,
787                            ));
788                        }
789                    }
790                }
791                entity_edges
792            })
793            .collect();
794
795        // Merge scope edges + bag-of-words edges + kept cached edges
796        let mut combined: Vec<(String, String, RefType)> = scope_edges;
797        combined.extend(resolved_refs);
798        let mut seen_edges: HashSet<(String, String)> = HashSet::with_capacity(combined.len());
799        let mut all_resolved: Vec<(String, String, RefType)> = Vec::with_capacity(combined.len());
800        for edge in combined {
801            if seen_edges.insert((edge.0.clone(), edge.1.clone())) {
802                all_resolved.push(edge);
803            }
804        }
805
806        // Build final edge list: kept edges + newly resolved edges
807        let mut edges: Vec<EntityRef> = Vec::with_capacity(kept_edges.len() + all_resolved.len());
808        let mut dependents: HashMap<String, Vec<String>> = HashMap::new();
809        let mut dependencies: HashMap<String, Vec<String>> = HashMap::new();
810
811        // Track all edge pairs for dedup
812        let mut all_edge_pairs: HashSet<(String, String)> = HashSet::new();
813
814        // Add kept cached edges
815        for edge in kept_edges {
816            all_edge_pairs.insert((edge.from_entity.clone(), edge.to_entity.clone()));
817            dependents
818                .entry(edge.to_entity.clone())
819                .or_default()
820                .push(edge.from_entity.clone());
821            dependencies
822                .entry(edge.from_entity.clone())
823                .or_default()
824                .push(edge.to_entity.clone());
825            edges.push(edge);
826        }
827
828        // Add newly resolved edges, dedup against kept edges
829        for (from_entity, to_entity, ref_type) in all_resolved {
830            if !all_edge_pairs.insert((from_entity.clone(), to_entity.clone())) {
831                continue;
832            }
833            dependents
834                .entry(to_entity.clone())
835                .or_default()
836                .push(from_entity.clone());
837            dependencies
838                .entry(from_entity.clone())
839                .or_default()
840                .push(to_entity.clone());
841            edges.push(EntityRef {
842                from_entity,
843                to_entity,
844                ref_type,
845            });
846        }
847
848        let graph = EntityGraph {
849            entities: entity_map,
850            edges,
851            dependents,
852            dependencies,
853        };
854
855        (graph, all_entities)
856    }
857
858    /// Get entities that depend on the given entity (reverse deps).
859    pub fn get_dependents(&self, entity_id: &str) -> Vec<&EntityInfo> {
860        self.dependents
861            .get(entity_id)
862            .map(|ids| {
863                ids.iter()
864                    .filter_map(|id| self.entities.get(id))
865                    .collect()
866            })
867            .unwrap_or_default()
868    }
869
870    /// Get entities that the given entity depends on (forward deps).
871    pub fn get_dependencies(&self, entity_id: &str) -> Vec<&EntityInfo> {
872        self.dependencies
873            .get(entity_id)
874            .map(|ids| {
875                ids.iter()
876                    .filter_map(|id| self.entities.get(id))
877                    .collect()
878            })
879            .unwrap_or_default()
880    }
881
882    /// Impact analysis: if the given entity changes, what else might be affected?
883    /// Returns all transitive dependents (breadth-first), capped at 10k.
884    pub fn impact_analysis(&self, entity_id: &str) -> Vec<&EntityInfo> {
885        self.impact_analysis_capped(entity_id, 10_000)
886    }
887
888    /// Depth-limited impact analysis. Returns transitive dependents with their BFS depth.
889    /// `max_depth == 0` means unlimited. Default depth of 2 covers direct + one transitive level.
890    pub fn impact_analysis_bounded(&self, entity_id: &str, max_depth: usize) -> Vec<(&EntityInfo, usize)> {
891        let mut visited: HashSet<&str> = HashSet::new();
892        let mut queue: std::collections::VecDeque<(&str, usize)> = std::collections::VecDeque::new();
893        let mut result = Vec::new();
894
895        let start_key = match self.entities.get_key_value(entity_id) {
896            Some((k, _)) => k.as_str(),
897            None => return result,
898        };
899
900        queue.push_back((start_key, 0));
901        visited.insert(start_key);
902
903        while let Some((current, depth)) = queue.pop_front() {
904            if let Some(deps) = self.dependents.get(current) {
905                let next_depth = depth + 1;
906                if max_depth > 0 && next_depth > max_depth {
907                    continue;
908                }
909                for dep in deps {
910                    if visited.insert(dep.as_str()) {
911                        if let Some(info) = self.entities.get(dep.as_str()) {
912                            result.push((info, next_depth));
913                        }
914                        queue.push_back((dep.as_str(), next_depth));
915                    }
916                }
917            }
918        }
919
920        result
921    }
922
923    /// Impact analysis with a cap on maximum nodes visited.
924    /// Returns transitive dependents up to the cap. Uses borrowed strings.
925    pub fn impact_analysis_capped(&self, entity_id: &str, max_visited: usize) -> Vec<&EntityInfo> {
926        let mut visited: HashSet<&str> = HashSet::new();
927        let mut queue: std::collections::VecDeque<&str> = std::collections::VecDeque::new();
928        let mut result = Vec::new();
929
930        let start_key = match self.entities.get_key_value(entity_id) {
931            Some((k, _)) => k.as_str(),
932            None => return result,
933        };
934
935        queue.push_back(start_key);
936        visited.insert(start_key);
937
938        while let Some(current) = queue.pop_front() {
939            if result.len() >= max_visited {
940                break;
941            }
942            if let Some(deps) = self.dependents.get(current) {
943                for dep in deps {
944                    if visited.insert(dep.as_str()) {
945                        if let Some(info) = self.entities.get(dep.as_str()) {
946                            result.push(info);
947                        }
948                        queue.push_back(dep.as_str());
949                        if result.len() >= max_visited {
950                            break;
951                        }
952                    }
953                }
954            }
955        }
956
957        result
958    }
959
960    /// Count transitive dependents without collecting them (faster for large graphs).
961    /// Uses borrowed strings to avoid allocation overhead.
962    pub fn impact_count(&self, entity_id: &str, max_count: usize) -> usize {
963        let mut visited: HashSet<&str> = HashSet::new();
964        let mut queue: std::collections::VecDeque<&str> = std::collections::VecDeque::new();
965        let mut count = 0;
966
967        // We need entity_id to live long enough; look it up in our entities map
968        let start_key = match self.entities.get_key_value(entity_id) {
969            Some((k, _)) => k.as_str(),
970            None => return 0,
971        };
972
973        queue.push_back(start_key);
974        visited.insert(start_key);
975
976        while let Some(current) = queue.pop_front() {
977            if count >= max_count {
978                break;
979            }
980            if let Some(deps) = self.dependents.get(current) {
981                for dep in deps {
982                    if visited.insert(dep.as_str()) {
983                        count += 1;
984                        queue.push_back(dep.as_str());
985                        if count >= max_count {
986                            break;
987                        }
988                    }
989                }
990            }
991        }
992
993        count
994    }
995
996    /// Filter entities to those that look like tests.
997    /// Uses name heuristics, file path patterns, and content patterns.
998    pub fn filter_test_entities(&self, entities: &[crate::model::entity::SemanticEntity]) -> HashSet<String> {
999        let mut test_ids = HashSet::new();
1000        for entity in entities {
1001            if is_test_entity(entity) {
1002                test_ids.insert(entity.id.clone());
1003            }
1004        }
1005        test_ids
1006    }
1007
1008    /// Impact analysis filtered to test entities only.
1009    /// Returns transitive dependents that are test functions/methods.
1010    pub fn test_impact(
1011        &self,
1012        entity_id: &str,
1013        all_entities: &[crate::model::entity::SemanticEntity],
1014    ) -> Vec<&EntityInfo> {
1015        let test_ids = self.filter_test_entities(all_entities);
1016        let impact = self.impact_analysis(entity_id);
1017        impact
1018            .into_iter()
1019            .filter(|info| test_ids.contains(&info.id))
1020            .collect()
1021    }
1022
1023    /// Incrementally update the graph from a set of changed files.
1024    ///
1025    /// Instead of rebuilding the entire graph, this only re-extracts entities
1026    /// from changed files and re-resolves their references. This is faster
1027    /// than a full rebuild when only a few files changed.
1028    ///
1029    /// For each changed file:
1030    /// - Deleted: remove all entities from that file, prune edges
1031    /// - Added/Modified: remove old entities, extract new ones, rebuild references
1032    /// - Renamed: update file paths in entity info
1033    pub fn update_from_changes(
1034        &mut self,
1035        changed_files: &[FileChange],
1036        root: &Path,
1037        registry: &ParserRegistry,
1038    ) {
1039        let mut affected_files: HashSet<String> = HashSet::new();
1040        let mut new_entities: Vec<SemanticEntity> = Vec::new();
1041
1042        for change in changed_files {
1043            affected_files.insert(change.file_path.clone());
1044            if let Some(ref old_path) = change.old_file_path {
1045                affected_files.insert(old_path.clone());
1046            }
1047
1048            match change.status {
1049                FileStatus::Deleted => {
1050                    self.remove_entities_for_file(&change.file_path);
1051                }
1052                FileStatus::Renamed => {
1053                    // Update file paths for renamed files
1054                    if let Some(ref old_path) = change.old_file_path {
1055                        self.remove_entities_for_file(old_path);
1056                    }
1057                    // Extract entities from the new file
1058                    if let Some(entities) = self.extract_file_entities(
1059                        &change.file_path,
1060                        change.after_content.as_deref(),
1061                        root,
1062                        registry,
1063                    ) {
1064                        new_entities.extend(entities);
1065                    }
1066                }
1067                FileStatus::Added | FileStatus::Modified => {
1068                    // Remove old entities for this file
1069                    self.remove_entities_for_file(&change.file_path);
1070                    // Extract new entities
1071                    if let Some(entities) = self.extract_file_entities(
1072                        &change.file_path,
1073                        change.after_content.as_deref(),
1074                        root,
1075                        registry,
1076                    ) {
1077                        new_entities.extend(entities);
1078                    }
1079                }
1080            }
1081        }
1082
1083        // Add new entities to the entity map
1084        for entity in &new_entities {
1085            self.entities.insert(
1086                entity.id.clone(),
1087                EntityInfo {
1088                    id: entity.id.clone(),
1089                    name: entity.name.clone(),
1090                    entity_type: entity.entity_type.clone(),
1091                    file_path: entity.file_path.clone(),
1092                    parent_id: entity.parent_id.clone(),
1093                    start_line: entity.start_line,
1094                    end_line: entity.end_line,
1095                },
1096            );
1097        }
1098
1099        // Rebuild the global symbol table from all current entities
1100        let symbol_table = self.build_symbol_table();
1101
1102        // Re-resolve references for new entities
1103        for entity in &new_entities {
1104            self.resolve_entity_references(entity, &symbol_table);
1105        }
1106
1107        // Also re-resolve references for entities in OTHER files that might
1108        // reference entities in changed files (their targets may have changed)
1109        let changed_entity_names: HashSet<String> = new_entities
1110            .iter()
1111            .map(|e| e.name.clone())
1112            .collect();
1113
1114        // Find entities in unchanged files that reference any changed entity name
1115        let entities_to_recheck: Vec<String> = self
1116            .entities
1117            .values()
1118            .filter(|e| !affected_files.contains(&e.file_path))
1119            .filter(|e| {
1120                self.dependencies
1121                    .get(&e.id)
1122                    .map_or(false, |deps| {
1123                        deps.iter().any(|dep_id| {
1124                            self.entities
1125                                .get(dep_id)
1126                                .map_or(false, |dep| changed_entity_names.contains(&dep.name))
1127                        })
1128                    })
1129            })
1130            .map(|e| e.id.clone())
1131            .collect();
1132
1133        // We don't have the full SemanticEntity for unchanged files, so we skip
1134        // deep re-resolution here. The forward/reverse indexes are already updated
1135        // by remove_entities_for_file and resolve_entity_references.
1136        // For entities that had dangling references (their target was deleted),
1137        // the edges were already pruned.
1138        let _ = entities_to_recheck; // acknowledge but don't act on for now
1139    }
1140
1141    /// Extract entities from a file, using provided content or reading from disk.
1142    fn extract_file_entities(
1143        &self,
1144        file_path: &str,
1145        content: Option<&str>,
1146        root: &Path,
1147        registry: &ParserRegistry,
1148    ) -> Option<Vec<SemanticEntity>> {
1149        let content = if let Some(c) = content {
1150            c.to_string()
1151        } else {
1152            let full_path = root.join(file_path);
1153            std::fs::read_to_string(&full_path).ok()?
1154        };
1155
1156        Some(registry.extract_entities(file_path, &content))
1157    }
1158
1159    /// Remove all entities belonging to a specific file and prune their edges.
1160    fn remove_entities_for_file(&mut self, file_path: &str) {
1161        // Collect entity IDs to remove
1162        let ids_to_remove: Vec<String> = self
1163            .entities
1164            .values()
1165            .filter(|e| e.file_path == file_path)
1166            .map(|e| e.id.clone())
1167            .collect();
1168
1169        let id_set: HashSet<&str> = ids_to_remove.iter().map(|s| s.as_str()).collect();
1170
1171        // Remove from entity map
1172        for id in &ids_to_remove {
1173            self.entities.remove(id);
1174        }
1175
1176        // Remove edges involving these entities
1177        self.edges
1178            .retain(|e| !id_set.contains(e.from_entity.as_str()) && !id_set.contains(e.to_entity.as_str()));
1179
1180        // Clean up dependency/dependent indexes
1181        for id in &ids_to_remove {
1182            // Remove forward deps
1183            if let Some(deps) = self.dependencies.remove(id) {
1184                // Also remove from reverse index
1185                for dep in &deps {
1186                    if let Some(dependents) = self.dependents.get_mut(dep) {
1187                        dependents.retain(|d| d != id);
1188                    }
1189                }
1190            }
1191            // Remove reverse deps
1192            if let Some(deps) = self.dependents.remove(id) {
1193                // Also remove from forward index
1194                for dep in &deps {
1195                    if let Some(dependencies) = self.dependencies.get_mut(dep) {
1196                        dependencies.retain(|d| d != id);
1197                    }
1198                }
1199            }
1200        }
1201    }
1202
1203    /// Build a symbol table from all current entities.
1204    fn build_symbol_table(&self) -> HashMap<String, Vec<String>> {
1205        let mut symbol_table: HashMap<String, Vec<String>> = HashMap::new();
1206        for entity in self.entities.values() {
1207            symbol_table
1208                .entry(entity.name.clone())
1209                .or_default()
1210                .push(entity.id.clone());
1211        }
1212        symbol_table
1213    }
1214
1215    /// Resolve references for a single entity against the symbol table.
1216    fn resolve_entity_references(
1217        &mut self,
1218        entity: &SemanticEntity,
1219        symbol_table: &HashMap<String, Vec<String>>,
1220    ) {
1221        let refs = extract_references_from_content(&entity.content, &entity.name);
1222
1223        for ref_name in refs {
1224            if let Some(target_ids) = symbol_table.get(ref_name) {
1225                let target = target_ids
1226                    .iter()
1227                    .find(|id| {
1228                        *id != &entity.id
1229                            && self
1230                                .entities
1231                                .get(*id)
1232                                .map_or(false, |e| e.file_path == entity.file_path)
1233                    })
1234                    .or_else(|| target_ids.iter().find(|id| *id != &entity.id));
1235
1236                if let Some(target_id) = target {
1237                    let ref_type = infer_ref_type(&entity.content, &ref_name);
1238                    self.edges.push(EntityRef {
1239                        from_entity: entity.id.clone(),
1240                        to_entity: target_id.clone(),
1241                        ref_type,
1242                    });
1243                    self.dependents
1244                        .entry(target_id.clone())
1245                        .or_default()
1246                        .push(entity.id.clone());
1247                    self.dependencies
1248                        .entry(entity.id.clone())
1249                        .or_default()
1250                        .push(target_id.clone());
1251                }
1252            }
1253        }
1254    }
1255}
1256
1257/// Check if an entity looks like a test based on name, file path, and content patterns.
1258fn is_test_entity(entity: &crate::model::entity::SemanticEntity) -> bool {
1259    let name = &entity.name;
1260    let path = &entity.file_path;
1261    let content = &entity.content;
1262
1263    // Name patterns
1264    if name.starts_with("test_") || name.starts_with("Test") || name.ends_with("_test") || name.ends_with("Test") {
1265        return true;
1266    }
1267    if name.starts_with("it_") || name.starts_with("describe_") || name.starts_with("spec_") {
1268        return true;
1269    }
1270
1271    // File path patterns
1272    let path_lower = path.to_lowercase();
1273    let in_test_file = path_lower.contains("/test/")
1274        || path_lower.contains("/tests/")
1275        || path_lower.contains("/spec/")
1276        || path_lower.contains("_test.")
1277        || path_lower.contains(".test.")
1278        || path_lower.contains("_spec.")
1279        || path_lower.contains(".spec.");
1280
1281    // Content patterns (test annotations/decorators)
1282    let has_test_marker = content.contains("#[test]")
1283        || content.contains("#[cfg(test)]")
1284        || content.contains("@Test")
1285        || content.contains("@pytest")
1286        || content.contains("@test")
1287        || content.contains("describe(")
1288        || content.contains("it(")
1289        || content.contains("test(");
1290
1291    in_test_file && has_test_marker
1292}
1293
1294/// Build import table: maps (file_path, imported_name) → target entity ID.
1295///
1296/// Parses `from X import Y` / `import X` / `use X` style statements from entity content
1297/// and resolves Y to the entity it refers to in the symbol table.
1298fn build_import_table(
1299    root: &Path,
1300    file_paths: &[String],
1301    symbol_table: &HashMap<String, Vec<String>>,
1302    entity_map: &HashMap<String, EntityInfo>,
1303    pre_parsed_content: Option<&[(String, String, tree_sitter::Tree)]>,
1304) -> HashMap<(String, String), String> {
1305    // Build a content lookup from pre-parsed files to avoid re-reading from disk
1306    let content_map: HashMap<&str, &str> = pre_parsed_content
1307        .map(|files| {
1308            files.iter().map(|(fp, content, _)| (fp.as_str(), content.as_str())).collect()
1309        })
1310        .unwrap_or_default();
1311
1312    // Go imports are handled entirely by the scope resolver (which uses an indexed approach).
1313    // We no longer need a go_pkg_index here since Go files are skipped below.
1314
1315    // Process files in parallel, each producing local import entries
1316    let per_file_imports: Vec<Vec<((String, String), String)>> = maybe_par_iter!(file_paths)
1317        .filter_map(|file_path| {
1318            // Go imports are handled entirely by the scope resolver — skip here
1319            if file_path.ends_with(".go") {
1320                return None;
1321            }
1322
1323            // Use pre-parsed content if available, otherwise read from disk
1324            let owned_content: Option<String>;
1325            let content: &str = if let Some(c) = content_map.get(file_path.as_str()) {
1326                c
1327            } else {
1328                let full_path = root.join(file_path);
1329                owned_content = std::fs::read_to_string(&full_path).ok();
1330                match owned_content.as_deref() {
1331                    Some(c) => c,
1332                    None => return None,
1333                }
1334            };
1335
1336            let mut local_imports: Vec<((String, String), String)> = Vec::new();
1337
1338            // Join multi-line imports into single logical lines
1339            // e.g. "from .cookies import (\n    foo,\n    bar,\n)" -> "from .cookies import foo, bar"
1340            let mut logical_lines: Vec<String> = Vec::new();
1341            let mut current_line = String::new();
1342            let mut in_parens = false;
1343
1344            for line in content.lines() {
1345                let trimmed = line.trim();
1346                if in_parens {
1347                    // Strip parentheses and comments
1348                    let clean = trimmed.trim_end_matches(|c: char| c == ')' || c == ',');
1349                    let clean = clean.split('#').next().unwrap_or(clean).trim();
1350                    if !clean.is_empty() && clean != "(" {
1351                        current_line.push_str(", ");
1352                        current_line.push_str(clean);
1353                    }
1354                    if trimmed.contains(')') {
1355                        in_parens = false;
1356                        logical_lines.push(std::mem::take(&mut current_line));
1357                    }
1358                } else if trimmed.starts_with("from ") && trimmed.contains(" import ") {
1359                    if trimmed.contains('(') && !trimmed.contains(')') {
1360                        // Multi-line import starts
1361                        in_parens = true;
1362                        // Take everything before the paren
1363                        let before_paren = trimmed.split('(').next().unwrap_or(trimmed);
1364                        current_line = before_paren.trim().to_string();
1365                        // Also grab anything after the paren on this line
1366                        if let Some(after) = trimmed.split('(').nth(1) {
1367                            let after = after.trim().trim_end_matches(')').trim();
1368                            if !after.is_empty() {
1369                                current_line.push(' ');
1370                                current_line.push_str(after);
1371                            }
1372                        }
1373                    } else {
1374                        logical_lines.push(trimmed.to_string());
1375                    }
1376                }
1377            }
1378
1379            for logical_line in &logical_lines {
1380                if let Some(rest) = logical_line.strip_prefix("from ") {
1381                    // Find " import " or " import," (multi-line imports join with comma)
1382                    let import_match = rest.find(" import ")
1383                        .map(|pos| (pos, 8))
1384                        .or_else(|| rest.find(" import,").map(|pos| (pos, 8)));
1385                    if let Some((import_pos, skip)) = import_match {
1386                        let module_path = &rest[..import_pos];
1387                        let names_str = &rest[import_pos + skip..];
1388
1389                        let source_module = module_path
1390                            .trim_start_matches('.')
1391                            .rsplit('.')
1392                            .next()
1393                            .unwrap_or(module_path.trim_start_matches('.'));
1394
1395                        for name_part in names_str.split(',') {
1396                            let name_part = name_part.trim();
1397                            let imported_name = name_part.split_whitespace().next().unwrap_or(name_part);
1398                            // Strip trailing parens/punctuation
1399                            let imported_name = imported_name.trim_matches(|c: char| c == '(' || c == ')' || c == ',');
1400                            if imported_name.is_empty() {
1401                                continue;
1402                            }
1403
1404                            if let Some(target_ids) = symbol_table.get(imported_name) {
1405                                let target = target_ids.iter().find(|id| {
1406                                    entity_map.get(*id).map_or(false, |e| {
1407                                        let stem = e.file_path.rsplit('/').next().unwrap_or(&e.file_path);
1408                                        let stem = stem.strip_suffix(".py")
1409                                            .or_else(|| stem.strip_suffix(".ts"))
1410                                            .or_else(|| stem.strip_suffix(".js"))
1411                                            .or_else(|| stem.strip_suffix(".rs"))
1412                                            .unwrap_or(stem);
1413                                        stem == source_module
1414                                    })
1415                                });
1416                                if let Some(target_id) = target {
1417                                    local_imports.push((
1418                                        (file_path.clone(), imported_name.to_string()),
1419                                        target_id.clone(),
1420                                    ));
1421                                }
1422                            }
1423                        }
1424                    }
1425                }
1426            }
1427
1428            // JS/TS imports: import { foo, bar as baz } from './module'
1429            //                import Foo from './module'
1430            let is_js_ts = file_path.ends_with(".js") || file_path.ends_with(".ts")
1431                || file_path.ends_with(".jsx") || file_path.ends_with(".tsx");
1432
1433            if is_js_ts {
1434                static JS_NAMED_RE: LazyLock<Regex> = LazyLock::new(|| {
1435                    Regex::new(r#"import\s*\{([^}]+)\}\s*from\s*['"]([^'"]+)['"]"#).unwrap()
1436                });
1437                static JS_DEFAULT_RE: LazyLock<Regex> = LazyLock::new(|| {
1438                    Regex::new(r#"import\s+(?:type\s+)?([A-Za-z_]\w*)\s+from\s*['"]([^'"]+)['"]"#).unwrap()
1439                });
1440
1441                for cap in JS_NAMED_RE.captures_iter(content) {
1442                    let names_str = cap.get(1).unwrap().as_str();
1443                    let module_path = cap.get(2).unwrap().as_str();
1444                    let source_module = module_path.rsplit('/').next().unwrap_or(module_path);
1445                    let source_module = strip_js_ext(source_module);
1446
1447                    for name_part in names_str.split(',') {
1448                        let name_part = name_part.trim();
1449                        if name_part.is_empty() { continue; }
1450
1451                        // Handle "foo as bar" aliases and "type foo" prefixes
1452                        let (original_name, local_name) = if let Some(pos) = name_part.find(" as ") {
1453                            let orig = name_part[..pos].trim();
1454                            let local = name_part[pos + 4..].trim();
1455                            let orig = orig.strip_prefix("type ").unwrap_or(orig);
1456                            (orig, local)
1457                        } else {
1458                            let name = name_part.strip_prefix("type ").unwrap_or(name_part);
1459                            (name, name)
1460                        };
1461
1462                        if original_name.is_empty() || local_name.is_empty() { continue; }
1463
1464                        if let Some(target_ids) = symbol_table.get(original_name) {
1465                            let target = target_ids.iter().find(|id| {
1466                                entity_map.get(*id).map_or(false, |e| {
1467                                    let stem = e.file_path.rsplit('/').next().unwrap_or(&e.file_path);
1468                                    let stem = strip_file_ext(stem);
1469                                    stem == source_module
1470                                })
1471                            });
1472                            if let Some(target_id) = target {
1473                                local_imports.push((
1474                                    (file_path.clone(), local_name.to_string()),
1475                                    target_id.clone(),
1476                                ));
1477                            }
1478                        }
1479                    }
1480                }
1481
1482                for cap in JS_DEFAULT_RE.captures_iter(content) {
1483                    let local_name = cap.get(1).unwrap().as_str();
1484                    let module_path = cap.get(2).unwrap().as_str();
1485                    let source_module = module_path.rsplit('/').next().unwrap_or(module_path);
1486                    let source_module = strip_js_ext(source_module);
1487
1488                    if let Some(target_ids) = symbol_table.get(local_name) {
1489                        let target = target_ids.iter().find(|id| {
1490                            entity_map.get(*id).map_or(false, |e| {
1491                                let stem = e.file_path.rsplit('/').next().unwrap_or(&e.file_path);
1492                                let stem = strip_file_ext(stem);
1493                                stem == source_module
1494                            })
1495                        });
1496                        if let Some(target_id) = target {
1497                            local_imports.push((
1498                                (file_path.clone(), local_name.to_string()),
1499                                target_id.clone(),
1500                            ));
1501                        }
1502                    }
1503                }
1504            }
1505
1506            // Rust imports: use crate::module::Name; / use crate::module::{A, B};
1507            // Also: use super::module::Name; / use self::module::Name;
1508            let is_rust = file_path.ends_with(".rs");
1509            if is_rust {
1510                static RUST_USE_SIMPLE_RE: LazyLock<Regex> = LazyLock::new(|| {
1511                    // use crate::config::Config;
1512                    // use super::types::Entity;
1513                    // use config::Config;  (bare module path in binary crates)
1514                    Regex::new(r"(?m)^\s*use\s+(?:(?:crate|super|self)::)?([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\s*;").unwrap()
1515                });
1516                static RUST_USE_GROUP_RE: LazyLock<Regex> = LazyLock::new(|| {
1517                    // use crate::types::{Entity, ParseError};
1518                    // use types::{Entity, ParseError};  (bare module path)
1519                    Regex::new(r"(?m)^\s*use\s+(?:(?:crate|super|self)::)?([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)::\{([^}]+)\}\s*;").unwrap()
1520                });
1521
1522                // Use a local import table for Rust alias resolution
1523                let mut local_import_table: HashMap<(String, String), String> = HashMap::new();
1524
1525                // Build a map: module_name -> list of file paths whose stem matches
1526                // For "use crate::config::Config", module is "config", name is "Config"
1527                for cap in RUST_USE_SIMPLE_RE.captures_iter(content) {
1528                    let full_path_str = cap.get(1).unwrap().as_str();
1529                    let parts: Vec<&str> = full_path_str.split("::").collect();
1530                    if parts.is_empty() { continue; }
1531
1532                    // Last part is the imported name, everything before is the module path
1533                    let imported_name = parts[parts.len() - 1];
1534                    // The module is the second-to-last part, or the first if only one part
1535                    let source_module = if parts.len() >= 2 {
1536                        parts[parts.len() - 2]
1537                    } else {
1538                        parts[0]
1539                    };
1540
1541                    resolve_rust_import(
1542                        file_path, imported_name, source_module,
1543                        symbol_table, entity_map, &mut local_import_table,
1544                    );
1545                }
1546
1547                for cap in RUST_USE_GROUP_RE.captures_iter(content) {
1548                    let module_path = cap.get(1).unwrap().as_str();
1549                    let names_str = cap.get(2).unwrap().as_str();
1550
1551                    // source_module is the last segment of the module path
1552                    let source_module = module_path.rsplit("::").next().unwrap_or(module_path);
1553
1554                    for name_part in names_str.split(',') {
1555                        let name_part = name_part.trim();
1556                        // Handle "Name as Alias"
1557                        let (original, local) = if let Some(pos) = name_part.find(" as ") {
1558                            (&name_part[..pos], name_part[pos + 4..].trim())
1559                        } else {
1560                            (name_part, name_part)
1561                        };
1562                        let original = original.trim();
1563                        let local = local.trim();
1564                        if original.is_empty() || local.is_empty() { continue; }
1565
1566                        resolve_rust_import(
1567                            file_path, original, source_module,
1568                            symbol_table, entity_map, &mut local_import_table,
1569                        );
1570                        // If aliased, also map the local name
1571                        if local != original {
1572                            if let Some(target) = local_import_table.get(&(file_path.clone(), original.to_string())).cloned() {
1573                                local_import_table.insert(
1574                                    (file_path.clone(), local.to_string()),
1575                                    target,
1576                                );
1577                            }
1578                        }
1579                    }
1580                }
1581
1582                // Collect all Rust imports into local_imports
1583                for (key, val) in local_import_table {
1584                    local_imports.push((key, val));
1585                }
1586            }
1587
1588            // Go imports are handled by the scope resolver (avoids O(n²) import table explosion).
1589            // Skip Go files here entirely.
1590
1591            Some(local_imports)
1592        })
1593        .collect();
1594
1595    // Merge all per-file imports into a single table
1596    let mut import_table: HashMap<(String, String), String> = HashMap::new();
1597    for local_imports in per_file_imports {
1598        for (key, val) in local_imports {
1599            import_table.insert(key, val);
1600        }
1601    }
1602
1603    import_table
1604}
1605
1606/// Resolve a Rust import: find the target entity in the symbol table
1607/// by matching the imported name against entities in files whose stem matches source_module.
1608fn resolve_rust_import(
1609    file_path: &str,
1610    imported_name: &str,
1611    source_module: &str,
1612    symbol_table: &HashMap<String, Vec<String>>,
1613    entity_map: &HashMap<String, EntityInfo>,
1614    import_table: &mut HashMap<(String, String), String>,
1615) {
1616    if let Some(target_ids) = symbol_table.get(imported_name) {
1617        let target = target_ids.iter().find(|id| {
1618            entity_map.get(*id).map_or(false, |e| {
1619                let stem = e.file_path.rsplit('/').next().unwrap_or(&e.file_path);
1620                let stem = strip_file_ext(stem);
1621                stem == source_module
1622            })
1623        });
1624        if let Some(target_id) = target {
1625            import_table.insert(
1626                (file_path.to_string(), imported_name.to_string()),
1627                target_id.clone(),
1628            );
1629        }
1630    }
1631}
1632
/// Drop a trailing `.js`, `.ts`, `.jsx` or `.tsx` from a module name.
///
/// At most one extension is removed; a name without any of these suffixes is
/// returned unchanged.
fn strip_js_ext(s: &str) -> &str {
    const JS_EXTENSIONS: [&str; 4] = [".js", ".ts", ".jsx", ".tsx"];

    JS_EXTENSIONS
        .iter()
        .find_map(|&ext| s.strip_suffix(ext))
        .unwrap_or(s)
}
1641
/// Drop a trailing `.py`, `.ts`, `.js`, `.tsx`, `.jsx` or `.rs` from a file name.
///
/// At most one extension is removed; a name without any of these suffixes is
/// returned unchanged.
fn strip_file_ext(s: &str) -> &str {
    const KNOWN_EXTENSIONS: [&str; 6] = [".py", ".ts", ".js", ".tsx", ".jsx", ".rs"];

    KNOWN_EXTENSIONS
        .iter()
        .find_map(|&ext| s.strip_suffix(ext))
        .unwrap_or(s)
}
1652
/// Blank out comments and string literals so they cannot produce false references.
///
/// Returns a copy of `content` in which every byte belonging to a comment or a
/// string literal (delimiters included) is replaced by an ASCII space. All
/// other bytes are copied through unchanged.
///
/// The scan is language-agnostic; the following rules are applied to every
/// input, in this order of precedence:
/// - `"""…"""` and `'''…'''` triple-quoted strings,
/// - `"…"` and `'…'` strings, where a backslash escapes the following byte,
/// - `#…` and `//…` comments, up to but not including the newline,
/// - `/* … */` block comments.
///
/// A string or block comment that is never closed is blanked through the end
/// of the input, so no part of it is reported as code.
///
/// NOTE: because the rules ignore the source language, constructs that reuse
/// these characters are also blanked — e.g. a lone `'` opens a "string" and a
/// `#` always starts a "comment".
fn strip_comments_and_strings(content: &str) -> String {
    /// Index just past the first `delim` found in `bytes[from..]`, or
    /// `bytes.len()` when the delimiter never occurs.
    fn end_of_delimited(bytes: &[u8], from: usize, delim: &[u8]) -> usize {
        bytes[from..]
            .windows(delim.len())
            .position(|window| window == delim)
            .map_or(bytes.len(), |offset| from + offset + delim.len())
    }

    /// Index just past the closing `quote` of a string whose body starts at
    /// `from`, or `bytes.len()` when the string is never closed.
    fn end_of_quoted(bytes: &[u8], from: usize, quote: u8) -> usize {
        let mut i = from;
        while i < bytes.len() {
            match bytes[i] {
                // Skip the backslash together with the byte it escapes.
                b'\\' => i += 2,
                b if b == quote => return i + 1,
                _ => i += 1,
            }
        }
        bytes.len()
    }

    /// Index of the next `\n` at or after `from`, or `bytes.len()` if none.
    fn end_of_line(bytes: &[u8], from: usize) -> usize {
        bytes[from..]
            .iter()
            .position(|&b| b == b'\n')
            .map_or(bytes.len(), |offset| from + offset)
    }

    let bytes = content.as_bytes();
    let len = bytes.len();
    // Start from all spaces; only bytes classified as code are copied in.
    let mut result = vec![b' '; len];
    let mut i = 0;

    while i < len {
        let rest = &bytes[i..];
        i = if rest.starts_with(b"\"\"\"") {
            // Triple-quoted strings (Python docstrings)
            end_of_delimited(bytes, i + 3, b"\"\"\"")
        } else if rest.starts_with(b"'''") {
            end_of_delimited(bytes, i + 3, b"'''")
        } else if rest[0] == b'"' || rest[0] == b'\'' {
            // Ordinary double- or single-quoted strings
            end_of_quoted(bytes, i + 1, rest[0])
        } else if rest[0] == b'#' || rest.starts_with(b"//") {
            // Single-line comments; the newline itself is kept as code.
            end_of_line(bytes, i)
        } else if rest.starts_with(b"/*") {
            // C-style block comments
            end_of_delimited(bytes, i + 2, b"*/")
        } else {
            // Regular code: copy through
            result[i] = bytes[i];
            i + 1
        };
    }

    String::from_utf8_lossy(&result).into_owned()
}
1731
1732/// Extract dot-chains (receiver.member) from content for precise resolution.
1733/// Returns unique (receiver, member) pairs found in the content.
1734fn extract_dot_chains<'a>(content: &'a str) -> Vec<(&'a str, &'a str)> {
1735    static DOT_CHAIN_RE: LazyLock<Regex> = LazyLock::new(|| {
1736        Regex::new(r"\b([A-Za-z_]\w*)\.([A-Za-z_]\w*)").unwrap()
1737    });
1738
1739    let mut chains = Vec::new();
1740    let mut seen: HashSet<(&str, &str)> = HashSet::new();
1741    for cap in DOT_CHAIN_RE.captures_iter(content) {
1742        let receiver = cap.get(1).unwrap().as_str();
1743        let member = cap.get(2).unwrap().as_str();
1744        if seen.insert((receiver, member)) {
1745            chains.push((receiver, member));
1746        }
1747    }
1748    chains
1749}
1750
1751/// Extract identifier references from entity content using simple token analysis.
1752/// Strips comments and strings first to avoid false positives from docstrings.
1753/// Returns borrowed slices from the stripped content.
1754fn extract_references_from_content<'a>(content: &'a str, own_name: &str) -> Vec<&'a str> {
1755    let stripped = strip_comments_and_strings(content);
1756    extract_references_with_stripped(content, own_name, &stripped)
1757}
1758
1759/// Extract references using a pre-stripped version of the content.
1760/// Use this when you already have the stripped content (e.g. from dot-chain extraction)
1761/// to avoid stripping comments/strings twice.
1762fn extract_references_with_stripped<'a>(content: &'a str, own_name: &str, stripped: &str) -> Vec<&'a str> {
1763    let stripped_words: HashSet<&str> = stripped
1764        .split(|c: char| !c.is_alphanumeric() && c != '_')
1765        .filter(|w| !w.is_empty())
1766        .collect();
1767
1768    let mut refs = Vec::new();
1769    let mut seen: HashSet<&str> = HashSet::new();
1770
1771    for word in content.split(|c: char| !c.is_alphanumeric() && c != '_') {
1772        if word.is_empty() || word == own_name {
1773            continue;
1774        }
1775        if is_keyword(word) || word.len() < 2 {
1776            continue;
1777        }
1778        // Skip very short lowercase identifiers (likely local vars: i, x, a, ok, id, etc.)
1779        if word.starts_with(|c: char| c.is_lowercase()) && word.len() < 3 {
1780            continue;
1781        }
1782        if !word.starts_with(|c: char| c.is_alphabetic() || c == '_') {
1783            continue;
1784        }
1785        // Skip common local variable names that create false graph edges
1786        if is_common_local_name(word) {
1787            continue;
1788        }
1789        // Skip words that only appear in comments/strings
1790        if !stripped_words.contains(word) {
1791            continue;
1792        }
1793        if seen.insert(word) {
1794            refs.push(word);
1795        }
1796    }
1797
1798    refs
1799}
1800
/// Lookup set behind [`is_common_local_name`], built on first use.
static COMMON_LOCAL_NAMES: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
    const NAMES: &[&str] = &[
        "result", "results", "data", "config", "value", "values",
        "item", "items", "input", "output", "args", "opts",
        "name", "path", "file", "line", "count", "index",
        "temp", "prev", "next", "curr", "current", "node",
        "left", "right", "root", "head", "tail", "body",
        "text", "content", "source", "target", "entry",
        "error", "errors", "message", "response", "request",
        "context", "state", "props", "event", "handler",
        "callback", "options", "params", "query", "list",
        "base", "info", "meta", "kind", "mode", "flag",
        "size", "length", "width", "height", "start", "stop",
        "begin", "done", "found", "status", "code",
    ];
    NAMES.iter().copied().collect()
});

/// Whether `word` is one of the names that are overwhelmingly used for local
/// variables rather than entities. Treating them as references would create
/// massive numbers of false-positive edges in the dependency graph.
/// The comparison is exact and case-sensitive.
fn is_common_local_name(word: &str) -> bool {
    COMMON_LOCAL_NAMES.contains(word)
}
1823
1824/// Infer reference type from context using word-boundary-aware matching.
1825fn infer_ref_type(content: &str, ref_name: &str) -> RefType {
1826    // Check if it's a function call: ref_name followed by ( with word boundary before.
1827    // Avoids format! allocation by finding ref_name and checking the next char.
1828    let bytes = content.as_bytes();
1829    let name_bytes = ref_name.as_bytes();
1830    let mut search_start = 0;
1831    while let Some(rel_pos) = content[search_start..].find(ref_name) {
1832        let pos = search_start + rel_pos;
1833        let after = pos + name_bytes.len();
1834        // Check next char is '('
1835        if after < bytes.len() && bytes[after] == b'(' {
1836            // Verify word boundary before
1837            let is_boundary = pos == 0 || {
1838                let prev = bytes[pos - 1];
1839                !prev.is_ascii_alphanumeric() && prev != b'_'
1840            };
1841            if is_boundary {
1842                return RefType::Calls;
1843            }
1844        }
1845        // Advance past pos to the next char boundary to avoid slicing inside a multi-byte UTF-8 char.
1846        search_start = pos + 1;
1847        while search_start < content.len() && !content.is_char_boundary(search_start) {
1848            search_start += 1;
1849        }
1850    }
1851
1852    // Check if it's in an import/use statement (line-level, not substring)
1853    for line in content.lines() {
1854        let trimmed = line.trim();
1855        if (trimmed.starts_with("import ") || trimmed.starts_with("use ")
1856            || trimmed.starts_with("from ") || trimmed.starts_with("require("))
1857            && trimmed.contains(ref_name)
1858        {
1859            return RefType::Imports;
1860        }
1861    }
1862
1863    // Default to type reference
1864    RefType::TypeRef
1865}
1866
/// Lookup set behind [`is_keyword`], built on first use. It mixes language
/// keywords, primitive/standard type names and a few ubiquitous built-ins
/// from the languages listed in the section comments below.
static KEYWORDS: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
    const WORDS: &[&str] = &[
        // Common across languages
        "if", "else", "for", "while", "do", "switch", "case", "break",
        "continue", "return", "try", "catch", "finally", "throw",
        "new", "delete", "typeof", "instanceof", "in", "of",
        "true", "false", "null", "undefined", "void", "this",
        "super", "class", "extends", "implements", "interface",
        "enum", "const", "let", "var", "function", "async",
        "await", "yield", "import", "export", "default", "from",
        "as", "static", "public", "private", "protected",
        "abstract", "final", "override",
        // Rust
        "fn", "pub", "mod", "use", "struct", "impl", "trait",
        "where", "type", "self", "Self", "mut", "ref", "match",
        "loop", "move", "unsafe", "extern", "crate", "dyn",
        // Python
        "def", "elif", "except", "raise", "with",
        "pass", "lambda", "nonlocal", "global", "assert",
        "True", "False", "and", "or", "not", "is",
        // Go
        "func", "package", "range", "select", "chan", "go",
        "defer", "map", "make", "append", "len", "cap",
        // C/C++
        "auto", "register", "volatile", "sizeof", "typedef",
        "template", "typename", "namespace", "virtual", "inline",
        "constexpr", "nullptr", "noexcept", "explicit", "friend",
        "operator", "using", "cout", "endl", "cerr", "cin",
        "printf", "scanf", "malloc", "free", "NULL", "include",
        "ifdef", "ifndef", "endif", "define", "pragma",
        // Ruby
        "end", "then", "elsif", "unless", "until",
        "begin", "rescue", "ensure", "when", "require",
        "attr_accessor", "attr_reader", "attr_writer",
        "puts", "nil", "module", "defined",
        // C#
        "internal", "sealed", "readonly",
        "partial", "delegate", "event", "params", "out",
        "object", "decimal", "sbyte", "ushort", "uint",
        "ulong", "nint", "nuint", "dynamic",
        "get", "set", "value", "init", "record",
        // Types (primitives)
        "string", "number", "boolean", "int", "float", "double",
        "bool", "char", "byte", "i8", "i16", "i32", "i64",
        "u8", "u16", "u32", "u64", "f32", "f64", "usize",
        "isize", "str", "String", "Vec", "Option", "Result",
        "Box", "Arc", "Rc", "HashMap", "HashSet", "Some",
        "Ok", "Err",
    ];
    WORDS.iter().copied().collect()
});

/// Whether `word` is in [`KEYWORDS`]. The comparison is exact and
/// case-sensitive (`"Self"` and `"self"` are separate entries).
fn is_keyword(word: &str) -> bool {
    KEYWORDS.contains(word)
}
1921
1922#[cfg(test)]
1923mod tests {
1924    use super::*;
1925    use crate::git::types::{FileChange, FileStatus};
1926    use std::io::Write;
1927    use tempfile::TempDir;
1928
1929    fn create_test_repo() -> (TempDir, ParserRegistry) {
1930        let dir = TempDir::new().unwrap();
1931        let registry = crate::parser::plugins::create_default_registry();
1932        (dir, registry)
1933    }
1934
1935    fn write_file(dir: &Path, name: &str, content: &str) {
1936        let path = dir.join(name);
1937        if let Some(parent) = path.parent() {
1938            std::fs::create_dir_all(parent).unwrap();
1939        }
1940        let mut f = std::fs::File::create(path).unwrap();
1941        f.write_all(content.as_bytes()).unwrap();
1942    }
1943
1944    #[test]
1945    fn test_incremental_add_file() {
1946        let (dir, registry) = create_test_repo();
1947        let root = dir.path();
1948
1949        // Start with one file
1950        write_file(root, "a.ts", "export function foo() { return bar(); }\n");
1951        write_file(root, "b.ts", "export function bar() { return 1; }\n");
1952
1953        let (mut graph, _) = EntityGraph::build(root, &["a.ts".into(), "b.ts".into()], &registry);
1954        assert_eq!(graph.entities.len(), 2);
1955
1956        // Add a new file
1957        write_file(root, "c.ts", "export function baz() { return foo(); }\n");
1958        graph.update_from_changes(
1959            &[FileChange {
1960                file_path: "c.ts".into(),
1961                status: FileStatus::Added,
1962                old_file_path: None,
1963                before_content: None,
1964                after_content: None, // will read from disk
1965            }],
1966            root,
1967            &registry,
1968        );
1969
1970        assert_eq!(graph.entities.len(), 3);
1971        assert!(graph.entities.contains_key("c.ts::function::baz"));
1972        // baz references foo
1973        let baz_deps = graph.get_dependencies("c.ts::function::baz");
1974        assert!(
1975            baz_deps.iter().any(|d| d.name == "foo"),
1976            "baz should depend on foo. Deps: {:?}",
1977            baz_deps.iter().map(|d| &d.name).collect::<Vec<_>>()
1978        );
1979    }
1980
1981    #[test]
1982    fn test_incremental_delete_file() {
1983        let (dir, registry) = create_test_repo();
1984        let root = dir.path();
1985
1986        write_file(root, "a.ts", "export function foo() { return bar(); }\n");
1987        write_file(root, "b.ts", "export function bar() { return 1; }\n");
1988
1989        let (mut graph, _) = EntityGraph::build(root, &["a.ts".into(), "b.ts".into()], &registry);
1990        assert_eq!(graph.entities.len(), 2);
1991
1992        // Delete b.ts
1993        graph.update_from_changes(
1994            &[FileChange {
1995                file_path: "b.ts".into(),
1996                status: FileStatus::Deleted,
1997                old_file_path: None,
1998                before_content: None,
1999                after_content: None,
2000            }],
2001            root,
2002            &registry,
2003        );
2004
2005        assert_eq!(graph.entities.len(), 1);
2006        assert!(!graph.entities.contains_key("b.ts::function::bar"));
2007        // foo's dependency on bar should be pruned
2008        let foo_deps = graph.get_dependencies("a.ts::function::foo");
2009        assert!(
2010            foo_deps.is_empty(),
2011            "foo's deps should be empty after bar deleted. Deps: {:?}",
2012            foo_deps.iter().map(|d| &d.name).collect::<Vec<_>>()
2013        );
2014    }
2015
2016    #[test]
2017    fn test_incremental_modify_file() {
2018        let (dir, registry) = create_test_repo();
2019        let root = dir.path();
2020
2021        write_file(root, "a.ts", "export function foo() { return bar(); }\n");
2022        write_file(root, "b.ts", "export function bar() { return 1; }\nexport function baz() { return 2; }\n");
2023
2024        let (mut graph, _) = EntityGraph::build(root, &["a.ts".into(), "b.ts".into()], &registry);
2025        assert_eq!(graph.entities.len(), 3);
2026
2027        // Modify a.ts to call baz instead of bar
2028        write_file(root, "a.ts", "export function foo() { return baz(); }\n");
2029        graph.update_from_changes(
2030            &[FileChange {
2031                file_path: "a.ts".into(),
2032                status: FileStatus::Modified,
2033                old_file_path: None,
2034                before_content: None,
2035                after_content: None,
2036            }],
2037            root,
2038            &registry,
2039        );
2040
2041        assert_eq!(graph.entities.len(), 3);
2042        // foo should now depend on baz, not bar
2043        let foo_deps = graph.get_dependencies("a.ts::function::foo");
2044        let dep_names: Vec<&str> = foo_deps.iter().map(|d| d.name.as_str()).collect();
2045        assert!(dep_names.contains(&"baz"), "foo should depend on baz after modification. Deps: {:?}", dep_names);
2046        assert!(!dep_names.contains(&"bar"), "foo should no longer depend on bar. Deps: {:?}", dep_names);
2047    }
2048
2049    #[test]
2050    fn test_incremental_with_content() {
2051        let (dir, registry) = create_test_repo();
2052        let root = dir.path();
2053
2054        write_file(root, "a.ts", "export function foo() { return 1; }\n");
2055        let (mut graph, _) = EntityGraph::build(root, &["a.ts".into()], &registry);
2056        assert_eq!(graph.entities.len(), 1);
2057
2058        // Add file with content provided directly (no disk read needed)
2059        graph.update_from_changes(
2060            &[FileChange {
2061                file_path: "b.ts".into(),
2062                status: FileStatus::Added,
2063                old_file_path: None,
2064                before_content: None,
2065                after_content: Some("export function bar() { return foo(); }\n".into()),
2066            }],
2067            root,
2068            &registry,
2069        );
2070
2071        assert_eq!(graph.entities.len(), 2);
2072        let bar_deps = graph.get_dependencies("b.ts::function::bar");
2073        assert!(bar_deps.iter().any(|d| d.name == "foo"));
2074    }
2075
2076    #[test]
2077    fn test_extract_references() {
2078        let content = "function processData(input) {\n  const result = validateInput(input);\n  return transform(result);\n}";
2079        let refs = extract_references_from_content(content, "processData");
2080        assert!(refs.contains(&"validateInput"));
2081        assert!(refs.contains(&"transform"));
2082        assert!(!refs.contains(&"processData")); // self excluded
2083    }
2084
2085    #[test]
2086    fn test_extract_references_skips_keywords() {
2087        let content = "function foo() { if (true) { return false; } }";
2088        let refs = extract_references_from_content(content, "foo");
2089        assert!(!refs.contains(&"if"));
2090        assert!(!refs.contains(&"true"));
2091        assert!(!refs.contains(&"return"));
2092        assert!(!refs.contains(&"false"));
2093    }
2094
2095    #[test]
2096    fn test_infer_ref_type_call() {
2097        assert_eq!(
2098            infer_ref_type("validateInput(data)", "validateInput"),
2099            RefType::Calls,
2100        );
2101    }
2102
2103    #[test]
2104    fn test_infer_ref_type_type() {
2105        assert_eq!(
2106            infer_ref_type("let x: MyType = something", "MyType"),
2107            RefType::TypeRef,
2108        );
2109    }
2110
2111    #[test]
2112    fn test_infer_ref_type_multibyte_utf8() {
2113        // Ensure no panic when content contains multi-byte UTF-8 characters
2114        assert_eq!(
2115            infer_ref_type("let café = foo(x)", "foo"),
2116            RefType::Calls,
2117        );
2118        assert_eq!(
2119            infer_ref_type("class HandicapfrPublicationFieldsEnum:\n    É = 1\n    bar()", "bar"),
2120            RefType::Calls,
2121        );
2122        // No match should not panic either
2123        assert_eq!(
2124            infer_ref_type("// 日本語コメント\nlet x = 1", "missing"),
2125            RefType::TypeRef,
2126        );
2127    }
2128
2129    #[test]
2130    fn test_dot_chain_self_resolution() {
2131        let (dir, registry) = create_test_repo();
2132        let root = dir.path();
2133
2134        write_file(root, "service.py", "\
2135class MyService:
2136    def process(self):
2137        return self.validate()
2138
2139    def validate(self):
2140        return True
2141");
2142
2143        let (graph, _) = EntityGraph::build(root, &["service.py".into()], &registry);
2144
2145        // process should have an edge to validate via self.validate()
2146        let process_id = graph.entities.keys()
2147            .find(|id| id.contains("process"))
2148            .expect("process entity should exist");
2149        let deps = graph.get_dependencies(process_id);
2150        assert!(
2151            deps.iter().any(|d| d.name == "validate"),
2152            "process should depend on validate via self.validate(). Deps: {:?}",
2153            deps.iter().map(|d| &d.name).collect::<Vec<_>>()
2154        );
2155    }
2156
2157    #[test]
2158    fn test_dot_chain_this_resolution() {
2159        let (dir, registry) = create_test_repo();
2160        let root = dir.path();
2161
2162        write_file(root, "service.ts", "\
2163class UserService {
2164    process() {
2165        return this.validate();
2166    }
2167    validate() {
2168        return true;
2169    }
2170}
2171");
2172
2173        let (graph, _) = EntityGraph::build(root, &["service.ts".into()], &registry);
2174
2175        let process_id = graph.entities.keys()
2176            .find(|id| id.contains("process"))
2177            .expect("process entity should exist");
2178        let deps = graph.get_dependencies(process_id);
2179        assert!(
2180            deps.iter().any(|d| d.name == "validate"),
2181            "process should depend on validate via this.validate(). Deps: {:?}",
2182            deps.iter().map(|d| &d.name).collect::<Vec<_>>()
2183        );
2184    }
2185
2186    #[test]
2187    fn test_dot_chain_class_static() {
2188        let (dir, registry) = create_test_repo();
2189        let root = dir.path();
2190
2191        write_file(root, "utils.ts", "\
2192class MathUtils {
2193    static compute() { return 1; }
2194}
2195function caller() { return MathUtils.compute(); }
2196");
2197
2198        let (graph, _) = EntityGraph::build(root, &["utils.ts".into()], &registry);
2199
2200        let caller_id = graph.entities.keys()
2201            .find(|id| id.contains("caller"))
2202            .expect("caller entity should exist");
2203        let deps = graph.get_dependencies(caller_id);
2204        assert!(
2205            deps.iter().any(|d| d.name == "compute"),
2206            "caller should depend on compute via MathUtils.compute(). Deps: {:?}",
2207            deps.iter().map(|d| &d.name).collect::<Vec<_>>()
2208        );
2209    }
2210
2211    #[test]
2212    fn test_js_ts_import_resolution() {
2213        let (dir, registry) = create_test_repo();
2214        let root = dir.path();
2215
2216        write_file(root, "helper.ts", "\
2217export function helper() { return 1; }
2218");
2219        write_file(root, "main.ts", "\
2220import { helper } from './helper';
2221export function main() { return helper(); }
2222");
2223
2224        let (graph, _) = EntityGraph::build(
2225            root,
2226            &["helper.ts".into(), "main.ts".into()],
2227            &registry,
2228        );
2229
2230        let main_id = graph.entities.keys()
2231            .find(|id| id.contains("main"))
2232            .expect("main entity should exist");
2233        let deps = graph.get_dependencies(main_id);
2234        assert!(
2235            deps.iter().any(|d| d.name == "helper"),
2236            "main should depend on helper via JS import. Deps: {:?}",
2237            deps.iter().map(|d| &d.name).collect::<Vec<_>>()
2238        );
2239    }
2240
2241    #[test]
2242    fn test_dot_chain_no_false_edges() {
2243        let (dir, registry) = create_test_repo();
2244        let root = dir.path();
2245
2246        // Two classes with same method name "process".
2247        // self.process() in ClassA should NOT create edge to ClassB::process.
2248        write_file(root, "a.py", "\
2249class ClassA:
2250    def run(self):
2251        return self.process()
2252
2253    def process(self):
2254        return 1
2255");
2256        write_file(root, "b.py", "\
2257class ClassB:
2258    def process(self):
2259        return 2
2260");
2261
2262        let (graph, _) = EntityGraph::build(
2263            root,
2264            &["a.py".into(), "b.py".into()],
2265            &registry,
2266        );
2267
2268        let run_id = graph.entities.keys()
2269            .find(|id| id.contains("run"))
2270            .expect("run entity should exist");
2271        let deps = graph.get_dependencies(run_id);
2272        // Should have edge to ClassA::process, NOT ClassB::process
2273        for dep in &deps {
2274            if dep.name == "process" {
2275                assert!(
2276                    dep.file_path == "a.py",
2277                    "run's process dep should be in a.py, not {}",
2278                    dep.file_path
2279                );
2280            }
2281        }
2282    }
2283
2284    #[test]
2285    fn test_dot_chain_fallback() {
2286        let (dir, registry) = create_test_repo();
2287        let root = dir.path();
2288
2289        // someVar.unknownMethod() - "someVar" is not a class,
2290        // so the chain is unresolved and words fall through to bag-of-words.
2291        // "helper" should still resolve via bag-of-words.
2292        write_file(root, "app.ts", "\
2293export function helper() { return 1; }
2294export function caller() {
2295    const val = helper();
2296    return val;
2297}
2298");
2299
2300        let (graph, _) = EntityGraph::build(root, &["app.ts".into()], &registry);
2301
2302        let caller_id = graph.entities.keys()
2303            .find(|id| id.contains("caller"))
2304            .expect("caller entity should exist");
2305        let deps = graph.get_dependencies(caller_id);
2306        assert!(
2307            deps.iter().any(|d| d.name == "helper"),
2308            "caller should still resolve helper via bag-of-words. Deps: {:?}",
2309            deps.iter().map(|d| &d.name).collect::<Vec<_>>()
2310        );
2311    }
2312
2313}