Skip to main content

sem_core/parser/
graph.rs

1//! Entity dependency graph — cross-file reference extraction.
2//!
3//! Implements a two-pass approach inspired by arXiv:2601.08773 (Reliable Graph-RAG):
4//! Pass 1: Extract all entities, build a symbol table (name → entity ID).
5//! Pass 2: For each entity, extract identifier references from its AST subtree,
6//!         resolve them against the symbol table to create edges.
7//!
8//! This enables impact analysis: "if I change entity X, what else is affected?"
9
10use std::collections::{HashMap, HashSet};
11use std::path::Path;
12use std::sync::{Arc, LazyLock};
13
14use rayon::prelude::*;
15use regex::Regex;
16use serde::{Deserialize, Serialize};
17
18use crate::git::types::{FileChange, FileStatus};
19use crate::model::entity::SemanticEntity;
20use crate::parser::registry::ParserRegistry;
21use crate::parser::scope_resolve;
22
/// A reference from one entity to another.
///
/// This is one directed edge of the dependency graph. Field names are
/// serialized in camelCase (`fromEntity`, `toEntity`, `refType`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct EntityRef {
    /// ID of the entity whose content contains the reference (edge source).
    pub from_entity: String,
    /// ID of the entity being referenced (edge target).
    pub to_entity: String,
    /// Kind of relationship this edge represents.
    pub ref_type: RefType,
}
31
32/// Type of reference between entities.
33#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
34#[serde(rename_all = "lowercase")]
35pub enum RefType {
36    /// Function/method call
37    Calls,
38    /// Type reference (extends, implements, field type)
39    TypeRef,
40    /// Import/use statement reference
41    Imports,
42}
43
/// A complete entity dependency graph for a set of files.
///
/// `edges` is the authoritative edge list; `dependents` and `dependencies`
/// are lookup indexes derived from it.
#[derive(Debug)]
pub struct EntityGraph {
    /// All entities indexed by ID
    pub entities: HashMap<String, EntityInfo>,
    /// Flat list of directed edges (from_entity → to_entity, tagged with a ref_type)
    pub edges: Vec<EntityRef>,
    /// Reverse index: entity_id → IDs of entities that reference it
    pub dependents: HashMap<String, Vec<String>>,
    /// Forward index: entity_id → IDs of entities it references
    pub dependencies: HashMap<String, Vec<String>>,
}
56
/// Minimal entity info stored in the graph.
///
/// Field names are serialized in camelCase.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct EntityInfo {
    /// Unique entity ID; also the key under which this info is stored in the graph.
    pub id: String,
    /// Entity name, used as the lookup key in the symbol table.
    pub name: String,
    /// Entity kind as a string, e.g. "class", "struct", "interface", "method".
    pub entity_type: String,
    /// Path of the file the entity was extracted from.
    pub file_path: String,
    /// ID of the enclosing entity, if any; omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub parent_id: Option<String>,
    /// First line of the entity in its file.
    pub start_line: usize,
    /// Last line of the entity in its file.
    pub end_line: usize,
}
70
71impl EntityGraph {
72    /// Reconstruct an EntityGraph from pre-loaded parts (e.g. from a cache).
73    pub fn from_parts(entities: HashMap<String, EntityInfo>, edges: Vec<EntityRef>) -> Self {
74        let mut dependents: HashMap<String, Vec<String>> = HashMap::new();
75        let mut dependencies: HashMap<String, Vec<String>> = HashMap::new();
76        for edge in &edges {
77            dependents
78                .entry(edge.to_entity.clone())
79                .or_default()
80                .push(edge.from_entity.clone());
81            dependencies
82                .entry(edge.from_entity.clone())
83                .or_default()
84                .push(edge.to_entity.clone());
85        }
86        EntityGraph {
87            entities,
88            edges,
89            dependents,
90            dependencies,
91        }
92    }
93
94    /// Build an entity graph from a set of files.
95    ///
96    /// Pass 1: Extract all entities from all files using the parser registry.
97    /// Pass 2: For each entity, find identifier tokens and resolve them against
98    ///         the symbol table to create reference edges.
99    pub fn build(
100        root: &Path,
101        file_paths: &[String],
102        registry: &ParserRegistry,
103    ) -> (Self, Vec<SemanticEntity>) {
104        // Pass 1: Extract all entities in parallel (file I/O + tree-sitter parsing)
105        // Also collect (file_path, content, tree) for scope_resolve reuse
106        let per_file: Vec<(Vec<SemanticEntity>, Option<(String, String, tree_sitter::Tree)>)> = file_paths
107            .par_iter()
108            .filter_map(|file_path| {
109                let full_path = root.join(file_path);
110                let content = std::fs::read_to_string(&full_path).ok()?;
111                let (entities, tree) = registry.extract_entities_with_tree(file_path, &content)?;
112                let parsed = tree.map(|t| (file_path.clone(), content, t));
113                Some((entities, parsed))
114            })
115            .collect();
116
117        let mut all_entities: Vec<SemanticEntity> = Vec::new();
118        let mut parsed_files: Vec<(String, String, tree_sitter::Tree)> = Vec::new();
119        for (entities, parsed) in per_file {
120            all_entities.extend(entities);
121            if let Some(p) = parsed {
122                parsed_files.push(p);
123            }
124        }
125
126        // Pass A: Build all lookup structures in a single pass over all_entities.
127        // This merges what was previously 6 separate O(E) iterations.
128        let mut symbol_table: HashMap<String, Vec<String>> = HashMap::with_capacity(all_entities.len());
129        let mut entity_map: HashMap<String, EntityInfo> = HashMap::with_capacity(all_entities.len());
130        let mut parent_child_pairs: HashSet<(&str, &str)> = HashSet::new();
131        let mut class_child_names: HashSet<(&str, &str)> = HashSet::new();
132        let mut class_entity_names: HashSet<&str> = HashSet::new();
133        let mut id_to_name: HashMap<&str, &str> = HashMap::with_capacity(all_entities.len());
134        let mut scope_entity_ranges: HashMap<String, Vec<(usize, usize, String)>> = HashMap::new();
135
136        for entity in &all_entities {
137            symbol_table
138                .entry(entity.name.clone())
139                .or_default()
140                .push(entity.id.clone());
141
142            entity_map.insert(
143                entity.id.clone(),
144                EntityInfo {
145                    id: entity.id.clone(),
146                    name: entity.name.clone(),
147                    entity_type: entity.entity_type.clone(),
148                    file_path: entity.file_path.clone(),
149                    parent_id: entity.parent_id.clone(),
150                    start_line: entity.start_line,
151                    end_line: entity.end_line,
152                },
153            );
154
155            if let Some(ref pid) = entity.parent_id {
156                parent_child_pairs.insert((pid.as_str(), entity.id.as_str()));
157                class_child_names.insert((pid.as_str(), entity.name.as_str()));
158            }
159
160            if matches!(entity.entity_type.as_str(), "class" | "struct" | "interface" | "class_type") {
161                class_entity_names.insert(entity.name.as_str());
162            }
163
164            id_to_name.insert(entity.id.as_str(), entity.name.as_str());
165
166            scope_entity_ranges.entry(entity.file_path.clone()).or_default()
167                .push((entity.start_line, entity.end_line, entity.id.clone()));
168        }
169
170        // Pass B: Build enclosing_class, class_members, and scope_class_members
171        // (depends on id_to_name, class_entity_names, and entity_map from Pass A)
172        let mut enclosing_class: HashMap<&str, &str> = HashMap::new();
173        let mut class_members: HashMap<&str, Vec<(&str, &str)>> = HashMap::new();
174        let mut scope_class_members: HashMap<String, Vec<(String, String)>> = HashMap::new();
175
176        for entity in &all_entities {
177            if let Some(ref pid) = entity.parent_id {
178                if let Some(&parent_name) = id_to_name.get(pid.as_str()) {
179                    if class_entity_names.contains(parent_name) {
180                        enclosing_class.insert(entity.id.as_str(), parent_name);
181                        class_members
182                            .entry(parent_name)
183                            .or_default()
184                            .push((entity.name.as_str(), entity.id.as_str()));
185                    }
186                }
187                // scope_class_members for scope resolver (checks entity_type of parent)
188                if let Some(parent) = entity_map.get(pid.as_str()) {
189                    if matches!(parent.entity_type.as_str(), "class" | "struct" | "interface" | "impl") {
190                        scope_class_members.entry(parent.name.clone()).or_default()
191                            .push((entity.name.clone(), entity.id.clone()));
192                    }
193                }
194            }
195            // Go receiver-based methods
196            if entity.entity_type == "method" && entity.file_path.ends_with(".go") {
197                if let Some(struct_name) = scope_resolve::extract_go_receiver_type(&entity.content) {
198                    scope_class_members.entry(struct_name).or_default()
199                        .push((entity.name.clone(), entity.id.clone()));
200                }
201            }
202        }
203
204        // Build import table: (file_path, imported_name) → target entity ID
205        // e.g. ("io_handler.py", "validate") → "core.py::function::validate"
206        let import_table = build_import_table(root, file_paths, &symbol_table, &entity_map, Some(&parsed_files));
207        // Build owned Go package index for scope resolver
208        let owned_go_pkg_index: HashMap<String, Vec<(String, String)>> = if file_paths.iter().any(|f| f.ends_with(".go")) {
209            let mut idx: HashMap<String, Vec<(String, String)>> = HashMap::new();
210            for (name, target_ids) in symbol_table.iter() {
211                for target_id in target_ids {
212                    if let Some(entity) = entity_map.get(target_id) {
213                        let file_stem = entity.file_path.rsplit('/').next().unwrap_or(&entity.file_path);
214                        let file_stem = strip_file_ext(file_stem);
215                        idx.entry(file_stem.to_string())
216                            .or_default()
217                            .push((name.clone(), target_id.clone()));
218                        if let Some(parent_start) = entity.file_path.rfind('/') {
219                            let parent_path = &entity.file_path[..parent_start];
220                            if let Some(dir_name_start) = parent_path.rfind('/') {
221                                let dir_name = &parent_path[dir_name_start + 1..];
222                                if dir_name != file_stem {
223                                    idx.entry(dir_name.to_string())
224                                        .or_default()
225                                        .push((name.clone(), target_id.clone()));
226                                }
227                            } else if !parent_path.is_empty() && parent_path != file_stem {
228                                idx.entry(parent_path.to_string())
229                                    .or_default()
230                                    .push((name.clone(), target_id.clone()));
231                            }
232                        }
233                    }
234                }
235            }
236            idx
237        } else {
238            HashMap::new()
239        };
240
241        // Wrap symbol_table in Arc to avoid expensive deep clone (621K entries)
242        let symbol_table = Arc::new(symbol_table);
243
244        let pre_built = scope_resolve::PreBuiltLookups {
245            symbol_table: Arc::clone(&symbol_table),
246            class_members: scope_class_members,
247            entity_ranges: scope_entity_ranges,
248            go_pkg_index: owned_go_pkg_index,
249        };
250
251        // Run scope-aware resolver for supported languages (reuse pre-parsed trees)
252        let has_scope_lang = file_paths.iter().any(|f| {
253            let ext = f.rfind('.').map(|i| &f[i..]).unwrap_or("");
254            crate::parser::plugins::code::languages::get_language_config(ext)
255                .and_then(|c| c.scope_resolve)
256                .is_some()
257        });
258        let (scope_edges, scope_resolved_entities) = if has_scope_lang {
259            let result = scope_resolve::resolve_with_scopes_full(root, file_paths, &all_entities, &entity_map, Some(parsed_files), Some(pre_built));
260            let resolved_entity_ids: HashSet<String> = result.edges.iter()
261                .map(|(from, _, _)| from.clone())
262                .collect();
263            (result.edges, resolved_entity_ids)
264        } else {
265            (vec![], HashSet::new())
266        };
267
268        // Pass 2: Extract references in parallel, then resolve against symbol table
269        // Phase 1: Dot-chain resolution (precise self.X, this.X, ClassName.X)
270        // Phase 2: Bag-of-words resolution (existing logic, skipping consumed words)
271        // Skip entities already resolved by scope resolver (Python files)
272        // Skip entities from non-code file types (JSON, SQL, etc.) that can't produce edges
273        let resolved_refs: Vec<(String, String, RefType)> = all_entities
274            .par_iter()
275            .flat_map(|entity| {
276                // Skip entities already resolved by scope resolver
277                if scope_resolved_entities.contains(&entity.id) {
278                    return vec![];
279                }
280
281                // Skip entities from file types that don't have language configs
282                // (JSON, SQL, YAML, etc. — they extract entities but never produce reference edges)
283                let ext = entity.file_path.rfind('.').map(|i| &entity.file_path[i..]).unwrap_or("");
284                if crate::parser::plugins::code::languages::get_language_config(ext).is_none() {
285                    return vec![];
286                }
287
288                let mut entity_edges = Vec::new();
289                let mut consumed_words: HashSet<String> = HashSet::new();
290
291                // Strip comments/strings once, reuse for both dot-chain and bag-of-words
292                let stripped = strip_comments_and_strings(&entity.content);
293
294                // Phase 1: Dot-chain resolution
295                let dot_chains = extract_dot_chains(&stripped);
296
297                for (receiver, member) in &dot_chains {
298                    if *receiver == "self" || *receiver == "this" {
299                        // self.B / this.B: resolve to sibling method in enclosing class
300                        if let Some(class_name) = enclosing_class.get(entity.id.as_str()) {
301                            if let Some(members) = class_members.get(class_name) {
302                                for (n, tid) in members {
303                                    if *n == *member && *tid != entity.id.as_str() {
304                                        entity_edges.push((
305                                            entity.id.clone(),
306                                            tid.to_string(),
307                                            RefType::Calls,
308                                        ));
309                                        consumed_words.insert(member.to_string());
310                                        break;
311                                    }
312                                }
313                            }
314                        }
315                    } else if class_entity_names.contains(*receiver) {
316                        // ClassName.B: resolve to class member
317                        if let Some(members) = class_members.get(*receiver) {
318                            for (n, tid) in members {
319                                if *n == *member {
320                                    entity_edges.push((
321                                        entity.id.clone(),
322                                        tid.to_string(),
323                                        RefType::Calls,
324                                    ));
325                                    consumed_words.insert(member.to_string());
326                                    consumed_words.insert(receiver.to_string());
327                                    break;
328                                }
329                            }
330                        }
331                    }
332                    // Unresolved chains fall through to bag-of-words below
333                }
334
335                // Phase 2: Bag-of-words resolution (skip words consumed by dot-chains)
336                // Reuse the stripped content to avoid stripping twice
337                let refs = extract_references_with_stripped(&entity.content, &entity.name, &stripped);
338                for ref_name in refs {
339                    if consumed_words.contains(ref_name) {
340                        continue;
341                    }
342
343                    // Skip references to names that are this class's own methods
344                    if class_child_names.contains(&(entity.id.as_str(), ref_name)) {
345                        continue;
346                    }
347
348                    // Check import table first: if this file imports this name,
349                    // resolve to the import target instead of global symbol table
350                    let import_key = (entity.file_path.clone(), ref_name.to_string());
351                    if let Some(import_target_id) = import_table.get(&import_key) {
352                        if import_target_id != &entity.id
353                            && !parent_child_pairs.contains(&(entity.id.as_str(), import_target_id.as_str()))
354                            && !parent_child_pairs.contains(&(import_target_id.as_str(), entity.id.as_str()))
355                        {
356                            let ref_type = infer_ref_type(&entity.content, &ref_name);
357                            entity_edges.push((
358                                entity.id.clone(),
359                                import_target_id.clone(),
360                                ref_type,
361                            ));
362                        }
363                        continue;
364                    }
365
366                    if let Some(target_ids) = symbol_table.get(ref_name) {
367                        // Without an import, only resolve to entities in the same file.
368                        // Cross-file resolution is handled by the import table above.
369                        let target = target_ids
370                            .iter()
371                            .find(|id| {
372                                *id != &entity.id
373                                    && entity_map
374                                        .get(*id)
375                                        .map_or(false, |e| e.file_path == entity.file_path)
376                            });
377
378                        if let Some(target_id) = target {
379                            // Skip parent-child edges (class -> own method)
380                            if parent_child_pairs.contains(&(entity.id.as_str(), target_id.as_str()))
381                                || parent_child_pairs.contains(&(target_id.as_str(), entity.id.as_str()))
382                            {
383                                continue;
384                            }
385                            let ref_type = infer_ref_type(&entity.content, &ref_name);
386                            entity_edges.push((
387                                entity.id.clone(),
388                                target_id.clone(),
389                                ref_type,
390                            ));
391                        }
392                    }
393                }
394                entity_edges
395            })
396            .collect();
397
398        // Merge scope edges with bag-of-words edges, deduplicating
399        let mut combined: Vec<(String, String, RefType)> = scope_edges;
400        combined.extend(resolved_refs);
401        let mut seen_edges: HashSet<(String, String)> = HashSet::with_capacity(combined.len());
402        let mut all_resolved: Vec<(String, String, RefType)> = Vec::with_capacity(combined.len());
403        for edge in combined {
404            if seen_edges.insert((edge.0.clone(), edge.1.clone())) {
405                all_resolved.push(edge);
406            }
407        }
408
409        // Build edge indexes from resolved references
410        let mut edges: Vec<EntityRef> = Vec::with_capacity(all_resolved.len());
411        let mut dependents: HashMap<String, Vec<String>> = HashMap::new();
412        let mut dependencies: HashMap<String, Vec<String>> = HashMap::new();
413
414        for (from_entity, to_entity, ref_type) in all_resolved {
415            dependents
416                .entry(to_entity.clone())
417                .or_default()
418                .push(from_entity.clone());
419            dependencies
420                .entry(from_entity.clone())
421                .or_default()
422                .push(to_entity.clone());
423            edges.push(EntityRef {
424                from_entity,
425                to_entity,
426                ref_type,
427            });
428        }
429
430        let graph = EntityGraph {
431            entities: entity_map,
432            edges,
433            dependents,
434            dependencies,
435        };
436
437        (graph, all_entities)
438    }
439
440    /// Incrementally build an entity graph: reparse only stale files, reuse cached data for clean files.
441    ///
442    /// Uses the same full 3-phase resolution (scope + dot-chain + bag-of-words) as `build()`,
443    /// but only runs it for entities in stale files + clean entities whose cached edges
444    /// pointed into stale files (they need re-resolution since their targets may have changed).
445    pub fn build_incremental(
446        root: &Path,
447        stale_files: &[String],
448        all_file_paths: &[String],
449        cached_entities: Vec<SemanticEntity>,
450        cached_edges: Vec<EntityRef>,
451        stale_file_cached_entities: Vec<SemanticEntity>,
452        registry: &ParserRegistry,
453    ) -> (Self, Vec<SemanticEntity>) {
454        // Build set of stale file paths for quick lookup
455        let stale_set: HashSet<&str> = stale_files.iter().map(|s| s.as_str()).collect();
456
457        // Parse stale files in parallel to get new entities + trees
458        let per_file: Vec<(Vec<SemanticEntity>, Option<(String, String, tree_sitter::Tree)>)> = stale_files
459            .par_iter()
460            .filter_map(|file_path| {
461                let full_path = root.join(file_path);
462                let content = std::fs::read_to_string(&full_path).ok()?;
463                let (entities, tree) = registry.extract_entities_with_tree(file_path, &content)?;
464                let parsed = tree.map(|t| (file_path.clone(), content, t));
465                Some((entities, parsed))
466            })
467            .collect();
468
469        let mut new_entities: Vec<SemanticEntity> = Vec::new();
470        let mut parsed_files: Vec<(String, String, tree_sitter::Tree)> = Vec::new();
471        for (entities, parsed) in per_file {
472            new_entities.extend(entities);
473            if let Some(p) = parsed {
474                parsed_files.push(p);
475            }
476        }
477
478        // Entity-level diffing: compare new stale-file entities against cached versions
479        // Build content_hash lookup from cached stale-file entities
480        let cached_hashes: HashMap<&str, &str> = stale_file_cached_entities
481            .iter()
482            .map(|e| (e.id.as_str(), e.content_hash.as_str()))
483            .collect();
484
485        // Classify new stale-file entities
486        let mut truly_changed_ids: HashSet<String> = HashSet::new();
487        let mut content_clean_ids: HashSet<String> = HashSet::new();
488        for entity in &new_entities {
489            match cached_hashes.get(entity.id.as_str()) {
490                Some(old_hash) if *old_hash == entity.content_hash.as_str() => {
491                    content_clean_ids.insert(entity.id.clone());
492                }
493                _ => {
494                    // Hash differs or entity is new
495                    truly_changed_ids.insert(entity.id.clone());
496                }
497            }
498        }
499
500        // Detect deleted entities: in cached stale but not in new
501        let new_entity_ids: HashSet<&str> = new_entities.iter().map(|e| e.id.as_str()).collect();
502        let deleted_ids: HashSet<&str> = stale_file_cached_entities
503            .iter()
504            .filter(|e| !new_entity_ids.contains(e.id.as_str()))
505            .map(|e| e.id.as_str())
506            .collect();
507
508        // Merge: cached (clean) entities + new (stale) entities
509        let all_entities: Vec<SemanticEntity> = cached_entities
510            .into_iter()
511            .chain(new_entities.into_iter())
512            .collect();
513
514        // Find affected clean entities: only care about edges pointing to truly_changed/deleted
515        let mut affected_clean_ids: HashSet<String> = HashSet::new();
516        for edge in &cached_edges {
517            let to_truly_changed = truly_changed_ids.contains(&edge.to_entity)
518                || deleted_ids.contains(edge.to_entity.as_str());
519            if to_truly_changed && !stale_set.contains(
520                all_entities.iter()
521                    .find(|e| e.id == edge.from_entity)
522                    .map(|e| e.file_path.as_str())
523                    .unwrap_or("")
524            ) {
525                affected_clean_ids.insert(edge.from_entity.clone());
526            }
527        }
528
529        // Collect all stale entity IDs (for edge filtering)
530        let stale_entity_ids: HashSet<&str> = all_entities
531            .iter()
532            .filter(|e| stale_set.contains(e.file_path.as_str()))
533            .map(|e| e.id.as_str())
534            .collect();
535
536        // Keep edges where:
537        // - Both endpoints are clean files AND from_entity is not affected, OR
538        // - From a content_clean stale entity whose targets are also clean/content_clean
539        let kept_edges: Vec<EntityRef> = cached_edges
540            .into_iter()
541            .filter(|e| {
542                let from_stale = stale_entity_ids.contains(e.from_entity.as_str());
543                let to_stale = stale_entity_ids.contains(e.to_entity.as_str());
544
545                if !from_stale && !to_stale && !affected_clean_ids.contains(&e.from_entity) {
546                    // Both clean, from not affected
547                    return true;
548                }
549                if content_clean_ids.contains(&e.from_entity)
550                    && !truly_changed_ids.contains(&e.to_entity)
551                    && !deleted_ids.contains(e.to_entity.as_str())
552                    && !affected_clean_ids.contains(&e.from_entity)
553                {
554                    // From content_clean stale entity, target not truly changed
555                    return true;
556                }
557                false
558            })
559            .collect();
560
561        // Set of entity IDs that need resolution: truly_changed + affected clean
562        // (content_clean stale entities keep their cached edges)
563        let needs_resolution: HashSet<&str> = all_entities
564            .iter()
565            .filter(|e| {
566                truly_changed_ids.contains(&e.id)
567                    || affected_clean_ids.contains(&e.id)
568            })
569            .map(|e| e.id.as_str())
570            .collect();
571
572        // Now run the same resolution logic as build() but only for entities in needs_resolution.
573        // We still need the full context (symbol table, import table, etc.) from ALL entities.
574
575        // Build symbol table from all entities
576        let mut symbol_table: HashMap<String, Vec<String>> = HashMap::with_capacity(all_entities.len());
577        let mut entity_map: HashMap<String, EntityInfo> = HashMap::with_capacity(all_entities.len());
578
579        for entity in &all_entities {
580            symbol_table
581                .entry(entity.name.clone())
582                .or_default()
583                .push(entity.id.clone());
584            entity_map.insert(
585                entity.id.clone(),
586                EntityInfo {
587                    id: entity.id.clone(),
588                    name: entity.name.clone(),
589                    entity_type: entity.entity_type.clone(),
590                    file_path: entity.file_path.clone(),
591                    parent_id: entity.parent_id.clone(),
592                    start_line: entity.start_line,
593                    end_line: entity.end_line,
594                },
595            );
596        }
597
598        // Build parent-child set
599        let parent_child_pairs: HashSet<(&str, &str)> = all_entities
600            .iter()
601            .filter_map(|e| {
602                e.parent_id.as_ref().map(|pid| (pid.as_str(), e.id.as_str()))
603            })
604            .collect();
605
606        let class_child_names: HashSet<(&str, &str)> = all_entities
607            .iter()
608            .filter_map(|e| {
609                e.parent_id.as_ref().map(|pid| (pid.as_str(), e.name.as_str()))
610            })
611            .collect();
612
613        let class_entity_names: HashSet<&str> = all_entities
614            .iter()
615            .filter(|e| matches!(e.entity_type.as_str(), "class" | "struct" | "interface" | "class_type"))
616            .map(|e| e.name.as_str())
617            .collect();
618
619        let id_to_name: HashMap<&str, &str> = all_entities
620            .iter()
621            .map(|e| (e.id.as_str(), e.name.as_str()))
622            .collect();
623
624        let mut enclosing_class: HashMap<&str, &str> = HashMap::new();
625        let mut class_members: HashMap<&str, Vec<(&str, &str)>> = HashMap::new();
626
627        for entity in &all_entities {
628            if let Some(ref pid) = entity.parent_id {
629                if let Some(&parent_name) = id_to_name.get(pid.as_str()) {
630                    if class_entity_names.contains(parent_name) {
631                        enclosing_class.insert(entity.id.as_str(), parent_name);
632                        class_members
633                            .entry(parent_name)
634                            .or_default()
635                            .push((entity.name.as_str(), entity.id.as_str()));
636                    }
637                }
638            }
639        }
640
641        // Build import table from ALL files (imports may reference stale entities)
642        let import_table = build_import_table(root, all_file_paths, &symbol_table, &entity_map, Some(&parsed_files));
643
644        // Run scope-aware resolver only on files that need resolution
645        let resolve_file_paths: Vec<String> = all_file_paths
646            .iter()
647            .filter(|f| {
648                // Include file if any entity in needs_resolution belongs to it
649                stale_set.contains(f.as_str()) || all_entities.iter().any(|e| {
650                    e.file_path == **f && affected_clean_ids.contains(&e.id)
651                })
652            })
653            .cloned()
654            .collect();
655
656        let has_scope_lang = resolve_file_paths.iter().any(|f| {
657            let ext = f.rfind('.').map(|i| &f[i..]).unwrap_or("");
658            crate::parser::plugins::code::languages::get_language_config(ext)
659                .and_then(|c| c.scope_resolve)
660                .is_some()
661        });
662        let (scope_edges, scope_resolved_entities) = if has_scope_lang {
663            // Pass pre-parsed stale-file trees; scope_resolve reads affected clean files from disk
664            let resolve_set: HashSet<&str> = resolve_file_paths.iter().map(|s| s.as_str()).collect();
665            let relevant_parsed: Vec<(String, String, tree_sitter::Tree)> = parsed_files
666                .into_iter()
667                .filter(|(fp, _, _)| resolve_set.contains(fp.as_str()))
668                .collect();
669            let pre = if relevant_parsed.is_empty() { None } else { Some(relevant_parsed) };
670            let result = scope_resolve::resolve_with_scopes_full(root, &resolve_file_paths, &all_entities, &entity_map, pre, None);
671            let resolved_entity_ids: HashSet<String> = result.edges.iter()
672                .map(|(from, _, _)| from.clone())
673                .collect();
674            (result.edges, resolved_entity_ids)
675        } else {
676            (vec![], HashSet::new())
677        };
678
679        // Resolve references only for entities in needs_resolution
680        let resolved_refs: Vec<(String, String, RefType)> = all_entities
681            .par_iter()
682            .filter(|e| needs_resolution.contains(e.id.as_str()))
683            .flat_map(|entity| {
684                if scope_resolved_entities.contains(&entity.id) {
685                    return vec![];
686                }
687
688                // Skip entities from non-code file types (JSON, SQL, etc.)
689                let ext = entity.file_path.rfind('.').map(|i| &entity.file_path[i..]).unwrap_or("");
690                if crate::parser::plugins::code::languages::get_language_config(ext).is_none() {
691                    return vec![];
692                }
693
694                let mut entity_edges = Vec::new();
695                let mut consumed_words: HashSet<String> = HashSet::new();
696
697                // Strip comments/strings once, reuse for both dot-chain and bag-of-words
698                let stripped = strip_comments_and_strings(&entity.content);
699
700                // Phase 1: Dot-chain resolution
701                let dot_chains = extract_dot_chains(&stripped);
702
703                for (receiver, member) in &dot_chains {
704                    if *receiver == "self" || *receiver == "this" {
705                        if let Some(class_name) = enclosing_class.get(entity.id.as_str()) {
706                            if let Some(members) = class_members.get(class_name) {
707                                for (n, tid) in members {
708                                    if *n == *member && *tid != entity.id.as_str() {
709                                        entity_edges.push((
710                                            entity.id.clone(),
711                                            tid.to_string(),
712                                            RefType::Calls,
713                                        ));
714                                        consumed_words.insert(member.to_string());
715                                        break;
716                                    }
717                                }
718                            }
719                        }
720                    } else if class_entity_names.contains(*receiver) {
721                        if let Some(members) = class_members.get(*receiver) {
722                            for (n, tid) in members {
723                                if *n == *member {
724                                    entity_edges.push((
725                                        entity.id.clone(),
726                                        tid.to_string(),
727                                        RefType::Calls,
728                                    ));
729                                    consumed_words.insert(member.to_string());
730                                    consumed_words.insert(receiver.to_string());
731                                    break;
732                                }
733                            }
734                        }
735                    }
736                }
737
738                // Phase 2: Bag-of-words resolution (reuse stripped content)
739                let refs = extract_references_with_stripped(&entity.content, &entity.name, &stripped);
740                for ref_name in refs {
741                    if consumed_words.contains(ref_name) {
742                        continue;
743                    }
744                    if class_child_names.contains(&(entity.id.as_str(), ref_name)) {
745                        continue;
746                    }
747
748                    let import_key = (entity.file_path.clone(), ref_name.to_string());
749                    if let Some(import_target_id) = import_table.get(&import_key) {
750                        if import_target_id != &entity.id
751                            && !parent_child_pairs.contains(&(entity.id.as_str(), import_target_id.as_str()))
752                            && !parent_child_pairs.contains(&(import_target_id.as_str(), entity.id.as_str()))
753                        {
754                            let ref_type = infer_ref_type(&entity.content, &ref_name);
755                            entity_edges.push((
756                                entity.id.clone(),
757                                import_target_id.clone(),
758                                ref_type,
759                            ));
760                        }
761                        continue;
762                    }
763
764                    if let Some(target_ids) = symbol_table.get(ref_name) {
765                        let target = target_ids
766                            .iter()
767                            .find(|id| {
768                                *id != &entity.id
769                                    && entity_map
770                                        .get(*id)
771                                        .map_or(false, |e| e.file_path == entity.file_path)
772                            });
773
774                        if let Some(target_id) = target {
775                            if parent_child_pairs.contains(&(entity.id.as_str(), target_id.as_str()))
776                                || parent_child_pairs.contains(&(target_id.as_str(), entity.id.as_str()))
777                            {
778                                continue;
779                            }
780                            let ref_type = infer_ref_type(&entity.content, &ref_name);
781                            entity_edges.push((
782                                entity.id.clone(),
783                                target_id.clone(),
784                                ref_type,
785                            ));
786                        }
787                    }
788                }
789                entity_edges
790            })
791            .collect();
792
793        // Merge scope edges + bag-of-words edges + kept cached edges
794        let mut combined: Vec<(String, String, RefType)> = scope_edges;
795        combined.extend(resolved_refs);
796        let mut seen_edges: HashSet<(String, String)> = HashSet::with_capacity(combined.len());
797        let mut all_resolved: Vec<(String, String, RefType)> = Vec::with_capacity(combined.len());
798        for edge in combined {
799            if seen_edges.insert((edge.0.clone(), edge.1.clone())) {
800                all_resolved.push(edge);
801            }
802        }
803
804        // Build final edge list: kept edges + newly resolved edges
805        let mut edges: Vec<EntityRef> = Vec::with_capacity(kept_edges.len() + all_resolved.len());
806        let mut dependents: HashMap<String, Vec<String>> = HashMap::new();
807        let mut dependencies: HashMap<String, Vec<String>> = HashMap::new();
808
809        // Track all edge pairs for dedup
810        let mut all_edge_pairs: HashSet<(String, String)> = HashSet::new();
811
812        // Add kept cached edges
813        for edge in kept_edges {
814            all_edge_pairs.insert((edge.from_entity.clone(), edge.to_entity.clone()));
815            dependents
816                .entry(edge.to_entity.clone())
817                .or_default()
818                .push(edge.from_entity.clone());
819            dependencies
820                .entry(edge.from_entity.clone())
821                .or_default()
822                .push(edge.to_entity.clone());
823            edges.push(edge);
824        }
825
826        // Add newly resolved edges, dedup against kept edges
827        for (from_entity, to_entity, ref_type) in all_resolved {
828            if !all_edge_pairs.insert((from_entity.clone(), to_entity.clone())) {
829                continue;
830            }
831            dependents
832                .entry(to_entity.clone())
833                .or_default()
834                .push(from_entity.clone());
835            dependencies
836                .entry(from_entity.clone())
837                .or_default()
838                .push(to_entity.clone());
839            edges.push(EntityRef {
840                from_entity,
841                to_entity,
842                ref_type,
843            });
844        }
845
846        let graph = EntityGraph {
847            entities: entity_map,
848            edges,
849            dependents,
850            dependencies,
851        };
852
853        (graph, all_entities)
854    }
855
856    /// Get entities that depend on the given entity (reverse deps).
857    pub fn get_dependents(&self, entity_id: &str) -> Vec<&EntityInfo> {
858        self.dependents
859            .get(entity_id)
860            .map(|ids| {
861                ids.iter()
862                    .filter_map(|id| self.entities.get(id))
863                    .collect()
864            })
865            .unwrap_or_default()
866    }
867
868    /// Get entities that the given entity depends on (forward deps).
869    pub fn get_dependencies(&self, entity_id: &str) -> Vec<&EntityInfo> {
870        self.dependencies
871            .get(entity_id)
872            .map(|ids| {
873                ids.iter()
874                    .filter_map(|id| self.entities.get(id))
875                    .collect()
876            })
877            .unwrap_or_default()
878    }
879
880    /// Impact analysis: if the given entity changes, what else might be affected?
881    /// Returns all transitive dependents (breadth-first), capped at 10k.
882    pub fn impact_analysis(&self, entity_id: &str) -> Vec<&EntityInfo> {
883        self.impact_analysis_capped(entity_id, 10_000)
884    }
885
886    /// Depth-limited impact analysis. Returns transitive dependents with their BFS depth.
887    /// `max_depth == 0` means unlimited. Default depth of 2 covers direct + one transitive level.
888    pub fn impact_analysis_bounded(&self, entity_id: &str, max_depth: usize) -> Vec<(&EntityInfo, usize)> {
889        let mut visited: HashSet<&str> = HashSet::new();
890        let mut queue: std::collections::VecDeque<(&str, usize)> = std::collections::VecDeque::new();
891        let mut result = Vec::new();
892
893        let start_key = match self.entities.get_key_value(entity_id) {
894            Some((k, _)) => k.as_str(),
895            None => return result,
896        };
897
898        queue.push_back((start_key, 0));
899        visited.insert(start_key);
900
901        while let Some((current, depth)) = queue.pop_front() {
902            if let Some(deps) = self.dependents.get(current) {
903                let next_depth = depth + 1;
904                if max_depth > 0 && next_depth > max_depth {
905                    continue;
906                }
907                for dep in deps {
908                    if visited.insert(dep.as_str()) {
909                        if let Some(info) = self.entities.get(dep.as_str()) {
910                            result.push((info, next_depth));
911                        }
912                        queue.push_back((dep.as_str(), next_depth));
913                    }
914                }
915            }
916        }
917
918        result
919    }
920
921    /// Impact analysis with a cap on maximum nodes visited.
922    /// Returns transitive dependents up to the cap. Uses borrowed strings.
923    pub fn impact_analysis_capped(&self, entity_id: &str, max_visited: usize) -> Vec<&EntityInfo> {
924        let mut visited: HashSet<&str> = HashSet::new();
925        let mut queue: std::collections::VecDeque<&str> = std::collections::VecDeque::new();
926        let mut result = Vec::new();
927
928        let start_key = match self.entities.get_key_value(entity_id) {
929            Some((k, _)) => k.as_str(),
930            None => return result,
931        };
932
933        queue.push_back(start_key);
934        visited.insert(start_key);
935
936        while let Some(current) = queue.pop_front() {
937            if result.len() >= max_visited {
938                break;
939            }
940            if let Some(deps) = self.dependents.get(current) {
941                for dep in deps {
942                    if visited.insert(dep.as_str()) {
943                        if let Some(info) = self.entities.get(dep.as_str()) {
944                            result.push(info);
945                        }
946                        queue.push_back(dep.as_str());
947                        if result.len() >= max_visited {
948                            break;
949                        }
950                    }
951                }
952            }
953        }
954
955        result
956    }
957
958    /// Count transitive dependents without collecting them (faster for large graphs).
959    /// Uses borrowed strings to avoid allocation overhead.
960    pub fn impact_count(&self, entity_id: &str, max_count: usize) -> usize {
961        let mut visited: HashSet<&str> = HashSet::new();
962        let mut queue: std::collections::VecDeque<&str> = std::collections::VecDeque::new();
963        let mut count = 0;
964
965        // We need entity_id to live long enough; look it up in our entities map
966        let start_key = match self.entities.get_key_value(entity_id) {
967            Some((k, _)) => k.as_str(),
968            None => return 0,
969        };
970
971        queue.push_back(start_key);
972        visited.insert(start_key);
973
974        while let Some(current) = queue.pop_front() {
975            if count >= max_count {
976                break;
977            }
978            if let Some(deps) = self.dependents.get(current) {
979                for dep in deps {
980                    if visited.insert(dep.as_str()) {
981                        count += 1;
982                        queue.push_back(dep.as_str());
983                        if count >= max_count {
984                            break;
985                        }
986                    }
987                }
988            }
989        }
990
991        count
992    }
993
994    /// Filter entities to those that look like tests.
995    /// Uses name heuristics, file path patterns, and content patterns.
996    pub fn filter_test_entities(&self, entities: &[crate::model::entity::SemanticEntity]) -> HashSet<String> {
997        let mut test_ids = HashSet::new();
998        for entity in entities {
999            if is_test_entity(entity) {
1000                test_ids.insert(entity.id.clone());
1001            }
1002        }
1003        test_ids
1004    }
1005
1006    /// Impact analysis filtered to test entities only.
1007    /// Returns transitive dependents that are test functions/methods.
1008    pub fn test_impact(
1009        &self,
1010        entity_id: &str,
1011        all_entities: &[crate::model::entity::SemanticEntity],
1012    ) -> Vec<&EntityInfo> {
1013        let test_ids = self.filter_test_entities(all_entities);
1014        let impact = self.impact_analysis(entity_id);
1015        impact
1016            .into_iter()
1017            .filter(|info| test_ids.contains(&info.id))
1018            .collect()
1019    }
1020
1021    /// Incrementally update the graph from a set of changed files.
1022    ///
1023    /// Instead of rebuilding the entire graph, this only re-extracts entities
1024    /// from changed files and re-resolves their references. This is faster
1025    /// than a full rebuild when only a few files changed.
1026    ///
1027    /// For each changed file:
1028    /// - Deleted: remove all entities from that file, prune edges
1029    /// - Added/Modified: remove old entities, extract new ones, rebuild references
1030    /// - Renamed: update file paths in entity info
1031    pub fn update_from_changes(
1032        &mut self,
1033        changed_files: &[FileChange],
1034        root: &Path,
1035        registry: &ParserRegistry,
1036    ) {
1037        let mut affected_files: HashSet<String> = HashSet::new();
1038        let mut new_entities: Vec<SemanticEntity> = Vec::new();
1039
1040        for change in changed_files {
1041            affected_files.insert(change.file_path.clone());
1042            if let Some(ref old_path) = change.old_file_path {
1043                affected_files.insert(old_path.clone());
1044            }
1045
1046            match change.status {
1047                FileStatus::Deleted => {
1048                    self.remove_entities_for_file(&change.file_path);
1049                }
1050                FileStatus::Renamed => {
1051                    // Update file paths for renamed files
1052                    if let Some(ref old_path) = change.old_file_path {
1053                        self.remove_entities_for_file(old_path);
1054                    }
1055                    // Extract entities from the new file
1056                    if let Some(entities) = self.extract_file_entities(
1057                        &change.file_path,
1058                        change.after_content.as_deref(),
1059                        root,
1060                        registry,
1061                    ) {
1062                        new_entities.extend(entities);
1063                    }
1064                }
1065                FileStatus::Added | FileStatus::Modified => {
1066                    // Remove old entities for this file
1067                    self.remove_entities_for_file(&change.file_path);
1068                    // Extract new entities
1069                    if let Some(entities) = self.extract_file_entities(
1070                        &change.file_path,
1071                        change.after_content.as_deref(),
1072                        root,
1073                        registry,
1074                    ) {
1075                        new_entities.extend(entities);
1076                    }
1077                }
1078            }
1079        }
1080
1081        // Add new entities to the entity map
1082        for entity in &new_entities {
1083            self.entities.insert(
1084                entity.id.clone(),
1085                EntityInfo {
1086                    id: entity.id.clone(),
1087                    name: entity.name.clone(),
1088                    entity_type: entity.entity_type.clone(),
1089                    file_path: entity.file_path.clone(),
1090                    parent_id: entity.parent_id.clone(),
1091                    start_line: entity.start_line,
1092                    end_line: entity.end_line,
1093                },
1094            );
1095        }
1096
1097        // Rebuild the global symbol table from all current entities
1098        let symbol_table = self.build_symbol_table();
1099
1100        // Re-resolve references for new entities
1101        for entity in &new_entities {
1102            self.resolve_entity_references(entity, &symbol_table);
1103        }
1104
1105        // Also re-resolve references for entities in OTHER files that might
1106        // reference entities in changed files (their targets may have changed)
1107        let changed_entity_names: HashSet<String> = new_entities
1108            .iter()
1109            .map(|e| e.name.clone())
1110            .collect();
1111
1112        // Find entities in unchanged files that reference any changed entity name
1113        let entities_to_recheck: Vec<String> = self
1114            .entities
1115            .values()
1116            .filter(|e| !affected_files.contains(&e.file_path))
1117            .filter(|e| {
1118                self.dependencies
1119                    .get(&e.id)
1120                    .map_or(false, |deps| {
1121                        deps.iter().any(|dep_id| {
1122                            self.entities
1123                                .get(dep_id)
1124                                .map_or(false, |dep| changed_entity_names.contains(&dep.name))
1125                        })
1126                    })
1127            })
1128            .map(|e| e.id.clone())
1129            .collect();
1130
1131        // We don't have the full SemanticEntity for unchanged files, so we skip
1132        // deep re-resolution here. The forward/reverse indexes are already updated
1133        // by remove_entities_for_file and resolve_entity_references.
1134        // For entities that had dangling references (their target was deleted),
1135        // the edges were already pruned.
1136        let _ = entities_to_recheck; // acknowledge but don't act on for now
1137    }
1138
1139    /// Extract entities from a file, using provided content or reading from disk.
1140    fn extract_file_entities(
1141        &self,
1142        file_path: &str,
1143        content: Option<&str>,
1144        root: &Path,
1145        registry: &ParserRegistry,
1146    ) -> Option<Vec<SemanticEntity>> {
1147        let content = if let Some(c) = content {
1148            c.to_string()
1149        } else {
1150            let full_path = root.join(file_path);
1151            std::fs::read_to_string(&full_path).ok()?
1152        };
1153
1154        Some(registry.extract_entities(file_path, &content))
1155    }
1156
1157    /// Remove all entities belonging to a specific file and prune their edges.
1158    fn remove_entities_for_file(&mut self, file_path: &str) {
1159        // Collect entity IDs to remove
1160        let ids_to_remove: Vec<String> = self
1161            .entities
1162            .values()
1163            .filter(|e| e.file_path == file_path)
1164            .map(|e| e.id.clone())
1165            .collect();
1166
1167        let id_set: HashSet<&str> = ids_to_remove.iter().map(|s| s.as_str()).collect();
1168
1169        // Remove from entity map
1170        for id in &ids_to_remove {
1171            self.entities.remove(id);
1172        }
1173
1174        // Remove edges involving these entities
1175        self.edges
1176            .retain(|e| !id_set.contains(e.from_entity.as_str()) && !id_set.contains(e.to_entity.as_str()));
1177
1178        // Clean up dependency/dependent indexes
1179        for id in &ids_to_remove {
1180            // Remove forward deps
1181            if let Some(deps) = self.dependencies.remove(id) {
1182                // Also remove from reverse index
1183                for dep in &deps {
1184                    if let Some(dependents) = self.dependents.get_mut(dep) {
1185                        dependents.retain(|d| d != id);
1186                    }
1187                }
1188            }
1189            // Remove reverse deps
1190            if let Some(deps) = self.dependents.remove(id) {
1191                // Also remove from forward index
1192                for dep in &deps {
1193                    if let Some(dependencies) = self.dependencies.get_mut(dep) {
1194                        dependencies.retain(|d| d != id);
1195                    }
1196                }
1197            }
1198        }
1199    }
1200
1201    /// Build a symbol table from all current entities.
1202    fn build_symbol_table(&self) -> HashMap<String, Vec<String>> {
1203        let mut symbol_table: HashMap<String, Vec<String>> = HashMap::new();
1204        for entity in self.entities.values() {
1205            symbol_table
1206                .entry(entity.name.clone())
1207                .or_default()
1208                .push(entity.id.clone());
1209        }
1210        symbol_table
1211    }
1212
1213    /// Resolve references for a single entity against the symbol table.
1214    fn resolve_entity_references(
1215        &mut self,
1216        entity: &SemanticEntity,
1217        symbol_table: &HashMap<String, Vec<String>>,
1218    ) {
1219        let refs = extract_references_from_content(&entity.content, &entity.name);
1220
1221        for ref_name in refs {
1222            if let Some(target_ids) = symbol_table.get(ref_name) {
1223                let target = target_ids
1224                    .iter()
1225                    .find(|id| {
1226                        *id != &entity.id
1227                            && self
1228                                .entities
1229                                .get(*id)
1230                                .map_or(false, |e| e.file_path == entity.file_path)
1231                    })
1232                    .or_else(|| target_ids.iter().find(|id| *id != &entity.id));
1233
1234                if let Some(target_id) = target {
1235                    let ref_type = infer_ref_type(&entity.content, &ref_name);
1236                    self.edges.push(EntityRef {
1237                        from_entity: entity.id.clone(),
1238                        to_entity: target_id.clone(),
1239                        ref_type,
1240                    });
1241                    self.dependents
1242                        .entry(target_id.clone())
1243                        .or_default()
1244                        .push(entity.id.clone());
1245                    self.dependencies
1246                        .entry(entity.id.clone())
1247                        .or_default()
1248                        .push(target_id.clone());
1249                }
1250            }
1251        }
1252    }
1253}
1254
1255/// Check if an entity looks like a test based on name, file path, and content patterns.
1256fn is_test_entity(entity: &crate::model::entity::SemanticEntity) -> bool {
1257    let name = &entity.name;
1258    let path = &entity.file_path;
1259    let content = &entity.content;
1260
1261    // Name patterns
1262    if name.starts_with("test_") || name.starts_with("Test") || name.ends_with("_test") || name.ends_with("Test") {
1263        return true;
1264    }
1265    if name.starts_with("it_") || name.starts_with("describe_") || name.starts_with("spec_") {
1266        return true;
1267    }
1268
1269    // File path patterns
1270    let path_lower = path.to_lowercase();
1271    let in_test_file = path_lower.contains("/test/")
1272        || path_lower.contains("/tests/")
1273        || path_lower.contains("/spec/")
1274        || path_lower.contains("_test.")
1275        || path_lower.contains(".test.")
1276        || path_lower.contains("_spec.")
1277        || path_lower.contains(".spec.");
1278
1279    // Content patterns (test annotations/decorators)
1280    let has_test_marker = content.contains("#[test]")
1281        || content.contains("#[cfg(test)]")
1282        || content.contains("@Test")
1283        || content.contains("@pytest")
1284        || content.contains("@test")
1285        || content.contains("describe(")
1286        || content.contains("it(")
1287        || content.contains("test(");
1288
1289    in_test_file && has_test_marker
1290}
1291
1292/// Build import table: maps (file_path, imported_name) → target entity ID.
1293///
1294/// Parses `from X import Y` / `import X` / `use X` style statements from entity content
1295/// and resolves Y to the entity it refers to in the symbol table.
1296fn build_import_table(
1297    root: &Path,
1298    file_paths: &[String],
1299    symbol_table: &HashMap<String, Vec<String>>,
1300    entity_map: &HashMap<String, EntityInfo>,
1301    pre_parsed_content: Option<&[(String, String, tree_sitter::Tree)]>,
1302) -> HashMap<(String, String), String> {
1303    // Build a content lookup from pre-parsed files to avoid re-reading from disk
1304    let content_map: HashMap<&str, &str> = pre_parsed_content
1305        .map(|files| {
1306            files.iter().map(|(fp, content, _)| (fp.as_str(), content.as_str())).collect()
1307        })
1308        .unwrap_or_default();
1309
1310    // Go imports are handled entirely by the scope resolver (which uses an indexed approach).
1311    // We no longer need a go_pkg_index here since Go files are skipped below.
1312
1313    // Process files in parallel, each producing local import entries
1314    let per_file_imports: Vec<Vec<((String, String), String)>> = file_paths
1315        .par_iter()
1316        .filter_map(|file_path| {
1317            // Go imports are handled entirely by the scope resolver — skip here
1318            if file_path.ends_with(".go") {
1319                return None;
1320            }
1321
1322            // Use pre-parsed content if available, otherwise read from disk
1323            let owned_content: Option<String>;
1324            let content: &str = if let Some(c) = content_map.get(file_path.as_str()) {
1325                c
1326            } else {
1327                let full_path = root.join(file_path);
1328                owned_content = std::fs::read_to_string(&full_path).ok();
1329                match owned_content.as_deref() {
1330                    Some(c) => c,
1331                    None => return None,
1332                }
1333            };
1334
1335            let mut local_imports: Vec<((String, String), String)> = Vec::new();
1336
1337            // Join multi-line imports into single logical lines
1338            // e.g. "from .cookies import (\n    foo,\n    bar,\n)" -> "from .cookies import foo, bar"
1339            let mut logical_lines: Vec<String> = Vec::new();
1340            let mut current_line = String::new();
1341            let mut in_parens = false;
1342
1343            for line in content.lines() {
1344                let trimmed = line.trim();
1345                if in_parens {
1346                    // Strip parentheses and comments
1347                    let clean = trimmed.trim_end_matches(|c: char| c == ')' || c == ',');
1348                    let clean = clean.split('#').next().unwrap_or(clean).trim();
1349                    if !clean.is_empty() && clean != "(" {
1350                        current_line.push_str(", ");
1351                        current_line.push_str(clean);
1352                    }
1353                    if trimmed.contains(')') {
1354                        in_parens = false;
1355                        logical_lines.push(std::mem::take(&mut current_line));
1356                    }
1357                } else if trimmed.starts_with("from ") && trimmed.contains(" import ") {
1358                    if trimmed.contains('(') && !trimmed.contains(')') {
1359                        // Multi-line import starts
1360                        in_parens = true;
1361                        // Take everything before the paren
1362                        let before_paren = trimmed.split('(').next().unwrap_or(trimmed);
1363                        current_line = before_paren.trim().to_string();
1364                        // Also grab anything after the paren on this line
1365                        if let Some(after) = trimmed.split('(').nth(1) {
1366                            let after = after.trim().trim_end_matches(')').trim();
1367                            if !after.is_empty() {
1368                                current_line.push(' ');
1369                                current_line.push_str(after);
1370                            }
1371                        }
1372                    } else {
1373                        logical_lines.push(trimmed.to_string());
1374                    }
1375                }
1376            }
1377
1378            for logical_line in &logical_lines {
1379                if let Some(rest) = logical_line.strip_prefix("from ") {
1380                    // Find " import " or " import," (multi-line imports join with comma)
1381                    let import_match = rest.find(" import ")
1382                        .map(|pos| (pos, 8))
1383                        .or_else(|| rest.find(" import,").map(|pos| (pos, 8)));
1384                    if let Some((import_pos, skip)) = import_match {
1385                        let module_path = &rest[..import_pos];
1386                        let names_str = &rest[import_pos + skip..];
1387
1388                        let source_module = module_path
1389                            .trim_start_matches('.')
1390                            .rsplit('.')
1391                            .next()
1392                            .unwrap_or(module_path.trim_start_matches('.'));
1393
1394                        for name_part in names_str.split(',') {
1395                            let name_part = name_part.trim();
1396                            let imported_name = name_part.split_whitespace().next().unwrap_or(name_part);
1397                            // Strip trailing parens/punctuation
1398                            let imported_name = imported_name.trim_matches(|c: char| c == '(' || c == ')' || c == ',');
1399                            if imported_name.is_empty() {
1400                                continue;
1401                            }
1402
1403                            if let Some(target_ids) = symbol_table.get(imported_name) {
1404                                let target = target_ids.iter().find(|id| {
1405                                    entity_map.get(*id).map_or(false, |e| {
1406                                        let stem = e.file_path.rsplit('/').next().unwrap_or(&e.file_path);
1407                                        let stem = stem.strip_suffix(".py")
1408                                            .or_else(|| stem.strip_suffix(".ts"))
1409                                            .or_else(|| stem.strip_suffix(".js"))
1410                                            .or_else(|| stem.strip_suffix(".rs"))
1411                                            .unwrap_or(stem);
1412                                        stem == source_module
1413                                    })
1414                                });
1415                                if let Some(target_id) = target {
1416                                    local_imports.push((
1417                                        (file_path.clone(), imported_name.to_string()),
1418                                        target_id.clone(),
1419                                    ));
1420                                }
1421                            }
1422                        }
1423                    }
1424                }
1425            }
1426
1427            // JS/TS imports: import { foo, bar as baz } from './module'
1428            //                import Foo from './module'
1429            let is_js_ts = file_path.ends_with(".js") || file_path.ends_with(".ts")
1430                || file_path.ends_with(".jsx") || file_path.ends_with(".tsx");
1431
1432            if is_js_ts {
1433                static JS_NAMED_RE: LazyLock<Regex> = LazyLock::new(|| {
1434                    Regex::new(r#"import\s*\{([^}]+)\}\s*from\s*['"]([^'"]+)['"]"#).unwrap()
1435                });
1436                static JS_DEFAULT_RE: LazyLock<Regex> = LazyLock::new(|| {
1437                    Regex::new(r#"import\s+(?:type\s+)?([A-Za-z_]\w*)\s+from\s*['"]([^'"]+)['"]"#).unwrap()
1438                });
1439
1440                for cap in JS_NAMED_RE.captures_iter(content) {
1441                    let names_str = cap.get(1).unwrap().as_str();
1442                    let module_path = cap.get(2).unwrap().as_str();
1443                    let source_module = module_path.rsplit('/').next().unwrap_or(module_path);
1444                    let source_module = strip_js_ext(source_module);
1445
1446                    for name_part in names_str.split(',') {
1447                        let name_part = name_part.trim();
1448                        if name_part.is_empty() { continue; }
1449
1450                        // Handle "foo as bar" aliases and "type foo" prefixes
1451                        let (original_name, local_name) = if let Some(pos) = name_part.find(" as ") {
1452                            let orig = name_part[..pos].trim();
1453                            let local = name_part[pos + 4..].trim();
1454                            let orig = orig.strip_prefix("type ").unwrap_or(orig);
1455                            (orig, local)
1456                        } else {
1457                            let name = name_part.strip_prefix("type ").unwrap_or(name_part);
1458                            (name, name)
1459                        };
1460
1461                        if original_name.is_empty() || local_name.is_empty() { continue; }
1462
1463                        if let Some(target_ids) = symbol_table.get(original_name) {
1464                            let target = target_ids.iter().find(|id| {
1465                                entity_map.get(*id).map_or(false, |e| {
1466                                    let stem = e.file_path.rsplit('/').next().unwrap_or(&e.file_path);
1467                                    let stem = strip_file_ext(stem);
1468                                    stem == source_module
1469                                })
1470                            });
1471                            if let Some(target_id) = target {
1472                                local_imports.push((
1473                                    (file_path.clone(), local_name.to_string()),
1474                                    target_id.clone(),
1475                                ));
1476                            }
1477                        }
1478                    }
1479                }
1480
1481                for cap in JS_DEFAULT_RE.captures_iter(content) {
1482                    let local_name = cap.get(1).unwrap().as_str();
1483                    let module_path = cap.get(2).unwrap().as_str();
1484                    let source_module = module_path.rsplit('/').next().unwrap_or(module_path);
1485                    let source_module = strip_js_ext(source_module);
1486
1487                    if let Some(target_ids) = symbol_table.get(local_name) {
1488                        let target = target_ids.iter().find(|id| {
1489                            entity_map.get(*id).map_or(false, |e| {
1490                                let stem = e.file_path.rsplit('/').next().unwrap_or(&e.file_path);
1491                                let stem = strip_file_ext(stem);
1492                                stem == source_module
1493                            })
1494                        });
1495                        if let Some(target_id) = target {
1496                            local_imports.push((
1497                                (file_path.clone(), local_name.to_string()),
1498                                target_id.clone(),
1499                            ));
1500                        }
1501                    }
1502                }
1503            }
1504
1505            // Rust imports: use crate::module::Name; / use crate::module::{A, B};
1506            // Also: use super::module::Name; / use self::module::Name;
1507            let is_rust = file_path.ends_with(".rs");
1508            if is_rust {
1509                static RUST_USE_SIMPLE_RE: LazyLock<Regex> = LazyLock::new(|| {
1510                    // use crate::config::Config;
1511                    // use super::types::Entity;
1512                    // use config::Config;  (bare module path in binary crates)
1513                    Regex::new(r"(?m)^\s*use\s+(?:(?:crate|super|self)::)?([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\s*;").unwrap()
1514                });
1515                static RUST_USE_GROUP_RE: LazyLock<Regex> = LazyLock::new(|| {
1516                    // use crate::types::{Entity, ParseError};
1517                    // use types::{Entity, ParseError};  (bare module path)
1518                    Regex::new(r"(?m)^\s*use\s+(?:(?:crate|super|self)::)?([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)::\{([^}]+)\}\s*;").unwrap()
1519                });
1520
1521                // Use a local import table for Rust alias resolution
1522                let mut local_import_table: HashMap<(String, String), String> = HashMap::new();
1523
1524                // Build a map: module_name -> list of file paths whose stem matches
1525                // For "use crate::config::Config", module is "config", name is "Config"
1526                for cap in RUST_USE_SIMPLE_RE.captures_iter(content) {
1527                    let full_path_str = cap.get(1).unwrap().as_str();
1528                    let parts: Vec<&str> = full_path_str.split("::").collect();
1529                    if parts.is_empty() { continue; }
1530
1531                    // Last part is the imported name, everything before is the module path
1532                    let imported_name = parts[parts.len() - 1];
1533                    // The module is the second-to-last part, or the first if only one part
1534                    let source_module = if parts.len() >= 2 {
1535                        parts[parts.len() - 2]
1536                    } else {
1537                        parts[0]
1538                    };
1539
1540                    resolve_rust_import(
1541                        file_path, imported_name, source_module,
1542                        symbol_table, entity_map, &mut local_import_table,
1543                    );
1544                }
1545
1546                for cap in RUST_USE_GROUP_RE.captures_iter(content) {
1547                    let module_path = cap.get(1).unwrap().as_str();
1548                    let names_str = cap.get(2).unwrap().as_str();
1549
1550                    // source_module is the last segment of the module path
1551                    let source_module = module_path.rsplit("::").next().unwrap_or(module_path);
1552
1553                    for name_part in names_str.split(',') {
1554                        let name_part = name_part.trim();
1555                        // Handle "Name as Alias"
1556                        let (original, local) = if let Some(pos) = name_part.find(" as ") {
1557                            (&name_part[..pos], name_part[pos + 4..].trim())
1558                        } else {
1559                            (name_part, name_part)
1560                        };
1561                        let original = original.trim();
1562                        let local = local.trim();
1563                        if original.is_empty() || local.is_empty() { continue; }
1564
1565                        resolve_rust_import(
1566                            file_path, original, source_module,
1567                            symbol_table, entity_map, &mut local_import_table,
1568                        );
1569                        // If aliased, also map the local name
1570                        if local != original {
1571                            if let Some(target) = local_import_table.get(&(file_path.clone(), original.to_string())).cloned() {
1572                                local_import_table.insert(
1573                                    (file_path.clone(), local.to_string()),
1574                                    target,
1575                                );
1576                            }
1577                        }
1578                    }
1579                }
1580
1581                // Collect all Rust imports into local_imports
1582                for (key, val) in local_import_table {
1583                    local_imports.push((key, val));
1584                }
1585            }
1586
1587            // Go imports are handled by the scope resolver (avoids O(n²) import table explosion).
1588            // Skip Go files here entirely.
1589
1590            Some(local_imports)
1591        })
1592        .collect();
1593
1594    // Merge all per-file imports into a single table
1595    let mut import_table: HashMap<(String, String), String> = HashMap::new();
1596    for local_imports in per_file_imports {
1597        for (key, val) in local_imports {
1598            import_table.insert(key, val);
1599        }
1600    }
1601
1602    import_table
1603}
1604
1605/// Resolve a Rust import: find the target entity in the symbol table
1606/// by matching the imported name against entities in files whose stem matches source_module.
1607fn resolve_rust_import(
1608    file_path: &str,
1609    imported_name: &str,
1610    source_module: &str,
1611    symbol_table: &HashMap<String, Vec<String>>,
1612    entity_map: &HashMap<String, EntityInfo>,
1613    import_table: &mut HashMap<(String, String), String>,
1614) {
1615    if let Some(target_ids) = symbol_table.get(imported_name) {
1616        let target = target_ids.iter().find(|id| {
1617            entity_map.get(*id).map_or(false, |e| {
1618                let stem = e.file_path.rsplit('/').next().unwrap_or(&e.file_path);
1619                let stem = strip_file_ext(stem);
1620                stem == source_module
1621            })
1622        });
1623        if let Some(target_id) = target {
1624            import_table.insert(
1625                (file_path.to_string(), imported_name.to_string()),
1626                target_id.clone(),
1627            );
1628        }
1629    }
1630}
1631
/// Remove one trailing JS/TS extension (`.js`, `.ts`, `.jsx`, `.tsx`) from a
/// module name. Names without any of these suffixes are returned unchanged.
fn strip_js_ext(s: &str) -> &str {
    const JS_EXTENSIONS: [&str; 4] = [".js", ".ts", ".jsx", ".tsx"];

    JS_EXTENSIONS
        .iter()
        .find_map(|&ext| s.strip_suffix(ext))
        .unwrap_or(s)
}
1640
/// Remove one trailing source-file extension from a file name.
///
/// Recognised suffixes are `.py`, `.ts`, `.js`, `.tsx`, `.jsx` and `.rs`;
/// anything else is returned unchanged.
fn strip_file_ext(s: &str) -> &str {
    const KNOWN_EXTENSIONS: [&str; 6] = [".py", ".ts", ".js", ".tsx", ".jsx", ".rs"];

    KNOWN_EXTENSIONS
        .iter()
        .find_map(|&ext| s.strip_suffix(ext))
        .unwrap_or(s)
}
1651
/// Blank out comments and string literals so they cannot produce false references.
///
/// Returns a string with exactly the same byte length as `content` in which
/// every byte belonging to one of the following constructs is replaced by a
/// space, while all other bytes (including newlines that end line comments)
/// are copied through unchanged:
///
/// * triple-quoted strings (`"""..."""`, `'''...'''`),
/// * double- and single-quoted strings, with `\` escaping the next byte,
/// * `#` and `//` line comments (up to, not including, the newline),
/// * `/* ... */` block comments.
///
/// An unterminated string or block comment extends to the end of the input.
///
/// NOTE: the scan is language-agnostic. Any `'` opens a single-quoted string
/// and any `#` opens a line comment, regardless of the source language.
fn strip_comments_and_strings(content: &str) -> String {
    let bytes = content.as_bytes();
    let len = bytes.len();
    let mut result = vec![b' '; len];
    let mut i = 0;

    while i < len {
        let rest = &bytes[i..];

        // Triple-quoted strings (Python docstrings).
        if rest.starts_with(b"\"\"\"") || rest.starts_with(b"'''") {
            let delim = &rest[..3];
            i = match bytes[i + 3..].windows(3).position(|w| w == delim) {
                Some(offset) => i + 3 + offset + 3,
                // Unterminated: the remainder of the input is string content.
                None => len,
            };
            continue;
        }

        // Double- or single-quoted strings.
        let quote = bytes[i];
        if quote == b'"' || quote == b'\'' {
            i += 1;
            while i < len {
                let b = bytes[i];
                if b == b'\\' {
                    // Skip the escaped byte so an escaped quote does not close the string.
                    i += 2;
                } else {
                    i += 1;
                    if b == quote {
                        break;
                    }
                }
            }
            continue;
        }

        // `#` and `//` line comments: blank up to the newline, which is kept.
        if bytes[i] == b'#' || rest.starts_with(b"//") {
            i += rest.iter().position(|&b| b == b'\n').unwrap_or(rest.len());
            continue;
        }

        // C-style block comments.
        if rest.starts_with(b"/*") {
            i = match bytes[i + 2..].windows(2).position(|w| w == b"*/") {
                Some(offset) => i + 2 + offset + 2,
                // Unterminated: the remainder of the input is comment text.
                None => len,
            };
            continue;
        }

        // Regular code: copy through.
        result[i] = bytes[i];
        i += 1;
    }

    // Only whole code characters are copied and everything else is ASCII
    // spaces, so the buffer is expected to be valid UTF-8; fall back to a
    // lossy conversion rather than panicking if that ever fails.
    String::from_utf8(result)
        .unwrap_or_else(|err| String::from_utf8_lossy(err.as_bytes()).into_owned())
}
1730
1731/// Extract dot-chains (receiver.member) from content for precise resolution.
1732/// Returns unique (receiver, member) pairs found in the content.
1733fn extract_dot_chains<'a>(content: &'a str) -> Vec<(&'a str, &'a str)> {
1734    static DOT_CHAIN_RE: LazyLock<Regex> = LazyLock::new(|| {
1735        Regex::new(r"\b([A-Za-z_]\w*)\.([A-Za-z_]\w*)").unwrap()
1736    });
1737
1738    let mut chains = Vec::new();
1739    let mut seen: HashSet<(&str, &str)> = HashSet::new();
1740    for cap in DOT_CHAIN_RE.captures_iter(content) {
1741        let receiver = cap.get(1).unwrap().as_str();
1742        let member = cap.get(2).unwrap().as_str();
1743        if seen.insert((receiver, member)) {
1744            chains.push((receiver, member));
1745        }
1746    }
1747    chains
1748}
1749
1750/// Extract identifier references from entity content using simple token analysis.
1751/// Strips comments and strings first to avoid false positives from docstrings.
1752/// Returns borrowed slices from the stripped content.
1753fn extract_references_from_content<'a>(content: &'a str, own_name: &str) -> Vec<&'a str> {
1754    let stripped = strip_comments_and_strings(content);
1755    extract_references_with_stripped(content, own_name, &stripped)
1756}
1757
1758/// Extract references using a pre-stripped version of the content.
1759/// Use this when you already have the stripped content (e.g. from dot-chain extraction)
1760/// to avoid stripping comments/strings twice.
1761fn extract_references_with_stripped<'a>(content: &'a str, own_name: &str, stripped: &str) -> Vec<&'a str> {
1762    let stripped_words: HashSet<&str> = stripped
1763        .split(|c: char| !c.is_alphanumeric() && c != '_')
1764        .filter(|w| !w.is_empty())
1765        .collect();
1766
1767    let mut refs = Vec::new();
1768    let mut seen: HashSet<&str> = HashSet::new();
1769
1770    for word in content.split(|c: char| !c.is_alphanumeric() && c != '_') {
1771        if word.is_empty() || word == own_name {
1772            continue;
1773        }
1774        if is_keyword(word) || word.len() < 2 {
1775            continue;
1776        }
1777        // Skip very short lowercase identifiers (likely local vars: i, x, a, ok, id, etc.)
1778        if word.starts_with(|c: char| c.is_lowercase()) && word.len() < 3 {
1779            continue;
1780        }
1781        if !word.starts_with(|c: char| c.is_alphabetic() || c == '_') {
1782            continue;
1783        }
1784        // Skip common local variable names that create false graph edges
1785        if is_common_local_name(word) {
1786            continue;
1787        }
1788        // Skip words that only appear in comments/strings
1789        if !stripped_words.contains(word) {
1790            continue;
1791        }
1792        if seen.insert(word) {
1793            refs.push(word);
1794        }
1795    }
1796
1797    refs
1798}
1799
/// Identifiers that are overwhelmingly used as local variable names.
/// Built lazily on first use.
static COMMON_LOCAL_NAMES: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
    HashSet::from([
        "result", "results", "data", "config", "value", "values",
        "item", "items", "input", "output", "args", "opts",
        "name", "path", "file", "line", "count", "index",
        "temp", "prev", "next", "curr", "current", "node",
        "left", "right", "root", "head", "tail", "body",
        "text", "content", "source", "target", "entry",
        "error", "errors", "message", "response", "request",
        "context", "state", "props", "event", "handler",
        "callback", "options", "params", "query", "list",
        "base", "info", "meta", "kind", "mode", "flag",
        "size", "length", "width", "height", "start", "stop",
        "begin", "done", "found", "status", "code",
    ])
});

/// Whether `word` is one of the generic local-variable names in
/// [`COMMON_LOCAL_NAMES`]. Treating such names as entity references would add
/// large numbers of false-positive edges to the dependency graph.
fn is_common_local_name(word: &str) -> bool {
    COMMON_LOCAL_NAMES.contains(word)
}
1822
1823/// Infer reference type from context using word-boundary-aware matching.
1824fn infer_ref_type(content: &str, ref_name: &str) -> RefType {
1825    // Check if it's a function call: ref_name followed by ( with word boundary before.
1826    // Avoids format! allocation by finding ref_name and checking the next char.
1827    let bytes = content.as_bytes();
1828    let name_bytes = ref_name.as_bytes();
1829    let mut search_start = 0;
1830    while let Some(rel_pos) = content[search_start..].find(ref_name) {
1831        let pos = search_start + rel_pos;
1832        let after = pos + name_bytes.len();
1833        // Check next char is '('
1834        if after < bytes.len() && bytes[after] == b'(' {
1835            // Verify word boundary before
1836            let is_boundary = pos == 0 || {
1837                let prev = bytes[pos - 1];
1838                !prev.is_ascii_alphanumeric() && prev != b'_'
1839            };
1840            if is_boundary {
1841                return RefType::Calls;
1842            }
1843        }
1844        // Advance past pos to the next char boundary to avoid slicing inside a multi-byte UTF-8 char.
1845        search_start = pos + 1;
1846        while search_start < content.len() && !content.is_char_boundary(search_start) {
1847            search_start += 1;
1848        }
1849    }
1850
1851    // Check if it's in an import/use statement (line-level, not substring)
1852    for line in content.lines() {
1853        let trimmed = line.trim();
1854        if (trimmed.starts_with("import ") || trimmed.starts_with("use ")
1855            || trimmed.starts_with("from ") || trimmed.starts_with("require("))
1856            && trimmed.contains(ref_name)
1857        {
1858            return RefType::Imports;
1859        }
1860    }
1861
1862    // Default to type reference
1863    RefType::TypeRef
1864}
1865
/// Words that are never treated as entity references: language keywords plus
/// a selection of built-in functions, macros and primitive/standard type names
/// from the languages listed in the group comments below. Built lazily on
/// first use.
static KEYWORDS: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
    HashSet::from([
        // Common across languages
        "if", "else", "for", "while", "do", "switch", "case", "break",
        "continue", "return", "try", "catch", "finally", "throw",
        "new", "delete", "typeof", "instanceof", "in", "of",
        "true", "false", "null", "undefined", "void", "this",
        "super", "class", "extends", "implements", "interface",
        "enum", "const", "let", "var", "function", "async",
        "await", "yield", "import", "export", "default", "from",
        "as", "static", "public", "private", "protected",
        "abstract", "final", "override",
        // Rust
        "fn", "pub", "mod", "use", "struct", "impl", "trait",
        "where", "type", "self", "Self", "mut", "ref", "match",
        "loop", "move", "unsafe", "extern", "crate", "dyn",
        // Python
        "def", "elif", "except", "raise", "with",
        "pass", "lambda", "nonlocal", "global", "assert",
        "True", "False", "and", "or", "not", "is",
        // Go
        "func", "package", "range", "select", "chan", "go",
        "defer", "map", "make", "append", "len", "cap",
        // C/C++
        "auto", "register", "volatile", "sizeof", "typedef",
        "template", "typename", "namespace", "virtual", "inline",
        "constexpr", "nullptr", "noexcept", "explicit", "friend",
        "operator", "using", "cout", "endl", "cerr", "cin",
        "printf", "scanf", "malloc", "free", "NULL", "include",
        "ifdef", "ifndef", "endif", "define", "pragma",
        // Ruby
        "end", "then", "elsif", "unless", "until",
        "begin", "rescue", "ensure", "when", "require",
        "attr_accessor", "attr_reader", "attr_writer",
        "puts", "nil", "module", "defined",
        // C#
        "internal", "sealed", "readonly",
        "partial", "delegate", "event", "params", "out",
        "object", "decimal", "sbyte", "ushort", "uint",
        "ulong", "nint", "nuint", "dynamic",
        "get", "set", "value", "init", "record",
        // Types (primitives)
        "string", "number", "boolean", "int", "float", "double",
        "bool", "char", "byte", "i8", "i16", "i32", "i64",
        "u8", "u16", "u32", "u64", "f32", "f64", "usize",
        "isize", "str", "String", "Vec", "Option", "Result",
        "Box", "Arc", "Rc", "HashMap", "HashSet", "Some",
        "Ok", "Err",
    ])
});

/// Whether `word` is in [`KEYWORDS`] and must therefore be ignored when
/// collecting references. The comparison is case-sensitive.
fn is_keyword(word: &str) -> bool {
    KEYWORDS.contains(word)
}
1920
1921#[cfg(test)]
1922mod tests {
1923    use super::*;
1924    use crate::git::types::{FileChange, FileStatus};
1925    use std::io::Write;
1926    use tempfile::TempDir;
1927
1928    fn create_test_repo() -> (TempDir, ParserRegistry) {
1929        let dir = TempDir::new().unwrap();
1930        let registry = crate::parser::plugins::create_default_registry();
1931        (dir, registry)
1932    }
1933
1934    fn write_file(dir: &Path, name: &str, content: &str) {
1935        let path = dir.join(name);
1936        if let Some(parent) = path.parent() {
1937            std::fs::create_dir_all(parent).unwrap();
1938        }
1939        let mut f = std::fs::File::create(path).unwrap();
1940        f.write_all(content.as_bytes()).unwrap();
1941    }
1942
1943    #[test]
1944    fn test_incremental_add_file() {
1945        let (dir, registry) = create_test_repo();
1946        let root = dir.path();
1947
1948        // Start with one file
1949        write_file(root, "a.ts", "export function foo() { return bar(); }\n");
1950        write_file(root, "b.ts", "export function bar() { return 1; }\n");
1951
1952        let (mut graph, _) = EntityGraph::build(root, &["a.ts".into(), "b.ts".into()], &registry);
1953        assert_eq!(graph.entities.len(), 2);
1954
1955        // Add a new file
1956        write_file(root, "c.ts", "export function baz() { return foo(); }\n");
1957        graph.update_from_changes(
1958            &[FileChange {
1959                file_path: "c.ts".into(),
1960                status: FileStatus::Added,
1961                old_file_path: None,
1962                before_content: None,
1963                after_content: None, // will read from disk
1964            }],
1965            root,
1966            &registry,
1967        );
1968
1969        assert_eq!(graph.entities.len(), 3);
1970        assert!(graph.entities.contains_key("c.ts::function::baz"));
1971        // baz references foo
1972        let baz_deps = graph.get_dependencies("c.ts::function::baz");
1973        assert!(
1974            baz_deps.iter().any(|d| d.name == "foo"),
1975            "baz should depend on foo. Deps: {:?}",
1976            baz_deps.iter().map(|d| &d.name).collect::<Vec<_>>()
1977        );
1978    }
1979
1980    #[test]
1981    fn test_incremental_delete_file() {
1982        let (dir, registry) = create_test_repo();
1983        let root = dir.path();
1984
1985        write_file(root, "a.ts", "export function foo() { return bar(); }\n");
1986        write_file(root, "b.ts", "export function bar() { return 1; }\n");
1987
1988        let (mut graph, _) = EntityGraph::build(root, &["a.ts".into(), "b.ts".into()], &registry);
1989        assert_eq!(graph.entities.len(), 2);
1990
1991        // Delete b.ts
1992        graph.update_from_changes(
1993            &[FileChange {
1994                file_path: "b.ts".into(),
1995                status: FileStatus::Deleted,
1996                old_file_path: None,
1997                before_content: None,
1998                after_content: None,
1999            }],
2000            root,
2001            &registry,
2002        );
2003
2004        assert_eq!(graph.entities.len(), 1);
2005        assert!(!graph.entities.contains_key("b.ts::function::bar"));
2006        // foo's dependency on bar should be pruned
2007        let foo_deps = graph.get_dependencies("a.ts::function::foo");
2008        assert!(
2009            foo_deps.is_empty(),
2010            "foo's deps should be empty after bar deleted. Deps: {:?}",
2011            foo_deps.iter().map(|d| &d.name).collect::<Vec<_>>()
2012        );
2013    }
2014
2015    #[test]
2016    fn test_incremental_modify_file() {
2017        let (dir, registry) = create_test_repo();
2018        let root = dir.path();
2019
2020        write_file(root, "a.ts", "export function foo() { return bar(); }\n");
2021        write_file(root, "b.ts", "export function bar() { return 1; }\nexport function baz() { return 2; }\n");
2022
2023        let (mut graph, _) = EntityGraph::build(root, &["a.ts".into(), "b.ts".into()], &registry);
2024        assert_eq!(graph.entities.len(), 3);
2025
2026        // Modify a.ts to call baz instead of bar
2027        write_file(root, "a.ts", "export function foo() { return baz(); }\n");
2028        graph.update_from_changes(
2029            &[FileChange {
2030                file_path: "a.ts".into(),
2031                status: FileStatus::Modified,
2032                old_file_path: None,
2033                before_content: None,
2034                after_content: None,
2035            }],
2036            root,
2037            &registry,
2038        );
2039
2040        assert_eq!(graph.entities.len(), 3);
2041        // foo should now depend on baz, not bar
2042        let foo_deps = graph.get_dependencies("a.ts::function::foo");
2043        let dep_names: Vec<&str> = foo_deps.iter().map(|d| d.name.as_str()).collect();
2044        assert!(dep_names.contains(&"baz"), "foo should depend on baz after modification. Deps: {:?}", dep_names);
2045        assert!(!dep_names.contains(&"bar"), "foo should no longer depend on bar. Deps: {:?}", dep_names);
2046    }
2047
2048    #[test]
2049    fn test_incremental_with_content() {
2050        let (dir, registry) = create_test_repo();
2051        let root = dir.path();
2052
2053        write_file(root, "a.ts", "export function foo() { return 1; }\n");
2054        let (mut graph, _) = EntityGraph::build(root, &["a.ts".into()], &registry);
2055        assert_eq!(graph.entities.len(), 1);
2056
2057        // Add file with content provided directly (no disk read needed)
2058        graph.update_from_changes(
2059            &[FileChange {
2060                file_path: "b.ts".into(),
2061                status: FileStatus::Added,
2062                old_file_path: None,
2063                before_content: None,
2064                after_content: Some("export function bar() { return foo(); }\n".into()),
2065            }],
2066            root,
2067            &registry,
2068        );
2069
2070        assert_eq!(graph.entities.len(), 2);
2071        let bar_deps = graph.get_dependencies("b.ts::function::bar");
2072        assert!(bar_deps.iter().any(|d| d.name == "foo"));
2073    }
2074
2075    #[test]
2076    fn test_extract_references() {
2077        let content = "function processData(input) {\n  const result = validateInput(input);\n  return transform(result);\n}";
2078        let refs = extract_references_from_content(content, "processData");
2079        assert!(refs.contains(&"validateInput"));
2080        assert!(refs.contains(&"transform"));
2081        assert!(!refs.contains(&"processData")); // self excluded
2082    }
2083
2084    #[test]
2085    fn test_extract_references_skips_keywords() {
2086        let content = "function foo() { if (true) { return false; } }";
2087        let refs = extract_references_from_content(content, "foo");
2088        assert!(!refs.contains(&"if"));
2089        assert!(!refs.contains(&"true"));
2090        assert!(!refs.contains(&"return"));
2091        assert!(!refs.contains(&"false"));
2092    }
2093
2094    #[test]
2095    fn test_infer_ref_type_call() {
2096        assert_eq!(
2097            infer_ref_type("validateInput(data)", "validateInput"),
2098            RefType::Calls,
2099        );
2100    }
2101
2102    #[test]
2103    fn test_infer_ref_type_type() {
2104        assert_eq!(
2105            infer_ref_type("let x: MyType = something", "MyType"),
2106            RefType::TypeRef,
2107        );
2108    }
2109
2110    #[test]
2111    fn test_infer_ref_type_multibyte_utf8() {
2112        // Ensure no panic when content contains multi-byte UTF-8 characters
2113        assert_eq!(
2114            infer_ref_type("let café = foo(x)", "foo"),
2115            RefType::Calls,
2116        );
2117        assert_eq!(
2118            infer_ref_type("class HandicapfrPublicationFieldsEnum:\n    É = 1\n    bar()", "bar"),
2119            RefType::Calls,
2120        );
2121        // No match should not panic either
2122        assert_eq!(
2123            infer_ref_type("// 日本語コメント\nlet x = 1", "missing"),
2124            RefType::TypeRef,
2125        );
2126    }
2127
2128    #[test]
2129    fn test_dot_chain_self_resolution() {
2130        let (dir, registry) = create_test_repo();
2131        let root = dir.path();
2132
2133        write_file(root, "service.py", "\
2134class MyService:
2135    def process(self):
2136        return self.validate()
2137
2138    def validate(self):
2139        return True
2140");
2141
2142        let (graph, _) = EntityGraph::build(root, &["service.py".into()], &registry);
2143
2144        // process should have an edge to validate via self.validate()
2145        let process_id = graph.entities.keys()
2146            .find(|id| id.contains("process"))
2147            .expect("process entity should exist");
2148        let deps = graph.get_dependencies(process_id);
2149        assert!(
2150            deps.iter().any(|d| d.name == "validate"),
2151            "process should depend on validate via self.validate(). Deps: {:?}",
2152            deps.iter().map(|d| &d.name).collect::<Vec<_>>()
2153        );
2154    }
2155
2156    #[test]
2157    fn test_dot_chain_this_resolution() {
2158        let (dir, registry) = create_test_repo();
2159        let root = dir.path();
2160
2161        write_file(root, "service.ts", "\
2162class UserService {
2163    process() {
2164        return this.validate();
2165    }
2166    validate() {
2167        return true;
2168    }
2169}
2170");
2171
2172        let (graph, _) = EntityGraph::build(root, &["service.ts".into()], &registry);
2173
2174        let process_id = graph.entities.keys()
2175            .find(|id| id.contains("process"))
2176            .expect("process entity should exist");
2177        let deps = graph.get_dependencies(process_id);
2178        assert!(
2179            deps.iter().any(|d| d.name == "validate"),
2180            "process should depend on validate via this.validate(). Deps: {:?}",
2181            deps.iter().map(|d| &d.name).collect::<Vec<_>>()
2182        );
2183    }
2184
2185    #[test]
2186    fn test_dot_chain_class_static() {
2187        let (dir, registry) = create_test_repo();
2188        let root = dir.path();
2189
2190        write_file(root, "utils.ts", "\
2191class MathUtils {
2192    static compute() { return 1; }
2193}
2194function caller() { return MathUtils.compute(); }
2195");
2196
2197        let (graph, _) = EntityGraph::build(root, &["utils.ts".into()], &registry);
2198
2199        let caller_id = graph.entities.keys()
2200            .find(|id| id.contains("caller"))
2201            .expect("caller entity should exist");
2202        let deps = graph.get_dependencies(caller_id);
2203        assert!(
2204            deps.iter().any(|d| d.name == "compute"),
2205            "caller should depend on compute via MathUtils.compute(). Deps: {:?}",
2206            deps.iter().map(|d| &d.name).collect::<Vec<_>>()
2207        );
2208    }
2209
2210    #[test]
2211    fn test_js_ts_import_resolution() {
2212        let (dir, registry) = create_test_repo();
2213        let root = dir.path();
2214
2215        write_file(root, "helper.ts", "\
2216export function helper() { return 1; }
2217");
2218        write_file(root, "main.ts", "\
2219import { helper } from './helper';
2220export function main() { return helper(); }
2221");
2222
2223        let (graph, _) = EntityGraph::build(
2224            root,
2225            &["helper.ts".into(), "main.ts".into()],
2226            &registry,
2227        );
2228
2229        let main_id = graph.entities.keys()
2230            .find(|id| id.contains("main"))
2231            .expect("main entity should exist");
2232        let deps = graph.get_dependencies(main_id);
2233        assert!(
2234            deps.iter().any(|d| d.name == "helper"),
2235            "main should depend on helper via JS import. Deps: {:?}",
2236            deps.iter().map(|d| &d.name).collect::<Vec<_>>()
2237        );
2238    }
2239
2240    #[test]
2241    fn test_dot_chain_no_false_edges() {
2242        let (dir, registry) = create_test_repo();
2243        let root = dir.path();
2244
2245        // Two classes with same method name "process".
2246        // self.process() in ClassA should NOT create edge to ClassB::process.
2247        write_file(root, "a.py", "\
2248class ClassA:
2249    def run(self):
2250        return self.process()
2251
2252    def process(self):
2253        return 1
2254");
2255        write_file(root, "b.py", "\
2256class ClassB:
2257    def process(self):
2258        return 2
2259");
2260
2261        let (graph, _) = EntityGraph::build(
2262            root,
2263            &["a.py".into(), "b.py".into()],
2264            &registry,
2265        );
2266
2267        let run_id = graph.entities.keys()
2268            .find(|id| id.contains("run"))
2269            .expect("run entity should exist");
2270        let deps = graph.get_dependencies(run_id);
2271        // Should have edge to ClassA::process, NOT ClassB::process
2272        for dep in &deps {
2273            if dep.name == "process" {
2274                assert!(
2275                    dep.file_path == "a.py",
2276                    "run's process dep should be in a.py, not {}",
2277                    dep.file_path
2278                );
2279            }
2280        }
2281    }
2282
2283    #[test]
2284    fn test_dot_chain_fallback() {
2285        let (dir, registry) = create_test_repo();
2286        let root = dir.path();
2287
2288        // someVar.unknownMethod() - "someVar" is not a class,
2289        // so the chain is unresolved and words fall through to bag-of-words.
2290        // "helper" should still resolve via bag-of-words.
2291        write_file(root, "app.ts", "\
2292export function helper() { return 1; }
2293export function caller() {
2294    const val = helper();
2295    return val;
2296}
2297");
2298
2299        let (graph, _) = EntityGraph::build(root, &["app.ts".into()], &registry);
2300
2301        let caller_id = graph.entities.keys()
2302            .find(|id| id.contains("caller"))
2303            .expect("caller entity should exist");
2304        let deps = graph.get_dependencies(caller_id);
2305        assert!(
2306            deps.iter().any(|d| d.name == "helper"),
2307            "caller should still resolve helper via bag-of-words. Deps: {:?}",
2308            deps.iter().map(|d| &d.name).collect::<Vec<_>>()
2309        );
2310    }
2311
2312}