Skip to main content

sem_core/parser/
graph.rs

1//! Entity dependency graph — cross-file reference extraction.
2//!
3//! Implements a two-pass approach inspired by arXiv:2601.08773 (Reliable Graph-RAG):
4//! Pass 1: Extract all entities, build a symbol table (name → entity ID).
5//! Pass 2: For each entity, extract identifier references from its AST subtree,
6//!         resolve them against the symbol table to create edges.
7//!
8//! This enables impact analysis: "if I change entity X, what else is affected?"
9
10use std::collections::{HashMap, HashSet};
11use std::path::Path;
12use std::sync::{Arc, LazyLock};
13
14#[cfg(feature = "parallel")]
15use rayon::prelude::*;
16use regex::Regex;
17use serde::{Deserialize, Serialize};
18
19/// Helper macro to select parallel or sequential iteration based on feature flag.
///
/// Expands to `$slice.par_iter()` when the `parallel` feature is enabled and to
/// `$slice.iter()` otherwise. The two blocks are gated by opposite `cfg`
/// conditions, so exactly one of them is compiled into the expansion.
20macro_rules! maybe_par_iter {
21    ($slice:expr) => {{
        // Parallel build: requires the `rayon::prelude` import that is gated on the same feature.
22        #[cfg(feature = "parallel")]
23        { $slice.par_iter() }
        // Sequential fallback: plain `iter()` on the same expression.
24        #[cfg(not(feature = "parallel"))]
25        { $slice.iter() }
26    }};
27}
28
29use crate::git::types::{FileChange, FileStatus};
30use crate::model::entity::SemanticEntity;
31use crate::parser::import_resolution::find_import_target;
32use crate::parser::registry::{resolve_go_method_parent_ids, ParserRegistry};
33use crate::parser::scope_resolve;
34
35fn build_scope_consumed_words(
36    resolution_log: &[scope_resolve::ResolutionEntry],
37) -> HashMap<String, HashSet<String>> {
38    let mut consumed_by_entity: HashMap<String, HashSet<String>> = HashMap::new();
39    for entry in resolution_log {
40        let words = consumed_by_entity
41            .entry(entry.from_entity.clone())
42            .or_default();
43        add_scope_reference_words(words, &entry.reference);
44    }
45    consumed_by_entity
46}
47
/// Adds the identifier words of a single resolved reference to `words`.
///
/// A trailing `()` is removed first. If the remainder contains a `.`, it is
/// split at the last dot and both sides (receiver and member) are inserted;
/// otherwise the whole remainder is inserted. Empty pieces are never inserted.
fn add_scope_reference_words(words: &mut HashSet<String>, reference: &str) {
    let bare = match reference.strip_suffix("()") {
        Some(without_call) => without_call,
        None => reference,
    };

    // Either [receiver, member] or [whole reference, ""]; empties are filtered below.
    let candidates: [&str; 2] = match bare.rsplit_once('.') {
        Some((receiver, member)) => [receiver, member],
        None => [bare, ""],
    };

    words.extend(
        candidates
            .iter()
            .filter(|piece| !piece.is_empty())
            .map(|piece| String::from(*piece)),
    );
}
61
62/// A reference from one entity to another.
///
/// One directed edge of the dependency graph. Field names are serialized in
/// camelCase (`fromEntity`, `toEntity`, `refType`).
63#[derive(Debug, Clone, Serialize, Deserialize)]
64#[serde(rename_all = "camelCase")]
65pub struct EntityRef {
    /// ID of the entity that contains the reference (edge source).
66    pub from_entity: String,
    /// ID of the entity being referenced (edge target).
67    pub to_entity: String,
    /// Kind of reference this edge represents.
68    pub ref_type: RefType,
69}
70
71/// Type of reference between entities.
///
/// Variant names are serialized in lowercase via `rename_all = "lowercase"`.
72#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
73#[serde(rename_all = "lowercase")]
74pub enum RefType {
75    /// Function/method call
76    Calls,
77    /// Type reference (extends, implements, field type)
78    TypeRef,
79    /// Import/use statement reference
80    Imports,
81}
82
83/// A complete entity dependency graph for a set of files.
///
/// `dependents` and `dependencies` are adjacency indexes derived from `edges`;
/// both are keyed by entity ID and hold entity IDs as values.
84#[derive(Debug)]
85pub struct EntityGraph {
86    /// All entities indexed by ID
87    pub entities: HashMap<String, EntityInfo>,
88    /// Flat list of all reference edges (from_entity → to_entity, with ref_type)
89    pub edges: Vec<EntityRef>,
90    /// Reverse index: entity_id → entities that reference it
91    pub dependents: HashMap<String, Vec<String>>,
92    /// Forward index: entity_id → entities it references
93    pub dependencies: HashMap<String, Vec<String>>,
94}
95
96/// Minimal entity info stored in the graph.
///
/// A trimmed-down copy of the fields of a `SemanticEntity` that the graph keeps
/// per node. Field names are serialized in camelCase.
97#[derive(Debug, Clone, Serialize, Deserialize)]
98#[serde(rename_all = "camelCase")]
99pub struct EntityInfo {
    /// Unique entity ID; also the key under which this info is stored in the graph.
100    pub id: String,
    /// Entity name; the key used for symbol-table lookups.
101    pub name: String,
    /// Entity kind as a string (e.g. "class", "struct", "interface", "method").
102    pub entity_type: String,
    /// Path of the file the entity was extracted from.
103    pub file_path: String,
    /// ID of the enclosing entity, if any; omitted from serialized output when `None`.
104    #[serde(skip_serializing_if = "Option::is_none")]
105    pub parent_id: Option<String>,
    /// First line of the entity in its file.
106    pub start_line: usize,
    /// Last line of the entity in its file.
107    pub end_line: usize,
108}
109
110impl EntityGraph {
111    /// Reconstruct an EntityGraph from pre-loaded parts (e.g. from a cache).
112    pub fn from_parts(entities: HashMap<String, EntityInfo>, edges: Vec<EntityRef>) -> Self {
113        let mut dependents: HashMap<String, Vec<String>> = HashMap::new();
114        let mut dependencies: HashMap<String, Vec<String>> = HashMap::new();
115        for edge in &edges {
116            dependents
117                .entry(edge.to_entity.clone())
118                .or_default()
119                .push(edge.from_entity.clone());
120            dependencies
121                .entry(edge.from_entity.clone())
122                .or_default()
123                .push(edge.to_entity.clone());
124        }
125        EntityGraph {
126            entities,
127            edges,
128            dependents,
129            dependencies,
130        }
131    }
132
133    /// Build an entity graph from a set of files.
134    ///
135    /// Pass 1: Extract all entities from all files using the parser registry.
136    /// Pass 2: For each entity, find identifier tokens and resolve them against
137    ///         the symbol table to create reference edges.
    ///
    /// `file_paths` are joined onto `root` before reading. Files that cannot be
    /// read, or for which `extract_entities_with_tree` returns `None`, are
    /// skipped silently.
    ///
    /// Returns the finished graph together with the full list of extracted
    /// entities.
138    pub fn build(
139        root: &Path,
140        file_paths: &[String],
141        registry: &ParserRegistry,
142    ) -> (Self, Vec<SemanticEntity>) {
143        // Pass 1: Extract all entities in parallel (file I/O + tree-sitter parsing)
144        // Also collect (file_path, content, tree) for scope_resolve reuse
145        let per_file: Vec<(Vec<SemanticEntity>, Option<(String, String, tree_sitter::Tree)>)> = maybe_par_iter!(file_paths)
146            .filter_map(|file_path| {
147                let full_path = root.join(file_path);
148                let content = std::fs::read_to_string(&full_path).ok()?;
149                let (entities, tree) = registry.extract_entities_with_tree(file_path, &content)?;
150                let parsed = tree.map(|t| (file_path.clone(), content, t));
151                Some((entities, parsed))
152            })
153            .collect();
154
        // Flatten the per-file results into one entity list and one list of parsed trees.
155        let mut all_entities: Vec<SemanticEntity> = Vec::new();
156        let mut parsed_files: Vec<(String, String, tree_sitter::Tree)> = Vec::new();
157        for (entities, parsed) in per_file {
158            all_entities.extend(entities);
159            if let Some(p) = parsed {
160                parsed_files.push(p);
161            }
162        }
163        resolve_go_method_parent_ids(&mut all_entities);
164
165        // Pass A: Build all lookup structures in a single pass over all_entities.
166        // This merges what was previously 6 separate O(E) iterations.
167        let mut symbol_table: HashMap<String, Vec<String>> = HashMap::with_capacity(all_entities.len());
168        let mut entity_map: HashMap<String, EntityInfo> = HashMap::with_capacity(all_entities.len());
169        let mut parent_child_pairs: HashSet<(&str, &str)> = HashSet::new();
170        let mut class_child_names: HashSet<(&str, &str)> = HashSet::new();
171        let mut class_entity_names: HashSet<&str> = HashSet::new();
172        let mut class_entity_files: HashSet<(&str, &str)> = HashSet::new();
173        let mut id_to_name: HashMap<&str, &str> = HashMap::with_capacity(all_entities.len());
174        let mut scope_entity_ranges: HashMap<String, Vec<(usize, usize, String)>> = HashMap::new();
175
176        for entity in &all_entities {
            // name → all entity IDs carrying that name (names are not unique)
177            symbol_table
178                .entry(entity.name.clone())
179                .or_default()
180                .push(entity.id.clone());
181
182            entity_map.insert(
183                entity.id.clone(),
184                EntityInfo {
185                    id: entity.id.clone(),
186                    name: entity.name.clone(),
187                    entity_type: entity.entity_type.clone(),
188                    file_path: entity.file_path.clone(),
189                    parent_id: entity.parent_id.clone(),
190                    start_line: entity.start_line,
191                    end_line: entity.end_line,
192                },
193            );
194
            // (parent ID, child ID) and (parent ID, child name) pairs, used later to
            // suppress edges between a container and its own members.
195            if let Some(ref pid) = entity.parent_id {
196                parent_child_pairs.insert((pid.as_str(), entity.id.as_str()));
197                class_child_names.insert((pid.as_str(), entity.name.as_str()));
198            }
199
200            if matches!(entity.entity_type.as_str(), "class" | "struct" | "interface" | "class_type") {
201                class_entity_names.insert(entity.name.as_str());
202                class_entity_files.insert((entity.name.as_str(), entity.file_path.as_str()));
203            }
204
205            id_to_name.insert(entity.id.as_str(), entity.name.as_str());
206
            // file path → (start_line, end_line, entity ID) ranges, handed to the scope resolver
207            scope_entity_ranges.entry(entity.file_path.clone()).or_default()
208                .push((entity.start_line, entity.end_line, entity.id.clone()));
209        }
210
211        // Pass B: Build enclosing_class, class_members, and scope_class_members
212        // (depends on id_to_name, class_entity_names, and entity_map from Pass A)
213        let mut enclosing_class: HashMap<&str, &str> = HashMap::new();
214        let mut class_members: HashMap<&str, Vec<(&str, &str)>> = HashMap::new();
215        let mut scope_class_members: HashMap<String, Vec<(String, String)>> = HashMap::new();
216
217        for entity in &all_entities {
218            if let Some(ref pid) = entity.parent_id {
219                if let Some(&parent_name) = id_to_name.get(pid.as_str()) {
                    // Keyed by parent *name*, not ID: the check is whether any class-like
                    // entity carries that name.
220                    if class_entity_names.contains(parent_name) {
221                        enclosing_class.insert(entity.id.as_str(), parent_name);
222                        class_members
223                            .entry(parent_name)
224                            .or_default()
225                            .push((entity.name.as_str(), entity.id.as_str()));
226                    }
227                }
228                // scope_class_members for scope resolver (checks entity_type of parent)
229                if let Some(parent) = entity_map.get(pid.as_str()) {
230                    if matches!(parent.entity_type.as_str(), "class" | "struct" | "interface" | "impl") {
231                        scope_class_members.entry(parent.name.clone()).or_default()
232                            .push((entity.name.clone(), entity.id.clone()));
233                    }
234                }
235            }
236            // Go receiver-based methods
237            if entity.entity_type == "method" && entity.file_path.ends_with(".go") {
238                if let Some(struct_name) = scope_resolve::extract_go_receiver_type(&entity.content) {
239                    scope_class_members.entry(struct_name).or_default()
240                        .push((entity.name.clone(), entity.id.clone()));
241                }
242            }
243        }
244
245        // Build import table: (file_path, imported_name) → target entity ID
246        // e.g. ("io_handler.py", "validate") → "core.py::function::validate"
247        let import_table = build_import_table(root, file_paths, &symbol_table, &entity_map, Some(&parsed_files));
248        // Build owned Go package index for scope resolver
        // Only populated when at least one input path ends in ".go". Each entity is
        // indexed under its file's last path segment (passed through `strip_file_ext`)
        // and, when different, under the name of the directory containing the file.
249        let owned_go_pkg_index: HashMap<String, Vec<(String, String)>> = if file_paths.iter().any(|f| f.ends_with(".go")) {
250            let mut idx: HashMap<String, Vec<(String, String)>> = HashMap::new();
251            for (name, target_ids) in symbol_table.iter() {
252                for target_id in target_ids {
253                    if let Some(entity) = entity_map.get(target_id) {
254                        let file_stem = entity.file_path.rsplit('/').next().unwrap_or(&entity.file_path);
255                        let file_stem = strip_file_ext(file_stem);
256                        idx.entry(file_stem.to_string())
257                            .or_default()
258                            .push((name.clone(), target_id.clone()));
259                        if let Some(parent_start) = entity.file_path.rfind('/') {
260                            let parent_path = &entity.file_path[..parent_start];
261                            if let Some(dir_name_start) = parent_path.rfind('/') {
262                                let dir_name = &parent_path[dir_name_start + 1..];
263                                if dir_name != file_stem {
264                                    idx.entry(dir_name.to_string())
265                                        .or_default()
266                                        .push((name.clone(), target_id.clone()));
267                                }
268                            } else if !parent_path.is_empty() && parent_path != file_stem {
                                // Single-level path ("pkg/file.go"): the parent path itself is the directory name.
269                                idx.entry(parent_path.to_string())
270                                    .or_default()
271                                    .push((name.clone(), target_id.clone()));
272                            }
273                        }
274                    }
275                }
276            }
277            idx
278        } else {
279            HashMap::new()
280        };
281
282        // Wrap symbol_table in Arc to avoid expensive deep clone (621K entries)
283        let symbol_table = Arc::new(symbol_table);
284
285        let pre_built = scope_resolve::PreBuiltLookups {
286            symbol_table: Arc::clone(&symbol_table),
287            class_members: scope_class_members,
288            entity_ranges: scope_entity_ranges,
289            go_pkg_index: owned_go_pkg_index,
290        };
291
292        // Run scope-aware resolver for supported languages (reuse pre-parsed trees)
293        let has_scope_lang = file_paths.iter().any(|f| {
294            let ext = f.rfind('.').map(|i| &f[i..]).unwrap_or("");
295            crate::parser::plugins::code::languages::get_language_config(ext)
296                .and_then(|c| c.scope_resolve)
297                .is_some()
298        });
299        let (scope_edges, scope_consumed_words) = if has_scope_lang {
300            let result = scope_resolve::resolve_with_scopes_full(root, file_paths, &all_entities, &entity_map, Some(parsed_files), Some(pre_built));
301            let consumed_words = build_scope_consumed_words(&result.resolution_log);
302            (result.edges, consumed_words)
303        } else {
304            (vec![], HashMap::new())
305        };
306
307        // Pass 2: Extract references in parallel, then resolve against symbol table
308        // Phase 1: Dot-chain resolution (precise self.X, this.X, ClassName.X)
309        // Phase 2: Bag-of-words resolution (existing logic, skipping consumed words)
310        // Words already handled by the scope resolver are pre-seeded into each entity's consumed set
311        // Skip entities from non-code file types (JSON, SQL, etc.) that can't produce edges
312        let resolved_refs: Vec<(String, String, RefType)> = maybe_par_iter!(all_entities)
313            .flat_map(|entity| {
314                // Skip entities from file types that don't have language configs
315                // (JSON, SQL, YAML, etc. — they extract entities but never produce reference edges)
316                let ext = entity.file_path.rfind('.').map(|i| &entity.file_path[i..]).unwrap_or("");
317                if crate::parser::plugins::code::languages::get_language_config(ext).is_none() {
318                    return vec![];
319                }
320
321                let mut entity_edges = Vec::new();
                // Start from the words the scope resolver recorded for this entity (empty if none).
322                let mut consumed_words = scope_consumed_words
323                    .get(&entity.id)
324                    .cloned()
325                    .unwrap_or_default();
326
327                // Strip comments/strings once, reuse for both dot-chain and bag-of-words
328                let stripped = strip_comments_and_strings(&entity.content);
329
330                // Phase 1: Dot-chain resolution
331                let dot_chains = extract_dot_chains(&stripped);
332
333                for (receiver, member) in &dot_chains {
334                    let edge_count_before = entity_edges.len();
335                    if *receiver == "self" || *receiver == "this" {
336                        // self.B / this.B: resolve to sibling method in enclosing class
337                        if let Some(class_name) = enclosing_class.get(entity.id.as_str()) {
338                            if let Some(members) = class_members.get(class_name) {
339                                for (n, tid) in members {
                                    // First member with a matching name wins; never link an entity to itself.
340                                    if *n == *member && *tid != entity.id.as_str() {
341                                        entity_edges.push((
342                                            entity.id.clone(),
343                                            tid.to_string(),
344                                            RefType::Calls,
345                                        ));
346                                        consumed_words.insert(member.to_string());
347                                        break;
348                                    }
349                                }
350                            }
351                        }
352                    } else if class_entity_files.contains(&(*receiver, entity.file_path.as_str())) {
353                        // ClassName.B: resolve to class member
                        // Only applies when a class-like entity named `receiver` lives in this same file.
354                        if let Some(members) = class_members.get(*receiver) {
355                            for (n, tid) in members {
356                                if *n == *member {
357                                    entity_edges.push((
358                                        entity.id.clone(),
359                                        tid.to_string(),
360                                        RefType::Calls,
361                                    ));
362                                    consumed_words.insert(member.to_string());
363                                    consumed_words.insert(receiver.to_string());
364                                    break;
365                                }
366                            }
367                        }
368                    }
                    // No edge came out of this chain: still mark the member as consumed so the
                    // bag-of-words phase below does not resolve it by its bare name.
369                    if entity_edges.len() == edge_count_before {
370                        consumed_words.insert(member.to_string());
371                    }
372                }
373
374                // Phase 2: Bag-of-words resolution (skip words consumed by dot-chains)
375                // Reuse the stripped content to avoid stripping twice
376                let refs = extract_references_with_stripped(&entity.content, &entity.name, &stripped);
377                for ref_name in refs {
378                    if consumed_words.contains(ref_name) {
379                        continue;
380                    }
381
382                    // Skip references to names that are this class's own methods
383                    if class_child_names.contains(&(entity.id.as_str(), ref_name)) {
384                        continue;
385                    }
386
387                    // Check import table first: if this file imports this name,
388                    // resolve to the import target instead of global symbol table
389                    let import_key = (entity.file_path.clone(), ref_name.to_string());
390                    if let Some(import_target_id) = import_table.get(&import_key) {
391                        if import_target_id != &entity.id
392                            && !parent_child_pairs.contains(&(entity.id.as_str(), import_target_id.as_str()))
393                            && !parent_child_pairs.contains(&(import_target_id.as_str(), entity.id.as_str()))
394                        {
395                            let ref_type = infer_ref_type(&entity.content, &ref_name);
396                            entity_edges.push((
397                                entity.id.clone(),
398                                import_target_id.clone(),
399                                ref_type,
400                            ));
401                        }
                        // An import entry is authoritative for this name: never fall through to the
                        // same-file lookup, even when the edge was suppressed above.
402                        continue;
403                    }
404
405                    if let Some(target_ids) = symbol_table.get(ref_name) {
406                        // Without an import, only resolve to entities in the same file.
407                        // Cross-file resolution is handled by the import table above.
408                        let target = target_ids
409                            .iter()
410                            .find(|id| {
411                                *id != &entity.id
412                                    && entity_map
413                                        .get(*id)
414                                        .map_or(false, |e| e.file_path == entity.file_path)
415                            });
416
417                        if let Some(target_id) = target {
418                            // Skip parent-child edges (class -> own method)
419                            if parent_child_pairs.contains(&(entity.id.as_str(), target_id.as_str()))
420                                || parent_child_pairs.contains(&(target_id.as_str(), entity.id.as_str()))
421                            {
422                                continue;
423                            }
424                            let ref_type = infer_ref_type(&entity.content, &ref_name);
425                            entity_edges.push((
426                                entity.id.clone(),
427                                target_id.clone(),
428                                ref_type,
429                            ));
430                        }
431                    }
432                }
433                entity_edges
434            })
435            .collect();
436
437        // Merge scope edges with bag-of-words edges, deduplicating
        // The dedup key is (from, to) only and the first occurrence wins. Scope edges come
        // first in `combined`, so their ref_type is the one kept for a duplicated pair.
438        let mut combined: Vec<(String, String, RefType)> = scope_edges;
439        combined.extend(resolved_refs);
440        let mut seen_edges: HashSet<(String, String)> = HashSet::with_capacity(combined.len());
441        let mut all_resolved: Vec<(String, String, RefType)> = Vec::with_capacity(combined.len());
442        for edge in combined {
443            if seen_edges.insert((edge.0.clone(), edge.1.clone())) {
444                all_resolved.push(edge);
445            }
446        }
447
448        // Build edge indexes from resolved references
449        let mut edges: Vec<EntityRef> = Vec::with_capacity(all_resolved.len());
450        let mut dependents: HashMap<String, Vec<String>> = HashMap::new();
451        let mut dependencies: HashMap<String, Vec<String>> = HashMap::new();
452
453        for (from_entity, to_entity, ref_type) in all_resolved {
454            dependents
455                .entry(to_entity.clone())
456                .or_default()
457                .push(from_entity.clone());
458            dependencies
459                .entry(from_entity.clone())
460                .or_default()
461                .push(to_entity.clone());
462            edges.push(EntityRef {
463                from_entity,
464                to_entity,
465                ref_type,
466            });
467        }
468
469        let graph = EntityGraph {
470            entities: entity_map,
471            edges,
472            dependents,
473            dependencies,
474        };
475
476        (graph, all_entities)
477    }
478
479    /// Incrementally build an entity graph: reparse only stale files, reuse cached data for clean files.
480    ///
481    /// Uses the same full 3-phase resolution (scope + dot-chain + bag-of-words) as `build()`,
482    /// but only runs it for entities in stale files + clean entities whose cached edges
483    /// pointed into stale files (they need re-resolution since their targets may have changed).
484    pub fn build_incremental(
485        root: &Path,
486        stale_files: &[String],
487        all_file_paths: &[String],
488        cached_entities: Vec<SemanticEntity>,
489        cached_edges: Vec<EntityRef>,
490        stale_file_cached_entities: Vec<SemanticEntity>,
491        registry: &ParserRegistry,
492    ) -> (Self, Vec<SemanticEntity>) {
493        // Build set of stale file paths for quick lookup
494        let stale_set: HashSet<&str> = stale_files.iter().map(|s| s.as_str()).collect();
495
496        // Parse stale files in parallel to get new entities + trees
497        let per_file: Vec<(Vec<SemanticEntity>, Option<(String, String, tree_sitter::Tree)>)> = maybe_par_iter!(stale_files)
498            .filter_map(|file_path| {
499                let full_path = root.join(file_path);
500                let content = std::fs::read_to_string(&full_path).ok()?;
501                let (entities, tree) = registry.extract_entities_with_tree(file_path, &content)?;
502                let parsed = tree.map(|t| (file_path.clone(), content, t));
503                Some((entities, parsed))
504            })
505            .collect();
506
507        let mut new_entities: Vec<SemanticEntity> = Vec::new();
508        let mut parsed_files: Vec<(String, String, tree_sitter::Tree)> = Vec::new();
509        for (entities, parsed) in per_file {
510            new_entities.extend(entities);
511            if let Some(p) = parsed {
512                parsed_files.push(p);
513            }
514        }
515
516        // Merge clean cached entities with newly parsed stale-file entities before
517        // repairing Go method parents; Go receiver types may live in clean files.
518        let mut all_entities: Vec<SemanticEntity> = cached_entities
519            .into_iter()
520            .chain(new_entities.into_iter())
521            .collect();
522        let entity_ids_before_parent_repair: HashSet<String> =
523            all_entities.iter().map(|e| e.id.clone()).collect();
524        resolve_go_method_parent_ids(&mut all_entities);
525        let parent_repaired_ids: HashSet<&str> = all_entities
526            .iter()
527            .filter(|e| !entity_ids_before_parent_repair.contains(&e.id))
528            .map(|e| e.id.as_str())
529            .collect();
530
531        // Entity-level diffing: compare repaired stale-file entities against cached versions.
532        let stale_cached_entity_ids: HashSet<&str> = stale_file_cached_entities
533            .iter()
534            .map(|e| e.id.as_str())
535            .collect();
536
537        // Build content_hash lookup from cached stale-file entities
538        let cached_hashes: HashMap<&str, &str> = stale_file_cached_entities
539            .iter()
540            .map(|e| (e.id.as_str(), e.content_hash.as_str()))
541            .collect();
542
543        // Classify new stale-file entities
544        let mut truly_changed_ids: HashSet<String> = HashSet::new();
545        let mut content_clean_ids: HashSet<String> = HashSet::new();
546        for entity in all_entities
547            .iter()
548            .filter(|e| stale_set.contains(e.file_path.as_str()))
549        {
550            match cached_hashes.get(entity.id.as_str()) {
551                Some(old_hash) if *old_hash == entity.content_hash.as_str() => {
552                    content_clean_ids.insert(entity.id.clone());
553                }
554                _ => {
555                    // Hash differs or entity is new
556                    truly_changed_ids.insert(entity.id.clone());
557                }
558            }
559        }
560
561        // Detect deleted entities: in cached stale but not in new
562        let new_entity_ids: HashSet<&str> = all_entities
563            .iter()
564            .filter(|e| stale_set.contains(e.file_path.as_str()))
565            .map(|e| e.id.as_str())
566            .collect();
567        let deleted_ids: HashSet<&str> = stale_file_cached_entities
568            .iter()
569            .filter(|e| !new_entity_ids.contains(e.id.as_str()))
570            .map(|e| e.id.as_str())
571            .collect();
572
573        // Find affected clean entities: only care about edges pointing to truly_changed/deleted
574        let mut affected_clean_ids: HashSet<String> = HashSet::new();
575        for edge in &cached_edges {
576            let to_truly_changed = truly_changed_ids.contains(&edge.to_entity)
577                || deleted_ids.contains(edge.to_entity.as_str());
578            if to_truly_changed && !stale_set.contains(
579                all_entities.iter()
580                    .find(|e| e.id == edge.from_entity)
581                    .map(|e| e.file_path.as_str())
582                    .unwrap_or("")
583            ) {
584                affected_clean_ids.insert(edge.from_entity.clone());
585            }
586        }
587
588        // Collect all stale entity IDs (for edge filtering)
589        let stale_entity_ids: HashSet<&str> = all_entities
590            .iter()
591            .filter(|e| stale_set.contains(e.file_path.as_str()))
592            .map(|e| e.id.as_str())
593            .collect();
594        let current_entity_ids: HashSet<&str> = all_entities
595            .iter()
596            .map(|e| e.id.as_str())
597            .collect();
598
599        // Keep edges where both endpoints are in clean (non-stale) files and from_entity
600        // is not affected by target changes. Drop ALL cached edges from stale-file entities
601        // (even content_clean ones) because import/scope context may have changed even when
602        // entity content didn't. See: https://github.com/Ataraxy-Labs/sem/issues/116
603        let kept_edges: Vec<EntityRef> = cached_edges
604            .into_iter()
605            .filter(|e| {
606                if !current_entity_ids.contains(e.from_entity.as_str())
607                    || !current_entity_ids.contains(e.to_entity.as_str())
608                {
609                    return false;
610                }
611
612                let from_stale = stale_entity_ids.contains(e.from_entity.as_str())
613                    || stale_cached_entity_ids.contains(e.from_entity.as_str());
614                let to_stale = stale_entity_ids.contains(e.to_entity.as_str())
615                    || stale_cached_entity_ids.contains(e.to_entity.as_str());
616
617                if !from_stale && !to_stale && !affected_clean_ids.contains(&e.from_entity) {
618                    // Both endpoints in clean files, from not affected
619                    return true;
620                }
621                false
622            })
623            .collect();
624
625        // Set of entity IDs that need resolution: all stale-file entities + affected clean.
626        // Content-clean stale entities must be re-resolved because import/scope context
627        // may have changed even if entity body content is identical.
628        let needs_resolution: HashSet<&str> = all_entities
629            .iter()
630            .filter(|e| {
631                truly_changed_ids.contains(&e.id)
632                    || content_clean_ids.contains(&e.id)
633                    || parent_repaired_ids.contains(e.id.as_str())
634                    || affected_clean_ids.contains(&e.id)
635            })
636            .map(|e| e.id.as_str())
637            .collect();
638
639        // Now run the same resolution logic as build() but only for entities in needs_resolution.
640        // We still need the full context (symbol table, import table, etc.) from ALL entities.
641
642        // Build symbol table from all entities
643        let mut symbol_table: HashMap<String, Vec<String>> = HashMap::with_capacity(all_entities.len());
644        let mut entity_map: HashMap<String, EntityInfo> = HashMap::with_capacity(all_entities.len());
645
646        for entity in &all_entities {
647            symbol_table
648                .entry(entity.name.clone())
649                .or_default()
650                .push(entity.id.clone());
651            entity_map.insert(
652                entity.id.clone(),
653                EntityInfo {
654                    id: entity.id.clone(),
655                    name: entity.name.clone(),
656                    entity_type: entity.entity_type.clone(),
657                    file_path: entity.file_path.clone(),
658                    parent_id: entity.parent_id.clone(),
659                    start_line: entity.start_line,
660                    end_line: entity.end_line,
661                },
662            );
663        }
664
665        // Build parent-child set
666        let parent_child_pairs: HashSet<(&str, &str)> = all_entities
667            .iter()
668            .filter_map(|e| {
669                e.parent_id.as_ref().map(|pid| (pid.as_str(), e.id.as_str()))
670            })
671            .collect();
672
673        let class_child_names: HashSet<(&str, &str)> = all_entities
674            .iter()
675            .filter_map(|e| {
676                e.parent_id.as_ref().map(|pid| (pid.as_str(), e.name.as_str()))
677            })
678            .collect();
679
680        let class_entity_names: HashSet<&str> = all_entities
681            .iter()
682            .filter(|e| matches!(e.entity_type.as_str(), "class" | "struct" | "interface" | "class_type"))
683            .map(|e| e.name.as_str())
684            .collect();
685        let class_entity_files: HashSet<(&str, &str)> = all_entities
686            .iter()
687            .filter(|e| matches!(e.entity_type.as_str(), "class" | "struct" | "interface" | "class_type"))
688            .map(|e| (e.name.as_str(), e.file_path.as_str()))
689            .collect();
690
691        let id_to_name: HashMap<&str, &str> = all_entities
692            .iter()
693            .map(|e| (e.id.as_str(), e.name.as_str()))
694            .collect();
695
696        let mut enclosing_class: HashMap<&str, &str> = HashMap::new();
697        let mut class_members: HashMap<&str, Vec<(&str, &str)>> = HashMap::new();
698
699        for entity in &all_entities {
700            if let Some(ref pid) = entity.parent_id {
701                if let Some(&parent_name) = id_to_name.get(pid.as_str()) {
702                    if class_entity_names.contains(parent_name) {
703                        enclosing_class.insert(entity.id.as_str(), parent_name);
704                        class_members
705                            .entry(parent_name)
706                            .or_default()
707                            .push((entity.name.as_str(), entity.id.as_str()));
708                    }
709                }
710            }
711        }
712
713        // Build import table from ALL files (imports may reference stale entities)
714        let import_table = build_import_table(root, all_file_paths, &symbol_table, &entity_map, Some(&parsed_files));
715
716        // Run scope-aware resolver only on files that need resolution
717        let resolve_file_paths: Vec<String> = all_file_paths
718            .iter()
719            .filter(|f| {
720                // Include file if any entity in needs_resolution belongs to it
721                stale_set.contains(f.as_str()) || all_entities.iter().any(|e| {
722                    e.file_path == **f && affected_clean_ids.contains(&e.id)
723                })
724            })
725            .cloned()
726            .collect();
727
728        let has_scope_lang = resolve_file_paths.iter().any(|f| {
729            let ext = f.rfind('.').map(|i| &f[i..]).unwrap_or("");
730            crate::parser::plugins::code::languages::get_language_config(ext)
731                .and_then(|c| c.scope_resolve)
732                .is_some()
733        });
734        let (scope_edges, scope_consumed_words) = if has_scope_lang {
735            // Pass pre-parsed stale-file trees; scope_resolve reads affected clean files from disk
736            let resolve_set: HashSet<&str> = resolve_file_paths.iter().map(|s| s.as_str()).collect();
737            let relevant_parsed: Vec<(String, String, tree_sitter::Tree)> = parsed_files
738                .into_iter()
739                .filter(|(fp, _, _)| resolve_set.contains(fp.as_str()))
740                .collect();
741            let pre = if relevant_parsed.is_empty() { None } else { Some(relevant_parsed) };
742            let result = scope_resolve::resolve_with_scopes_full(root, &resolve_file_paths, &all_entities, &entity_map, pre, None);
743            let consumed_words = build_scope_consumed_words(&result.resolution_log);
744            (result.edges, consumed_words)
745        } else {
746            (vec![], HashMap::new())
747        };
748
749        // Resolve references only for entities in needs_resolution
750        let resolved_refs: Vec<(String, String, RefType)> = maybe_par_iter!(all_entities)
751            .filter(|e| needs_resolution.contains(e.id.as_str()))
752            .flat_map(|entity| {
753                // Skip entities from non-code file types (JSON, SQL, etc.)
754                let ext = entity.file_path.rfind('.').map(|i| &entity.file_path[i..]).unwrap_or("");
755                if crate::parser::plugins::code::languages::get_language_config(ext).is_none() {
756                    return vec![];
757                }
758
759                let mut entity_edges = Vec::new();
760                let mut consumed_words = scope_consumed_words
761                    .get(&entity.id)
762                    .cloned()
763                    .unwrap_or_default();
764
765                // Strip comments/strings once, reuse for both dot-chain and bag-of-words
766                let stripped = strip_comments_and_strings(&entity.content);
767
768                // Phase 1: Dot-chain resolution
769                let dot_chains = extract_dot_chains(&stripped);
770
771                for (receiver, member) in &dot_chains {
772                    let edge_count_before = entity_edges.len();
773                    if *receiver == "self" || *receiver == "this" {
774                        if let Some(class_name) = enclosing_class.get(entity.id.as_str()) {
775                            if let Some(members) = class_members.get(class_name) {
776                                for (n, tid) in members {
777                                    if *n == *member && *tid != entity.id.as_str() {
778                                        entity_edges.push((
779                                            entity.id.clone(),
780                                            tid.to_string(),
781                                            RefType::Calls,
782                                        ));
783                                        consumed_words.insert(member.to_string());
784                                        break;
785                                    }
786                                }
787                            }
788                        }
789                    } else if class_entity_files.contains(&(*receiver, entity.file_path.as_str())) {
790                        if let Some(members) = class_members.get(*receiver) {
791                            for (n, tid) in members {
792                                if *n == *member {
793                                    entity_edges.push((
794                                        entity.id.clone(),
795                                        tid.to_string(),
796                                        RefType::Calls,
797                                    ));
798                                    consumed_words.insert(member.to_string());
799                                    consumed_words.insert(receiver.to_string());
800                                    break;
801                                }
802                            }
803                        }
804                    }
805                    if entity_edges.len() == edge_count_before {
806                        consumed_words.insert(member.to_string());
807                    }
808                }
809
810                // Phase 2: Bag-of-words resolution (reuse stripped content)
811                let refs = extract_references_with_stripped(&entity.content, &entity.name, &stripped);
812                for ref_name in refs {
813                    if consumed_words.contains(ref_name) {
814                        continue;
815                    }
816                    if class_child_names.contains(&(entity.id.as_str(), ref_name)) {
817                        continue;
818                    }
819
820                    let import_key = (entity.file_path.clone(), ref_name.to_string());
821                    if let Some(import_target_id) = import_table.get(&import_key) {
822                        if import_target_id != &entity.id
823                            && !parent_child_pairs.contains(&(entity.id.as_str(), import_target_id.as_str()))
824                            && !parent_child_pairs.contains(&(import_target_id.as_str(), entity.id.as_str()))
825                        {
826                            let ref_type = infer_ref_type(&entity.content, &ref_name);
827                            entity_edges.push((
828                                entity.id.clone(),
829                                import_target_id.clone(),
830                                ref_type,
831                            ));
832                        }
833                        continue;
834                    }
835
836                    if let Some(target_ids) = symbol_table.get(ref_name) {
837                        let target = target_ids
838                            .iter()
839                            .find(|id| {
840                                *id != &entity.id
841                                    && entity_map
842                                        .get(*id)
843                                        .map_or(false, |e| e.file_path == entity.file_path)
844                            });
845
846                        if let Some(target_id) = target {
847                            if parent_child_pairs.contains(&(entity.id.as_str(), target_id.as_str()))
848                                || parent_child_pairs.contains(&(target_id.as_str(), entity.id.as_str()))
849                            {
850                                continue;
851                            }
852                            let ref_type = infer_ref_type(&entity.content, &ref_name);
853                            entity_edges.push((
854                                entity.id.clone(),
855                                target_id.clone(),
856                                ref_type,
857                            ));
858                        }
859                    }
860                }
861                entity_edges
862            })
863            .collect();
864
865        // Merge scope edges + bag-of-words edges + kept cached edges
866        let mut combined: Vec<(String, String, RefType)> = scope_edges;
867        combined.extend(resolved_refs);
868        let mut seen_edges: HashSet<(String, String)> = HashSet::with_capacity(combined.len());
869        let mut all_resolved: Vec<(String, String, RefType)> = Vec::with_capacity(combined.len());
870        for edge in combined {
871            if seen_edges.insert((edge.0.clone(), edge.1.clone())) {
872                all_resolved.push(edge);
873            }
874        }
875
876        // Build final edge list: kept edges + newly resolved edges
877        let mut edges: Vec<EntityRef> = Vec::with_capacity(kept_edges.len() + all_resolved.len());
878        let mut dependents: HashMap<String, Vec<String>> = HashMap::new();
879        let mut dependencies: HashMap<String, Vec<String>> = HashMap::new();
880
881        // Track all edge pairs for dedup
882        let mut all_edge_pairs: HashSet<(String, String)> = HashSet::new();
883
884        // Add kept cached edges
885        for edge in kept_edges {
886            all_edge_pairs.insert((edge.from_entity.clone(), edge.to_entity.clone()));
887            dependents
888                .entry(edge.to_entity.clone())
889                .or_default()
890                .push(edge.from_entity.clone());
891            dependencies
892                .entry(edge.from_entity.clone())
893                .or_default()
894                .push(edge.to_entity.clone());
895            edges.push(edge);
896        }
897
898        // Add newly resolved edges, dedup against kept edges
899        for (from_entity, to_entity, ref_type) in all_resolved {
900            if !all_edge_pairs.insert((from_entity.clone(), to_entity.clone())) {
901                continue;
902            }
903            dependents
904                .entry(to_entity.clone())
905                .or_default()
906                .push(from_entity.clone());
907            dependencies
908                .entry(from_entity.clone())
909                .or_default()
910                .push(to_entity.clone());
911            edges.push(EntityRef {
912                from_entity,
913                to_entity,
914                ref_type,
915            });
916        }
917
918        let graph = EntityGraph {
919            entities: entity_map,
920            edges,
921            dependents,
922            dependencies,
923        };
924
925        (graph, all_entities)
926    }
927
928    /// Get entities that depend on the given entity (reverse deps).
929    pub fn get_dependents(&self, entity_id: &str) -> Vec<&EntityInfo> {
930        self.dependents
931            .get(entity_id)
932            .map(|ids| {
933                ids.iter()
934                    .filter_map(|id| self.entities.get(id))
935                    .collect()
936            })
937            .unwrap_or_default()
938    }
939
940    /// Get entities that the given entity depends on (forward deps).
941    pub fn get_dependencies(&self, entity_id: &str) -> Vec<&EntityInfo> {
942        self.dependencies
943            .get(entity_id)
944            .map(|ids| {
945                ids.iter()
946                    .filter_map(|id| self.entities.get(id))
947                    .collect()
948            })
949            .unwrap_or_default()
950    }
951
952    /// Impact analysis: if the given entity changes, what else might be affected?
953    /// Returns all transitive dependents (breadth-first), capped at 10k.
954    pub fn impact_analysis(&self, entity_id: &str) -> Vec<&EntityInfo> {
955        self.impact_analysis_capped(entity_id, 10_000)
956    }
957
958    /// Depth-limited impact analysis. Returns transitive dependents with their BFS depth.
959    /// `max_depth == 0` means unlimited. Default depth of 2 covers direct + one transitive level.
960    pub fn impact_analysis_bounded(&self, entity_id: &str, max_depth: usize) -> Vec<(&EntityInfo, usize)> {
961        let mut visited: HashSet<&str> = HashSet::new();
962        let mut queue: std::collections::VecDeque<(&str, usize)> = std::collections::VecDeque::new();
963        let mut result = Vec::new();
964
965        let start_key = match self.entities.get_key_value(entity_id) {
966            Some((k, _)) => k.as_str(),
967            None => return result,
968        };
969
970        queue.push_back((start_key, 0));
971        visited.insert(start_key);
972
973        while let Some((current, depth)) = queue.pop_front() {
974            if let Some(deps) = self.dependents.get(current) {
975                let next_depth = depth + 1;
976                if max_depth > 0 && next_depth > max_depth {
977                    continue;
978                }
979                for dep in deps {
980                    if visited.insert(dep.as_str()) {
981                        if let Some(info) = self.entities.get(dep.as_str()) {
982                            result.push((info, next_depth));
983                        }
984                        queue.push_back((dep.as_str(), next_depth));
985                    }
986                }
987            }
988        }
989
990        result
991    }
992
993    /// Impact analysis with a cap on maximum nodes visited.
994    /// Returns transitive dependents up to the cap. Uses borrowed strings.
995    pub fn impact_analysis_capped(&self, entity_id: &str, max_visited: usize) -> Vec<&EntityInfo> {
996        let mut visited: HashSet<&str> = HashSet::new();
997        let mut queue: std::collections::VecDeque<&str> = std::collections::VecDeque::new();
998        let mut result = Vec::new();
999
1000        let start_key = match self.entities.get_key_value(entity_id) {
1001            Some((k, _)) => k.as_str(),
1002            None => return result,
1003        };
1004
1005        queue.push_back(start_key);
1006        visited.insert(start_key);
1007
1008        while let Some(current) = queue.pop_front() {
1009            if result.len() >= max_visited {
1010                break;
1011            }
1012            if let Some(deps) = self.dependents.get(current) {
1013                for dep in deps {
1014                    if visited.insert(dep.as_str()) {
1015                        if let Some(info) = self.entities.get(dep.as_str()) {
1016                            result.push(info);
1017                        }
1018                        queue.push_back(dep.as_str());
1019                        if result.len() >= max_visited {
1020                            break;
1021                        }
1022                    }
1023                }
1024            }
1025        }
1026
1027        result
1028    }
1029
1030    /// Count transitive dependents without collecting them (faster for large graphs).
1031    /// Uses borrowed strings to avoid allocation overhead.
1032    pub fn impact_count(&self, entity_id: &str, max_count: usize) -> usize {
1033        let mut visited: HashSet<&str> = HashSet::new();
1034        let mut queue: std::collections::VecDeque<&str> = std::collections::VecDeque::new();
1035        let mut count = 0;
1036
1037        // We need entity_id to live long enough; look it up in our entities map
1038        let start_key = match self.entities.get_key_value(entity_id) {
1039            Some((k, _)) => k.as_str(),
1040            None => return 0,
1041        };
1042
1043        queue.push_back(start_key);
1044        visited.insert(start_key);
1045
1046        while let Some(current) = queue.pop_front() {
1047            if count >= max_count {
1048                break;
1049            }
1050            if let Some(deps) = self.dependents.get(current) {
1051                for dep in deps {
1052                    if visited.insert(dep.as_str()) {
1053                        count += 1;
1054                        queue.push_back(dep.as_str());
1055                        if count >= max_count {
1056                            break;
1057                        }
1058                    }
1059                }
1060            }
1061        }
1062
1063        count
1064    }
1065
1066    /// Filter entities to those that look like tests.
1067    /// Uses name heuristics, file path patterns, and content patterns.
1068    pub fn filter_test_entities(&self, entities: &[crate::model::entity::SemanticEntity]) -> HashSet<String> {
1069        let mut test_ids = HashSet::new();
1070        for entity in entities {
1071            if is_test_entity(entity) {
1072                test_ids.insert(entity.id.clone());
1073            }
1074        }
1075        test_ids
1076    }
1077
1078    /// Impact analysis filtered to test entities only.
1079    /// Returns transitive dependents that are test functions/methods.
1080    pub fn test_impact(
1081        &self,
1082        entity_id: &str,
1083        all_entities: &[crate::model::entity::SemanticEntity],
1084    ) -> Vec<&EntityInfo> {
1085        let test_ids = self.filter_test_entities(all_entities);
1086        let impact = self.impact_analysis(entity_id);
1087        impact
1088            .into_iter()
1089            .filter(|info| test_ids.contains(&info.id))
1090            .collect()
1091    }
1092
1093    /// Incrementally update the graph from a set of changed files.
1094    ///
1095    /// Instead of rebuilding the entire graph, this only re-extracts entities
1096    /// from changed files and re-resolves their references. This is faster
1097    /// than a full rebuild when only a few files changed.
1098    ///
1099    /// For each changed file:
1100    /// - Deleted: remove all entities from that file, prune edges
1101    /// - Added/Modified: remove old entities, extract new ones, rebuild references
1102    /// - Renamed: update file paths in entity info
1103    pub fn update_from_changes(
1104        &mut self,
1105        changed_files: &[FileChange],
1106        root: &Path,
1107        registry: &ParserRegistry,
1108    ) {
1109        let mut affected_files: HashSet<String> = HashSet::new();
1110        let mut new_entities: Vec<SemanticEntity> = Vec::new();
1111
1112        for change in changed_files {
1113            affected_files.insert(change.file_path.clone());
1114            if let Some(ref old_path) = change.old_file_path {
1115                affected_files.insert(old_path.clone());
1116            }
1117
1118            match change.status {
1119                FileStatus::Deleted => {
1120                    self.remove_entities_for_file(&change.file_path);
1121                }
1122                FileStatus::Renamed => {
1123                    // Update file paths for renamed files
1124                    if let Some(ref old_path) = change.old_file_path {
1125                        self.remove_entities_for_file(old_path);
1126                    }
1127                    // Extract entities from the new file
1128                    if let Some(entities) = self.extract_file_entities(
1129                        &change.file_path,
1130                        change.after_content.as_deref(),
1131                        root,
1132                        registry,
1133                    ) {
1134                        new_entities.extend(entities);
1135                    }
1136                }
1137                FileStatus::Added | FileStatus::Modified => {
1138                    // Remove old entities for this file
1139                    self.remove_entities_for_file(&change.file_path);
1140                    // Extract new entities
1141                    if let Some(entities) = self.extract_file_entities(
1142                        &change.file_path,
1143                        change.after_content.as_deref(),
1144                        root,
1145                        registry,
1146                    ) {
1147                        new_entities.extend(entities);
1148                    }
1149                }
1150            }
1151        }
1152
1153        // Add new entities to the entity map
1154        for entity in &new_entities {
1155            self.entities.insert(
1156                entity.id.clone(),
1157                EntityInfo {
1158                    id: entity.id.clone(),
1159                    name: entity.name.clone(),
1160                    entity_type: entity.entity_type.clone(),
1161                    file_path: entity.file_path.clone(),
1162                    parent_id: entity.parent_id.clone(),
1163                    start_line: entity.start_line,
1164                    end_line: entity.end_line,
1165                },
1166            );
1167        }
1168
1169        // Rebuild the global symbol table from all current entities
1170        let symbol_table = self.build_symbol_table();
1171
1172        // Re-resolve references for new entities
1173        for entity in &new_entities {
1174            self.resolve_entity_references(entity, &symbol_table);
1175        }
1176
1177        // Also re-resolve references for entities in OTHER files that might
1178        // reference entities in changed files (their targets may have changed)
1179        let changed_entity_names: HashSet<String> = new_entities
1180            .iter()
1181            .map(|e| e.name.clone())
1182            .collect();
1183
1184        // Find entities in unchanged files that reference any changed entity name
1185        let entities_to_recheck: Vec<String> = self
1186            .entities
1187            .values()
1188            .filter(|e| !affected_files.contains(&e.file_path))
1189            .filter(|e| {
1190                self.dependencies
1191                    .get(&e.id)
1192                    .map_or(false, |deps| {
1193                        deps.iter().any(|dep_id| {
1194                            self.entities
1195                                .get(dep_id)
1196                                .map_or(false, |dep| changed_entity_names.contains(&dep.name))
1197                        })
1198                    })
1199            })
1200            .map(|e| e.id.clone())
1201            .collect();
1202
1203        // We don't have the full SemanticEntity for unchanged files, so we skip
1204        // deep re-resolution here. The forward/reverse indexes are already updated
1205        // by remove_entities_for_file and resolve_entity_references.
1206        // For entities that had dangling references (their target was deleted),
1207        // the edges were already pruned.
1208        let _ = entities_to_recheck; // acknowledge but don't act on for now
1209    }
1210
1211    /// Extract entities from a file, using provided content or reading from disk.
1212    fn extract_file_entities(
1213        &self,
1214        file_path: &str,
1215        content: Option<&str>,
1216        root: &Path,
1217        registry: &ParserRegistry,
1218    ) -> Option<Vec<SemanticEntity>> {
1219        let content = if let Some(c) = content {
1220            c.to_string()
1221        } else {
1222            let full_path = root.join(file_path);
1223            std::fs::read_to_string(&full_path).ok()?
1224        };
1225
1226        Some(registry.extract_entities(file_path, &content))
1227    }
1228
1229    /// Remove all entities belonging to a specific file and prune their edges.
1230    fn remove_entities_for_file(&mut self, file_path: &str) {
1231        // Collect entity IDs to remove
1232        let ids_to_remove: Vec<String> = self
1233            .entities
1234            .values()
1235            .filter(|e| e.file_path == file_path)
1236            .map(|e| e.id.clone())
1237            .collect();
1238
1239        let id_set: HashSet<&str> = ids_to_remove.iter().map(|s| s.as_str()).collect();
1240
1241        // Remove from entity map
1242        for id in &ids_to_remove {
1243            self.entities.remove(id);
1244        }
1245
1246        // Remove edges involving these entities
1247        self.edges
1248            .retain(|e| !id_set.contains(e.from_entity.as_str()) && !id_set.contains(e.to_entity.as_str()));
1249
1250        // Clean up dependency/dependent indexes
1251        for id in &ids_to_remove {
1252            // Remove forward deps
1253            if let Some(deps) = self.dependencies.remove(id) {
1254                // Also remove from reverse index
1255                for dep in &deps {
1256                    if let Some(dependents) = self.dependents.get_mut(dep) {
1257                        dependents.retain(|d| d != id);
1258                    }
1259                }
1260            }
1261            // Remove reverse deps
1262            if let Some(deps) = self.dependents.remove(id) {
1263                // Also remove from forward index
1264                for dep in &deps {
1265                    if let Some(dependencies) = self.dependencies.get_mut(dep) {
1266                        dependencies.retain(|d| d != id);
1267                    }
1268                }
1269            }
1270        }
1271    }
1272
1273    /// Build a symbol table from all current entities.
1274    fn build_symbol_table(&self) -> HashMap<String, Vec<String>> {
1275        let mut symbol_table: HashMap<String, Vec<String>> = HashMap::new();
1276        for entity in self.entities.values() {
1277            symbol_table
1278                .entry(entity.name.clone())
1279                .or_default()
1280                .push(entity.id.clone());
1281        }
1282        symbol_table
1283    }
1284
1285    /// Resolve references for a single entity against the symbol table.
1286    fn resolve_entity_references(
1287        &mut self,
1288        entity: &SemanticEntity,
1289        symbol_table: &HashMap<String, Vec<String>>,
1290    ) {
1291        let refs = extract_references_from_content(&entity.content, &entity.name);
1292
1293        for ref_name in refs {
1294            if let Some(target_ids) = symbol_table.get(ref_name) {
1295                let target = target_ids
1296                    .iter()
1297                    .find(|id| {
1298                        *id != &entity.id
1299                            && self
1300                                .entities
1301                                .get(*id)
1302                                .map_or(false, |e| e.file_path == entity.file_path)
1303                    })
1304                    .or_else(|| target_ids.iter().find(|id| *id != &entity.id));
1305
1306                if let Some(target_id) = target {
1307                    let ref_type = infer_ref_type(&entity.content, &ref_name);
1308                    self.edges.push(EntityRef {
1309                        from_entity: entity.id.clone(),
1310                        to_entity: target_id.clone(),
1311                        ref_type,
1312                    });
1313                    self.dependents
1314                        .entry(target_id.clone())
1315                        .or_default()
1316                        .push(entity.id.clone());
1317                    self.dependencies
1318                        .entry(entity.id.clone())
1319                        .or_default()
1320                        .push(target_id.clone());
1321                }
1322            }
1323        }
1324    }
1325}
1326
1327/// Check if an entity looks like a test based on name, file path, and content patterns.
1328fn is_test_entity(entity: &crate::model::entity::SemanticEntity) -> bool {
1329    let name = &entity.name;
1330    let path = &entity.file_path;
1331    let content = &entity.content;
1332
1333    // Name patterns
1334    if name.starts_with("test_") || name.starts_with("Test") || name.ends_with("_test") || name.ends_with("Test") {
1335        return true;
1336    }
1337    if name.starts_with("it_") || name.starts_with("describe_") || name.starts_with("spec_") {
1338        return true;
1339    }
1340
1341    // File path patterns
1342    let path_lower = path.to_lowercase();
1343    let in_test_file = path_lower.contains("/test/")
1344        || path_lower.contains("/tests/")
1345        || path_lower.contains("/spec/")
1346        || path_lower.contains("_test.")
1347        || path_lower.contains(".test.")
1348        || path_lower.contains("_spec.")
1349        || path_lower.contains(".spec.");
1350
1351    // Content patterns (test annotations/decorators)
1352    let has_test_marker = content.contains("#[test]")
1353        || content.contains("#[cfg(test)]")
1354        || content.contains("@Test")
1355        || content.contains("@pytest")
1356        || content.contains("@test")
1357        || content.contains("describe(")
1358        || content.contains("it(")
1359        || content.contains("test(");
1360
1361    in_test_file && has_test_marker
1362}
1363
1364/// Build import table: maps (file_path, imported_name) → target entity ID.
1365///
1366/// Parses `from X import Y` / `import X` / `use X` style statements from entity content
1367/// and resolves Y to the entity it refers to in the symbol table.
1368fn build_import_table(
1369    root: &Path,
1370    file_paths: &[String],
1371    symbol_table: &HashMap<String, Vec<String>>,
1372    entity_map: &HashMap<String, EntityInfo>,
1373    pre_parsed_content: Option<&[(String, String, tree_sitter::Tree)]>,
1374) -> HashMap<(String, String), String> {
1375    // Build a content lookup from pre-parsed files to avoid re-reading from disk
1376    let content_map: HashMap<&str, &str> = pre_parsed_content
1377        .map(|files| {
1378            files.iter().map(|(fp, content, _)| (fp.as_str(), content.as_str())).collect()
1379        })
1380        .unwrap_or_default();
1381
1382    // Go imports are handled entirely by the scope resolver (which uses an indexed approach).
1383    // We no longer need a go_pkg_index here since Go files are skipped below.
1384
1385    // Process files in parallel, each producing local import entries
1386    let per_file_imports: Vec<Vec<((String, String), String)>> = maybe_par_iter!(file_paths)
1387        .filter_map(|file_path| {
1388            // Go imports are handled entirely by the scope resolver — skip here
1389            if file_path.ends_with(".go") {
1390                return None;
1391            }
1392
1393            // Use pre-parsed content if available, otherwise read from disk
1394            let owned_content: Option<String>;
1395            let content: &str = if let Some(c) = content_map.get(file_path.as_str()) {
1396                c
1397            } else {
1398                let full_path = root.join(file_path);
1399                owned_content = std::fs::read_to_string(&full_path).ok();
1400                match owned_content.as_deref() {
1401                    Some(c) => c,
1402                    None => return None,
1403                }
1404            };
1405
1406            let mut local_imports: Vec<((String, String), String)> = Vec::new();
1407
1408            // Join multi-line imports into single logical lines
1409            // e.g. "from .cookies import (\n    foo,\n    bar,\n)" -> "from .cookies import foo, bar"
1410            let mut logical_lines: Vec<String> = Vec::new();
1411            let mut current_line = String::new();
1412            let mut in_parens = false;
1413
1414            for line in content.lines() {
1415                let trimmed = line.trim();
1416                if in_parens {
1417                    // Strip parentheses and comments
1418                    let clean = trimmed.trim_end_matches(|c: char| c == ')' || c == ',');
1419                    let clean = clean.split('#').next().unwrap_or(clean).trim();
1420                    if !clean.is_empty() && clean != "(" {
1421                        current_line.push_str(", ");
1422                        current_line.push_str(clean);
1423                    }
1424                    if trimmed.contains(')') {
1425                        in_parens = false;
1426                        logical_lines.push(std::mem::take(&mut current_line));
1427                    }
1428                } else if trimmed.starts_with("from ") && trimmed.contains(" import ") {
1429                    if trimmed.contains('(') && !trimmed.contains(')') {
1430                        // Multi-line import starts
1431                        in_parens = true;
1432                        // Take everything before the paren
1433                        let before_paren = trimmed.split('(').next().unwrap_or(trimmed);
1434                        current_line = before_paren.trim().to_string();
1435                        // Also grab anything after the paren on this line
1436                        if let Some(after) = trimmed.split('(').nth(1) {
1437                            let after = after.trim().trim_end_matches(')').trim();
1438                            if !after.is_empty() {
1439                                current_line.push(' ');
1440                                current_line.push_str(after);
1441                            }
1442                        }
1443                    } else {
1444                        logical_lines.push(trimmed.to_string());
1445                    }
1446                }
1447            }
1448
1449            for logical_line in &logical_lines {
1450                if let Some(rest) = logical_line.strip_prefix("from ") {
1451                    // Find " import " or " import," (multi-line imports join with comma)
1452                    let import_match = rest.find(" import ")
1453                        .map(|pos| (pos, 8))
1454                        .or_else(|| rest.find(" import,").map(|pos| (pos, 8)));
1455                    if let Some((import_pos, skip)) = import_match {
1456                        let module_path = &rest[..import_pos];
1457                        let names_str = &rest[import_pos + skip..];
1458
1459                        for name_part in names_str.split(',') {
1460                            let name_part = name_part.trim();
1461                            let imported_name = name_part.split_whitespace().next().unwrap_or(name_part);
1462                            // Strip trailing parens/punctuation
1463                            let imported_name = imported_name.trim_matches(|c: char| c == '(' || c == ')' || c == ',');
1464                            if imported_name.is_empty() {
1465                                continue;
1466                            }
1467
1468                            if let Some(target_ids) = symbol_table.get(imported_name) {
1469                                let target = find_import_target(
1470                                    target_ids,
1471                                    module_path,
1472                                    file_path,
1473                                    &[".py"],
1474                                    entity_map,
1475                                );
1476                                if let Some(target_id) = target {
1477                                    local_imports.push((
1478                                        (file_path.clone(), imported_name.to_string()),
1479                                        target_id.clone(),
1480                                    ));
1481                                }
1482                            }
1483                        }
1484                    }
1485                }
1486            }
1487
1488            // JS/TS imports: import { foo, bar as baz } from './module'
1489            //                import Foo from './module'
1490            let is_js_ts = file_path.ends_with(".js") || file_path.ends_with(".ts")
1491                || file_path.ends_with(".jsx") || file_path.ends_with(".tsx");
1492
1493            if is_js_ts {
1494                static JS_NAMED_RE: LazyLock<Regex> = LazyLock::new(|| {
1495                    Regex::new(r#"import\s*\{([^}]+)\}\s*from\s*['"]([^'"]+)['"]"#).unwrap()
1496                });
1497                static JS_DEFAULT_RE: LazyLock<Regex> = LazyLock::new(|| {
1498                    Regex::new(r#"import\s+(?:type\s+)?([A-Za-z_]\w*)\s+from\s*['"]([^'"]+)['"]"#).unwrap()
1499                });
1500
1501                for cap in JS_NAMED_RE.captures_iter(content) {
1502                    let names_str = cap.get(1).unwrap().as_str();
1503                    let module_path = cap.get(2).unwrap().as_str();
1504
1505                    for name_part in names_str.split(',') {
1506                        let name_part = name_part.trim();
1507                        if name_part.is_empty() { continue; }
1508
1509                        // Handle "foo as bar" aliases and "type foo" prefixes
1510                        let (original_name, local_name) = if let Some(pos) = name_part.find(" as ") {
1511                            let orig = name_part[..pos].trim();
1512                            let local = name_part[pos + 4..].trim();
1513                            let orig = orig.strip_prefix("type ").unwrap_or(orig);
1514                            (orig, local)
1515                        } else {
1516                            let name = name_part.strip_prefix("type ").unwrap_or(name_part);
1517                            (name, name)
1518                        };
1519
1520                        if original_name.is_empty() || local_name.is_empty() { continue; }
1521
1522                        if let Some(target_ids) = symbol_table.get(original_name) {
1523                            let target = find_import_target(
1524                                target_ids,
1525                                module_path,
1526                                file_path,
1527                                &[".ts", ".tsx", ".js", ".jsx"],
1528                                entity_map,
1529                            );
1530                            if let Some(target_id) = target {
1531                                local_imports.push((
1532                                    (file_path.clone(), local_name.to_string()),
1533                                    target_id.clone(),
1534                                ));
1535                            }
1536                        }
1537                    }
1538                }
1539
1540                for cap in JS_DEFAULT_RE.captures_iter(content) {
1541                    let local_name = cap.get(1).unwrap().as_str();
1542                    let module_path = cap.get(2).unwrap().as_str();
1543
1544                    if let Some(target_ids) = symbol_table.get(local_name) {
1545                        let target = find_import_target(
1546                            target_ids,
1547                            module_path,
1548                            file_path,
1549                            &[".ts", ".tsx", ".js", ".jsx"],
1550                            entity_map,
1551                        );
1552                        if let Some(target_id) = target {
1553                            local_imports.push((
1554                                (file_path.clone(), local_name.to_string()),
1555                                target_id.clone(),
1556                            ));
1557                        }
1558                    }
1559                }
1560            }
1561
1562            // Rust imports: use crate::module::Name; / use crate::module::{A, B};
1563            // Also: use super::module::Name; / use self::module::Name;
1564            let is_rust = file_path.ends_with(".rs");
1565            if is_rust {
1566                static RUST_USE_SIMPLE_RE: LazyLock<Regex> = LazyLock::new(|| {
1567                    // use crate::config::Config;
1568                    // use super::types::Entity;
1569                    // use config::Config;  (bare module path in binary crates)
1570                    Regex::new(r"(?m)^\s*use\s+(?:(?:crate|super|self)::)?([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\s*;").unwrap()
1571                });
1572                static RUST_USE_GROUP_RE: LazyLock<Regex> = LazyLock::new(|| {
1573                    // use crate::types::{Entity, ParseError};
1574                    // use types::{Entity, ParseError};  (bare module path)
1575                    Regex::new(r"(?m)^\s*use\s+(?:(?:crate|super|self)::)?([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)::\{([^}]+)\}\s*;").unwrap()
1576                });
1577
1578                // Use a local import table for Rust alias resolution
1579                let mut local_import_table: HashMap<(String, String), String> = HashMap::new();
1580
1581                // Build a map: module_name -> list of file paths whose stem matches
1582                // For "use crate::config::Config", module is "config", name is "Config"
1583                for cap in RUST_USE_SIMPLE_RE.captures_iter(content) {
1584                    let full_path_str = cap.get(1).unwrap().as_str();
1585                    let parts: Vec<&str> = full_path_str.split("::").collect();
1586                    if parts.is_empty() { continue; }
1587
1588                    // Last part is the imported name, everything before is the module path
1589                    let imported_name = parts[parts.len() - 1];
1590                    // The module is the second-to-last part, or the first if only one part
1591                    let source_module = if parts.len() >= 2 {
1592                        parts[parts.len() - 2]
1593                    } else {
1594                        parts[0]
1595                    };
1596
1597                    resolve_rust_import(
1598                        file_path, imported_name, source_module,
1599                        symbol_table, entity_map, &mut local_import_table,
1600                    );
1601                }
1602
1603                for cap in RUST_USE_GROUP_RE.captures_iter(content) {
1604                    let module_path = cap.get(1).unwrap().as_str();
1605                    let names_str = cap.get(2).unwrap().as_str();
1606
1607                    // source_module is the last segment of the module path
1608                    let source_module = module_path.rsplit("::").next().unwrap_or(module_path);
1609
1610                    for name_part in names_str.split(',') {
1611                        let name_part = name_part.trim();
1612                        // Handle "Name as Alias"
1613                        let (original, local) = if let Some(pos) = name_part.find(" as ") {
1614                            (&name_part[..pos], name_part[pos + 4..].trim())
1615                        } else {
1616                            (name_part, name_part)
1617                        };
1618                        let original = original.trim();
1619                        let local = local.trim();
1620                        if original.is_empty() || local.is_empty() { continue; }
1621
1622                        resolve_rust_import(
1623                            file_path, original, source_module,
1624                            symbol_table, entity_map, &mut local_import_table,
1625                        );
1626                        // If aliased, also map the local name
1627                        if local != original {
1628                            if let Some(target) = local_import_table.get(&(file_path.clone(), original.to_string())).cloned() {
1629                                local_import_table.insert(
1630                                    (file_path.clone(), local.to_string()),
1631                                    target,
1632                                );
1633                            }
1634                        }
1635                    }
1636                }
1637
1638                // Collect all Rust imports into local_imports
1639                for (key, val) in local_import_table {
1640                    local_imports.push((key, val));
1641                }
1642            }
1643
1644            // Go imports are handled by the scope resolver (avoids O(n²) import table explosion).
1645            // Skip Go files here entirely.
1646
1647            Some(local_imports)
1648        })
1649        .collect();
1650
1651    // Merge all per-file imports into a single table
1652    let mut import_table: HashMap<(String, String), String> = HashMap::new();
1653    for local_imports in per_file_imports {
1654        for (key, val) in local_imports {
1655            import_table.insert(key, val);
1656        }
1657    }
1658
1659    import_table
1660}
1661
1662/// Resolve a Rust import: find the target entity in the symbol table
1663/// by matching the imported name against entities in files whose stem matches source_module.
1664fn resolve_rust_import(
1665    file_path: &str,
1666    imported_name: &str,
1667    source_module: &str,
1668    symbol_table: &HashMap<String, Vec<String>>,
1669    entity_map: &HashMap<String, EntityInfo>,
1670    import_table: &mut HashMap<(String, String), String>,
1671) {
1672    if let Some(target_ids) = symbol_table.get(imported_name) {
1673        let target = target_ids.iter().find(|id| {
1674            entity_map.get(*id).map_or(false, |e| {
1675                let stem = e.file_path.rsplit('/').next().unwrap_or(&e.file_path);
1676                let stem = strip_file_ext(stem);
1677                stem == source_module
1678            })
1679        });
1680        if let Some(target_id) = target {
1681            import_table.insert(
1682                (file_path.to_string(), imported_name.to_string()),
1683                target_id.clone(),
1684            );
1685        }
1686    }
1687}
1688
/// Remove a recognised source-file extension (`.py`, `.ts`, `.js`, `.tsx`, `.jsx`, `.rs`)
/// from the end of `s`. Names with any other ending are returned unchanged.
fn strip_file_ext(s: &str) -> &str {
    const KNOWN_EXTENSIONS: [&str; 6] = [".py", ".ts", ".js", ".tsx", ".jsx", ".rs"];

    KNOWN_EXTENSIONS
        .iter()
        .find_map(|ext| s.strip_suffix(*ext))
        .unwrap_or(s)
}
1699
/// Strip comments and string literals from content to avoid false references.
///
/// Returns a string of the same byte length as `content` in which every byte belonging to a
/// string literal or comment is replaced by a space; all other bytes (including the newline
/// that ends a line comment) are copied through unchanged.
///
/// Recognised constructs, checked in this order at each position:
/// triple-quoted strings (`"""…"""`, `'''…'''`), double- and single-quoted strings with
/// backslash escapes, `#` line comments, `//` line comments and `/* … */` block comments.
/// A construct that is never closed blanks everything up to the end of the input.
///
/// The scan is language-agnostic, so e.g. a lone `'` or `#` is always treated as the start of
/// a string or comment.
fn strip_comments_and_strings(content: &str) -> String {
    // Offset of the first occurrence of `needle` within `haystack`.
    fn find_bytes(haystack: &[u8], needle: &[u8]) -> Option<usize> {
        haystack.windows(needle.len()).position(|window| window == needle)
    }

    let bytes = content.as_bytes();
    let len = bytes.len();
    // Start from all spaces; only bytes classified as code are copied in.
    let mut result = vec![b' '; len];
    let mut i = 0;

    while i < len {
        let rest = &bytes[i..];

        // Triple-quoted strings (Python docstrings).
        if rest.starts_with(b"\"\"\"") || rest.starts_with(b"'''") {
            let delimiter = &rest[..3];
            let after_open = i + 3;
            // Unterminated: blank through to the end rather than leaking the last bytes.
            i = find_bytes(&rest[3..], delimiter).map_or(len, |offset| after_open + offset + 3);
            continue;
        }

        match bytes[i] {
            // Double- or single-quoted string; a backslash skips the byte after it.
            quote @ (b'"' | b'\'') => {
                i += 1;
                while i < len {
                    match bytes[i] {
                        b'\\' => i += 2,
                        b if b == quote => {
                            i += 1;
                            break;
                        }
                        _ => i += 1,
                    }
                }
            }
            // Python/Ruby single-line comment: stop before the newline so it is kept.
            b'#' => i += rest.iter().position(|&b| b == b'\n').unwrap_or(rest.len()),
            // C-style single-line comment.
            b'/' if rest.starts_with(b"//") => {
                i += rest.iter().position(|&b| b == b'\n').unwrap_or(rest.len());
            }
            // C-style block comment.
            b'/' if rest.starts_with(b"/*") => {
                let after_open = i + 2;
                i = find_bytes(&rest[2..], b"*/").map_or(len, |offset| after_open + offset + 2);
            }
            // Regular code: copy through.
            code => {
                result[i] = code;
                i += 1;
            }
        }
    }

    String::from_utf8_lossy(&result).into_owned()
}
1778
1779/// Extract dot-chains (receiver.member) from content for precise resolution.
1780/// Returns unique (receiver, member) pairs found in the content.
1781fn extract_dot_chains<'a>(content: &'a str) -> Vec<(&'a str, &'a str)> {
1782    static DOT_CHAIN_RE: LazyLock<Regex> = LazyLock::new(|| {
1783        Regex::new(r"\b([A-Za-z_]\w*)\.([A-Za-z_]\w*)").unwrap()
1784    });
1785
1786    let mut chains = Vec::new();
1787    let mut seen: HashSet<(&str, &str)> = HashSet::new();
1788    for cap in DOT_CHAIN_RE.captures_iter(content) {
1789        let receiver = cap.get(1).unwrap().as_str();
1790        let member = cap.get(2).unwrap().as_str();
1791        if seen.insert((receiver, member)) {
1792            chains.push((receiver, member));
1793        }
1794    }
1795    chains
1796}
1797
1798/// Extract identifier references from entity content using simple token analysis.
1799/// Strips comments and strings first to avoid false positives from docstrings.
1800/// Returns borrowed slices from the stripped content.
1801fn extract_references_from_content<'a>(content: &'a str, own_name: &str) -> Vec<&'a str> {
1802    let stripped = strip_comments_and_strings(content);
1803    extract_references_with_stripped(content, own_name, &stripped)
1804}
1805
1806/// Extract references using a pre-stripped version of the content.
1807/// Use this when you already have the stripped content (e.g. from dot-chain extraction)
1808/// to avoid stripping comments/strings twice.
1809fn extract_references_with_stripped<'a>(content: &'a str, own_name: &str, stripped: &str) -> Vec<&'a str> {
1810    let stripped_words: HashSet<&str> = stripped
1811        .split(|c: char| !c.is_alphanumeric() && c != '_')
1812        .filter(|w| !w.is_empty())
1813        .collect();
1814
1815    let mut refs = Vec::new();
1816    let mut seen: HashSet<&str> = HashSet::new();
1817
1818    for word in content.split(|c: char| !c.is_alphanumeric() && c != '_') {
1819        if word.is_empty() || word == own_name {
1820            continue;
1821        }
1822        if is_keyword(word) || word.len() < 2 {
1823            continue;
1824        }
1825        // Skip very short lowercase identifiers (likely local vars: i, x, a, ok, id, etc.)
1826        if word.starts_with(|c: char| c.is_lowercase()) && word.len() < 3 {
1827            continue;
1828        }
1829        if !word.starts_with(|c: char| c.is_alphabetic() || c == '_') {
1830            continue;
1831        }
1832        // Skip common local variable names that create false graph edges
1833        if is_common_local_name(word) {
1834            continue;
1835        }
1836        // Skip words that only appear in comments/strings
1837        if !stripped_words.contains(word) {
1838            continue;
1839        }
1840        if seen.insert(word) {
1841            refs.push(word);
1842        }
1843    }
1844
1845    refs
1846}
1847
/// Identifiers that are overwhelmingly used as local variable names.
static COMMON_LOCAL_NAMES: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
    HashSet::from([
        "result", "results", "data", "config",
        "value", "values", "item", "items",
        "input", "output", "args", "opts",
        "name", "path", "file", "line",
        "count", "index", "temp", "prev",
        "next", "curr", "current", "node",
        "left", "right", "root", "head",
        "tail", "body", "text", "content",
        "source", "target", "entry", "error",
        "errors", "message", "response", "request",
        "context", "state", "props", "event",
        "handler", "callback", "options", "params",
        "query", "list", "base", "info",
        "meta", "kind", "mode", "flag",
        "size", "length", "width", "height",
        "start", "stop", "begin", "done",
        "found", "status", "code",
    ])
});

/// Whether `word` is one of the names that are almost always local variables rather than
/// entity references. Treating these as references would create large numbers of
/// false-positive edges in the dependency graph.
fn is_common_local_name(word: &str) -> bool {
    COMMON_LOCAL_NAMES.get(word).is_some()
}
1870
1871/// Infer reference type from context using word-boundary-aware matching.
1872fn infer_ref_type(content: &str, ref_name: &str) -> RefType {
1873    // Check if it's a function call: ref_name followed by ( with word boundary before.
1874    // Avoids format! allocation by finding ref_name and checking the next char.
1875    let bytes = content.as_bytes();
1876    let name_bytes = ref_name.as_bytes();
1877    let mut search_start = 0;
1878    while let Some(rel_pos) = content[search_start..].find(ref_name) {
1879        let pos = search_start + rel_pos;
1880        let after = pos + name_bytes.len();
1881        // Check next char is '('
1882        if after < bytes.len() && bytes[after] == b'(' {
1883            // Verify word boundary before
1884            let is_boundary = pos == 0 || {
1885                let prev = bytes[pos - 1];
1886                !prev.is_ascii_alphanumeric() && prev != b'_'
1887            };
1888            if is_boundary {
1889                return RefType::Calls;
1890            }
1891        }
1892        // Advance past pos to the next char boundary to avoid slicing inside a multi-byte UTF-8 char.
1893        search_start = pos + 1;
1894        while search_start < content.len() && !content.is_char_boundary(search_start) {
1895            search_start += 1;
1896        }
1897    }
1898
1899    // Check if it's in an import/use statement (line-level, not substring)
1900    for line in content.lines() {
1901        let trimmed = line.trim();
1902        if (trimmed.starts_with("import ") || trimmed.starts_with("use ")
1903            || trimmed.starts_with("from ") || trimmed.starts_with("require("))
1904            && trimmed.contains(ref_name)
1905        {
1906            return RefType::Imports;
1907        }
1908    }
1909
1910    // Default to type reference
1911    RefType::TypeRef
1912}
1913
/// Words that are never treated as entity references: language keywords, a few ubiquitous
/// built-ins, and primitive / standard-library type names, grouped by origin.
static KEYWORDS: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
    // Shared by many languages
    const COMMON: &[&str] = &[
        "if", "else", "for", "while", "do", "switch", "case", "break", "continue", "return",
        "try", "catch", "finally", "throw", "new", "delete", "typeof", "instanceof", "in", "of",
        "true", "false", "null", "undefined", "void", "this", "super", "class", "extends",
        "implements", "interface", "enum", "const", "let", "var", "function", "async", "await",
        "yield", "import", "export", "default", "from", "as", "static", "public", "private",
        "protected", "abstract", "final", "override",
    ];
    const RUST: &[&str] = &[
        "fn", "pub", "mod", "use", "struct", "impl", "trait", "where", "type", "self", "Self",
        "mut", "ref", "match", "loop", "move", "unsafe", "extern", "crate", "dyn",
    ];
    const PYTHON: &[&str] = &[
        "def", "elif", "except", "raise", "with", "pass", "lambda", "nonlocal", "global",
        "assert", "True", "False", "and", "or", "not", "is",
    ];
    const GO: &[&str] = &[
        "func", "package", "range", "select", "chan", "go", "defer", "map", "make", "append",
        "len", "cap",
    ];
    const C_CPP: &[&str] = &[
        "auto", "register", "volatile", "sizeof", "typedef", "template", "typename",
        "namespace", "virtual", "inline", "constexpr", "nullptr", "noexcept", "explicit",
        "friend", "operator", "using", "cout", "endl", "cerr", "cin", "printf", "scanf",
        "malloc", "free", "NULL", "include", "ifdef", "ifndef", "endif", "define", "pragma",
    ];
    const RUBY: &[&str] = &[
        "end", "then", "elsif", "unless", "until", "begin", "rescue", "ensure", "when",
        "require", "attr_accessor", "attr_reader", "attr_writer", "puts", "nil", "module",
        "defined",
    ];
    const CSHARP: &[&str] = &[
        "internal", "sealed", "readonly", "partial", "delegate", "event", "params", "out",
        "object", "decimal", "sbyte", "ushort", "uint", "ulong", "nint", "nuint", "dynamic",
        "get", "set", "value", "init", "record",
    ];
    // Primitive and ubiquitous library types
    const TYPES: &[&str] = &[
        "string", "number", "boolean", "int", "float", "double", "bool", "char", "byte", "i8",
        "i16", "i32", "i64", "u8", "u16", "u32", "u64", "f32", "f64", "usize", "isize", "str",
        "String", "Vec", "Option", "Result", "Box", "Arc", "Rc", "HashMap", "HashSet", "Some",
        "Ok", "Err",
    ];

    [COMMON, RUST, PYTHON, GO, C_CPP, RUBY, CSHARP, TYPES]
        .iter()
        .flat_map(|group| group.iter().copied())
        .collect()
});

/// Whether `word` is in the keyword / built-in set above (exact, case-sensitive match).
fn is_keyword(word: &str) -> bool {
    KEYWORDS.get(word).is_some()
}
1968
1969#[cfg(test)]
1970mod tests {
1971    use super::*;
1972    use crate::git::types::{FileChange, FileStatus};
1973    use std::io::Write;
1974    use tempfile::TempDir;
1975
1976    fn create_test_repo() -> (TempDir, ParserRegistry) {
1977        let dir = TempDir::new().unwrap();
1978        let registry = crate::parser::plugins::create_default_registry();
1979        (dir, registry)
1980    }
1981
1982    fn write_file(dir: &Path, name: &str, content: &str) {
1983        let path = dir.join(name);
1984        if let Some(parent) = path.parent() {
1985            std::fs::create_dir_all(parent).unwrap();
1986        }
1987        let mut f = std::fs::File::create(path).unwrap();
1988        f.write_all(content.as_bytes()).unwrap();
1989    }
1990
1991    #[test]
1992    fn test_incremental_add_file() {
1993        let (dir, registry) = create_test_repo();
1994        let root = dir.path();
1995
1996        // Start with one file
1997        write_file(root, "a.ts", "export function foo() { return bar(); }\n");
1998        write_file(root, "b.ts", "export function bar() { return 1; }\n");
1999
2000        let (mut graph, _) = EntityGraph::build(root, &["a.ts".into(), "b.ts".into()], &registry);
2001        assert_eq!(graph.entities.len(), 2);
2002
2003        // Add a new file
2004        write_file(root, "c.ts", "export function baz() { return foo(); }\n");
2005        graph.update_from_changes(
2006            &[FileChange {
2007                file_path: "c.ts".into(),
2008                status: FileStatus::Added,
2009                old_file_path: None,
2010                before_content: None,
2011                after_content: None, // will read from disk
2012            }],
2013            root,
2014            &registry,
2015        );
2016
2017        assert_eq!(graph.entities.len(), 3);
2018        assert!(graph.entities.contains_key("c.ts::function::baz"));
2019        // baz references foo
2020        let baz_deps = graph.get_dependencies("c.ts::function::baz");
2021        assert!(
2022            baz_deps.iter().any(|d| d.name == "foo"),
2023            "baz should depend on foo. Deps: {:?}",
2024            baz_deps.iter().map(|d| &d.name).collect::<Vec<_>>()
2025        );
2026    }
2027
2028    #[test]
2029    fn test_incremental_delete_file() {
2030        let (dir, registry) = create_test_repo();
2031        let root = dir.path();
2032
2033        write_file(root, "a.ts", "export function foo() { return bar(); }\n");
2034        write_file(root, "b.ts", "export function bar() { return 1; }\n");
2035
2036        let (mut graph, _) = EntityGraph::build(root, &["a.ts".into(), "b.ts".into()], &registry);
2037        assert_eq!(graph.entities.len(), 2);
2038
2039        // Delete b.ts
2040        graph.update_from_changes(
2041            &[FileChange {
2042                file_path: "b.ts".into(),
2043                status: FileStatus::Deleted,
2044                old_file_path: None,
2045                before_content: None,
2046                after_content: None,
2047            }],
2048            root,
2049            &registry,
2050        );
2051
2052        assert_eq!(graph.entities.len(), 1);
2053        assert!(!graph.entities.contains_key("b.ts::function::bar"));
2054        // foo's dependency on bar should be pruned
2055        let foo_deps = graph.get_dependencies("a.ts::function::foo");
2056        assert!(
2057            foo_deps.is_empty(),
2058            "foo's deps should be empty after bar deleted. Deps: {:?}",
2059            foo_deps.iter().map(|d| &d.name).collect::<Vec<_>>()
2060        );
2061    }
2062
2063    #[test]
2064    fn test_incremental_modify_file() {
2065        let (dir, registry) = create_test_repo();
2066        let root = dir.path();
2067
2068        write_file(root, "a.ts", "export function foo() { return bar(); }\n");
2069        write_file(root, "b.ts", "export function bar() { return 1; }\nexport function baz() { return 2; }\n");
2070
2071        let (mut graph, _) = EntityGraph::build(root, &["a.ts".into(), "b.ts".into()], &registry);
2072        assert_eq!(graph.entities.len(), 3);
2073
2074        // Modify a.ts to call baz instead of bar
2075        write_file(root, "a.ts", "export function foo() { return baz(); }\n");
2076        graph.update_from_changes(
2077            &[FileChange {
2078                file_path: "a.ts".into(),
2079                status: FileStatus::Modified,
2080                old_file_path: None,
2081                before_content: None,
2082                after_content: None,
2083            }],
2084            root,
2085            &registry,
2086        );
2087
2088        assert_eq!(graph.entities.len(), 3);
2089        // foo should now depend on baz, not bar
2090        let foo_deps = graph.get_dependencies("a.ts::function::foo");
2091        let dep_names: Vec<&str> = foo_deps.iter().map(|d| d.name.as_str()).collect();
2092        assert!(dep_names.contains(&"baz"), "foo should depend on baz after modification. Deps: {:?}", dep_names);
2093        assert!(!dep_names.contains(&"bar"), "foo should no longer depend on bar. Deps: {:?}", dep_names);
2094    }
2095
2096    #[test]
2097    fn test_incremental_with_content() {
2098        let (dir, registry) = create_test_repo();
2099        let root = dir.path();
2100
2101        write_file(root, "a.ts", "export function foo() { return 1; }\n");
2102        let (mut graph, _) = EntityGraph::build(root, &["a.ts".into()], &registry);
2103        assert_eq!(graph.entities.len(), 1);
2104
2105        // Add file with content provided directly (no disk read needed)
2106        graph.update_from_changes(
2107            &[FileChange {
2108                file_path: "b.ts".into(),
2109                status: FileStatus::Added,
2110                old_file_path: None,
2111                before_content: None,
2112                after_content: Some("export function bar() { return foo(); }\n".into()),
2113            }],
2114            root,
2115            &registry,
2116        );
2117
2118        assert_eq!(graph.entities.len(), 2);
2119        let bar_deps = graph.get_dependencies("b.ts::function::bar");
2120        assert!(bar_deps.iter().any(|d| d.name == "foo"));
2121    }
2122
2123    #[cfg(feature = "lang-go")]
2124    #[test]
2125    fn test_go_method_parent_resolves_across_files_in_graph() {
2126        let (dir, registry) = create_test_repo();
2127        let root = dir.path();
2128
2129        write_file(root, "models.go", "package demo\n\ntype Service struct{}\n");
2130        write_file(
2131            root,
2132            "methods.go",
2133            "package demo\n\nfunc (s *Service) Run() {}\n",
2134        );
2135
2136        let (graph, entities) =
2137            EntityGraph::build(root, &["models.go".into(), "methods.go".into()], &registry);
2138        let service = graph
2139            .entities
2140            .get("models.go::type::Service")
2141            .expect("Service type should be in the graph");
2142        let run = entities
2143            .iter()
2144            .find(|e| e.name == "Run" && e.file_path == "methods.go")
2145            .expect("Run method should be extracted");
2146
2147        assert_eq!(run.parent_id.as_deref(), Some(service.id.as_str()));
2148        assert!(graph.entities.contains_key("models.go::type::Service::Run"));
2149    }
2150
2151    #[cfg(feature = "lang-go")]
2152    #[test]
2153    fn test_incremental_go_parent_repair_handles_clean_cached_method() {
2154        let (dir, registry) = create_test_repo();
2155        let root = dir.path();
2156        let models = "package demo\n\ntype Service struct{}\n";
2157        let methods = "package demo\n\nfunc (s *Service) Run() {}\n";
2158
2159        write_file(root, "models.go", models);
2160        write_file(root, "methods.go", methods);
2161
2162        let cached_entities = registry.extract_entities("methods.go", methods);
2163        let cached_run = cached_entities
2164            .iter()
2165            .find(|e| e.name == "Run")
2166            .expect("cached Run method should be extracted");
2167        assert_eq!(
2168            cached_run.parent_id.as_deref(),
2169            Some("methods.go::type::Service")
2170        );
2171
2172        let stale_file_cached_entities = registry.extract_entities("models.go", models);
2173        let (graph, entities) = EntityGraph::build_incremental(
2174            root,
2175            &["models.go".into()],
2176            &["models.go".into(), "methods.go".into()],
2177            cached_entities,
2178            vec![],
2179            stale_file_cached_entities,
2180            &registry,
2181        );
2182        let service = graph
2183            .entities
2184            .get("models.go::type::Service")
2185            .expect("Service type should be in the graph");
2186        let run = entities
2187            .iter()
2188            .find(|e| e.name == "Run" && e.file_path == "methods.go")
2189            .expect("Run method should be retained from clean cache");
2190
2191        assert_eq!(run.parent_id.as_deref(), Some(service.id.as_str()));
2192        assert!(graph.entities.contains_key("models.go::type::Service::Run"));
2193        assert!(!graph.entities.contains_key("methods.go::type::Service::Run"));
2194    }
2195
2196    #[test]
2197    fn test_extract_references() {
2198        let content = "function processData(input) {\n  const result = validateInput(input);\n  return transform(result);\n}";
2199        let refs = extract_references_from_content(content, "processData");
2200        assert!(refs.contains(&"validateInput"));
2201        assert!(refs.contains(&"transform"));
2202        assert!(!refs.contains(&"processData")); // self excluded
2203    }
2204
2205    #[test]
2206    fn test_extract_references_skips_keywords() {
2207        let content = "function foo() { if (true) { return false; } }";
2208        let refs = extract_references_from_content(content, "foo");
2209        assert!(!refs.contains(&"if"));
2210        assert!(!refs.contains(&"true"));
2211        assert!(!refs.contains(&"return"));
2212        assert!(!refs.contains(&"false"));
2213    }
2214
2215    #[test]
2216    fn test_infer_ref_type_call() {
2217        assert_eq!(
2218            infer_ref_type("validateInput(data)", "validateInput"),
2219            RefType::Calls,
2220        );
2221    }
2222
2223    #[test]
2224    fn test_infer_ref_type_type() {
2225        assert_eq!(
2226            infer_ref_type("let x: MyType = something", "MyType"),
2227            RefType::TypeRef,
2228        );
2229    }
2230
2231    #[test]
2232    fn test_infer_ref_type_multibyte_utf8() {
2233        // Ensure no panic when content contains multi-byte UTF-8 characters
2234        assert_eq!(
2235            infer_ref_type("let café = foo(x)", "foo"),
2236            RefType::Calls,
2237        );
2238        assert_eq!(
2239            infer_ref_type("class HandicapfrPublicationFieldsEnum:\n    É = 1\n    bar()", "bar"),
2240            RefType::Calls,
2241        );
2242        // No match should not panic either
2243        assert_eq!(
2244            infer_ref_type("// 日本語コメント\nlet x = 1", "missing"),
2245            RefType::TypeRef,
2246        );
2247    }
2248
2249    #[test]
2250    fn test_dot_chain_self_resolution() {
2251        let (dir, registry) = create_test_repo();
2252        let root = dir.path();
2253
2254        write_file(root, "service.py", "\
2255class MyService:
2256    def process(self):
2257        return self.validate()
2258
2259    def validate(self):
2260        return True
2261");
2262
2263        let (graph, _) = EntityGraph::build(root, &["service.py".into()], &registry);
2264
2265        // process should have an edge to validate via self.validate()
2266        let process_id = graph.entities.keys()
2267            .find(|id| id.contains("process"))
2268            .expect("process entity should exist");
2269        let deps = graph.get_dependencies(process_id);
2270        assert!(
2271            deps.iter().any(|d| d.name == "validate"),
2272            "process should depend on validate via self.validate(). Deps: {:?}",
2273            deps.iter().map(|d| &d.name).collect::<Vec<_>>()
2274        );
2275    }
2276
2277    #[test]
2278    fn test_dot_chain_this_resolution() {
2279        let (dir, registry) = create_test_repo();
2280        let root = dir.path();
2281
2282        write_file(root, "service.ts", "\
2283class UserService {
2284    process() {
2285        return this.validate();
2286    }
2287    validate() {
2288        return true;
2289    }
2290}
2291");
2292
2293        let (graph, _) = EntityGraph::build(root, &["service.ts".into()], &registry);
2294
2295        let process_id = graph.entities.keys()
2296            .find(|id| id.contains("process"))
2297            .expect("process entity should exist");
2298        let deps = graph.get_dependencies(process_id);
2299        assert!(
2300            deps.iter().any(|d| d.name == "validate"),
2301            "process should depend on validate via this.validate(). Deps: {:?}",
2302            deps.iter().map(|d| &d.name).collect::<Vec<_>>()
2303        );
2304    }
2305
2306    #[test]
2307    fn test_dot_chain_class_static() {
2308        let (dir, registry) = create_test_repo();
2309        let root = dir.path();
2310
2311        write_file(root, "utils.ts", "\
2312class MathUtils {
2313    static compute() { return 1; }
2314}
2315function caller() { return MathUtils.compute(); }
2316");
2317
2318        let (graph, _) = EntityGraph::build(root, &["utils.ts".into()], &registry);
2319
2320        let caller_id = graph.entities.keys()
2321            .find(|id| id.contains("caller"))
2322            .expect("caller entity should exist");
2323        let deps = graph.get_dependencies(caller_id);
2324        assert!(
2325            deps.iter().any(|d| d.name == "compute"),
2326            "caller should depend on compute via MathUtils.compute(). Deps: {:?}",
2327            deps.iter().map(|d| &d.name).collect::<Vec<_>>()
2328        );
2329    }
2330
2331    #[test]
2332    fn test_js_ts_import_resolution() {
2333        let (dir, registry) = create_test_repo();
2334        let root = dir.path();
2335
2336        write_file(root, "helper.ts", "\
2337export function helper() { return 1; }
2338");
2339        write_file(root, "main.ts", "\
2340import { helper } from './helper';
2341export function main() { return helper(); }
2342");
2343
2344        let (graph, _) = EntityGraph::build(
2345            root,
2346            &["helper.ts".into(), "main.ts".into()],
2347            &registry,
2348        );
2349
2350        let main_id = graph.entities.keys()
2351            .find(|id| id.contains("main"))
2352            .expect("main entity should exist");
2353        let deps = graph.get_dependencies(main_id);
2354        assert!(
2355            deps.iter().any(|d| d.name == "helper"),
2356            "main should depend on helper via JS import. Deps: {:?}",
2357            deps.iter().map(|d| &d.name).collect::<Vec<_>>()
2358        );
2359    }
2360
2361    #[test]
2362    fn test_js_ts_relative_import_resolution_uses_full_path() {
2363        let (dir, registry) = create_test_repo();
2364        let root = dir.path();
2365
2366        write_file(root, "src/a/util.ts", "\
2367export function helper() { return 1; }
2368");
2369        write_file(root, "src/b/util.ts", "\
2370export function helper() { return 2; }
2371");
2372        write_file(root, "src/main.ts", "\
2373import { helper } from './b/util';
2374export function caller() { return helper(); }
2375");
2376
2377        let (graph, _) = EntityGraph::build(
2378            root,
2379            &["src/a/util.ts".into(), "src/b/util.ts".into(), "src/main.ts".into()],
2380            &registry,
2381        );
2382
2383        let caller_id = graph.entities.keys()
2384            .find(|id| id.contains("caller"))
2385            .expect("caller entity should exist");
2386        let deps = graph.get_dependencies(caller_id);
2387        assert!(
2388            deps.iter().any(|d| d.name == "helper" && d.file_path == "src/b/util.ts"),
2389            "caller should resolve helper to src/b/util.ts. Deps: {:?}",
2390            deps.iter().map(|d| (&d.name, &d.file_path)).collect::<Vec<_>>()
2391        );
2392        assert!(
2393            !deps.iter().any(|d| d.name == "helper" && d.file_path == "src/a/util.ts"),
2394            "caller should not resolve helper to src/a/util.ts. Deps: {:?}",
2395            deps.iter().map(|d| (&d.name, &d.file_path)).collect::<Vec<_>>()
2396        );
2397    }
2398
2399    #[test]
2400    fn test_js_ts_relative_import_with_extension_prefers_exact_file() {
2401        let (dir, registry) = create_test_repo();
2402        let root = dir.path();
2403
2404        write_file(root, "src/util.js", "\
2405export function helper() { return 1; }
2406");
2407        write_file(root, "src/util.ts", "\
2408export function helper() { return 2; }
2409");
2410        write_file(root, "src/main.ts", "\
2411import { helper } from './util.ts';
2412export function caller() { return helper(); }
2413");
2414
2415        let (graph, _) = EntityGraph::build(
2416            root,
2417            &["src/util.js".into(), "src/util.ts".into(), "src/main.ts".into()],
2418            &registry,
2419        );
2420
2421        let caller_id = graph.entities.keys()
2422            .find(|id| id.contains("caller"))
2423            .expect("caller entity should exist");
2424        let deps = graph.get_dependencies(caller_id);
2425        assert!(
2426            deps.iter().any(|d| d.name == "helper" && d.file_path == "src/util.ts"),
2427            "caller should resolve helper to explicit src/util.ts. Deps: {:?}",
2428            deps.iter().map(|d| (&d.name, &d.file_path)).collect::<Vec<_>>()
2429        );
2430        assert!(
2431            !deps.iter().any(|d| d.name == "helper" && d.file_path == "src/util.js"),
2432            "caller should not resolve explicit ./util.ts to src/util.js. Deps: {:?}",
2433            deps.iter().map(|d| (&d.name, &d.file_path)).collect::<Vec<_>>()
2434        );
2435    }
2436
2437    #[test]
2438    fn test_python_relative_import_resolution_uses_full_path() {
2439        let (dir, registry) = create_test_repo();
2440        let root = dir.path();
2441
2442        write_file(root, "src/a/util.py", "\
2443def helper():
2444    return 1
2445");
2446        write_file(root, "src/b/util.py", "\
2447def helper():
2448    return 2
2449");
2450        write_file(root, "src/main.py", "\
2451from .b.util import helper
2452
2453def caller():
2454    return helper()
2455");
2456
2457        let (graph, _) = EntityGraph::build(
2458            root,
2459            &["src/a/util.py".into(), "src/b/util.py".into(), "src/main.py".into()],
2460            &registry,
2461        );
2462
2463        let caller_id = graph.entities.keys()
2464            .find(|id| id.contains("caller"))
2465            .expect("caller entity should exist");
2466        let deps = graph.get_dependencies(caller_id);
2467        assert!(
2468            deps.iter().any(|d| d.name == "helper" && d.file_path == "src/b/util.py"),
2469            "caller should resolve helper to src/b/util.py. Deps: {:?}",
2470            deps.iter().map(|d| (&d.name, &d.file_path)).collect::<Vec<_>>()
2471        );
2472        assert!(
2473            !deps.iter().any(|d| d.name == "helper" && d.file_path == "src/a/util.py"),
2474            "caller should not resolve helper to src/a/util.py. Deps: {:?}",
2475            deps.iter().map(|d| (&d.name, &d.file_path)).collect::<Vec<_>>()
2476        );
2477    }
2478
2479    #[test]
2480    fn test_python_absolute_import_resolution_uses_full_path() {
2481        let (dir, registry) = create_test_repo();
2482        let root = dir.path();
2483
2484        write_file(root, "src/a/util.py", "\
2485def helper():
2486    return 1
2487");
2488        write_file(root, "src/b/util.py", "\
2489def helper():
2490    return 2
2491");
2492        write_file(root, "src/main.py", "\
2493from src.b.util import helper
2494
2495def caller():
2496    return helper()
2497");
2498
2499        let (graph, _) = EntityGraph::build(
2500            root,
2501            &["src/a/util.py".into(), "src/b/util.py".into(), "src/main.py".into()],
2502            &registry,
2503        );
2504
2505        let caller_id = graph.entities.keys()
2506            .find(|id| id.contains("caller"))
2507            .expect("caller entity should exist");
2508        let deps = graph.get_dependencies(caller_id);
2509        assert!(
2510            deps.iter().any(|d| d.name == "helper" && d.file_path == "src/b/util.py"),
2511            "caller should resolve helper to src/b/util.py. Deps: {:?}",
2512            deps.iter().map(|d| (&d.name, &d.file_path)).collect::<Vec<_>>()
2513        );
2514        assert!(
2515            !deps.iter().any(|d| d.name == "helper" && d.file_path == "src/a/util.py"),
2516            "caller should not resolve helper to src/a/util.py. Deps: {:?}",
2517            deps.iter().map(|d| (&d.name, &d.file_path)).collect::<Vec<_>>()
2518        );
2519    }
2520
2521    #[test]
2522    fn test_js_ts_named_import_does_not_resolve_unrelated_method_receiver() {
2523        let (dir, registry) = create_test_repo();
2524        let root = dir.path();
2525
2526        write_file(root, "lib.ts", "\
2527export function foo() { return 1; }
2528");
2529        write_file(root, "main.ts", "\
2530import { foo } from './lib';
2531export function caller(other) { return other.foo(); }
2532export function actual() { return foo(); }
2533");
2534
2535        let (graph, _) = EntityGraph::build(
2536            root,
2537            &["lib.ts".into(), "main.ts".into()],
2538            &registry,
2539        );
2540
2541        let caller_id = graph.entities.keys()
2542            .find(|id| id.contains("caller"))
2543            .expect("caller entity should exist");
2544        let caller_deps = graph.get_dependencies(caller_id);
2545        assert!(
2546            !caller_deps.iter().any(|d| d.name == "foo" && d.file_path == "lib.ts"),
2547            "other.foo() should not resolve through a bare named import. Deps: {:?}",
2548            caller_deps.iter().map(|d| (&d.name, &d.file_path)).collect::<Vec<_>>()
2549        );
2550
2551        let actual_id = graph.entities.keys()
2552            .find(|id| id.contains("actual"))
2553            .expect("actual entity should exist");
2554        let actual_deps = graph.get_dependencies(actual_id);
2555        assert!(
2556            actual_deps.iter().any(|d| d.name == "foo" && d.file_path == "lib.ts"),
2557            "foo() should still resolve through the named import. Deps: {:?}",
2558            actual_deps.iter().map(|d| (&d.name, &d.file_path)).collect::<Vec<_>>()
2559        );
2560    }
2561
2562    #[test]
2563    fn test_unresolved_method_does_not_block_unrelated_fallback_import() {
2564        let (dir, registry) = create_test_repo();
2565        let root = dir.path();
2566
2567        write_file(root, "lib.ts", "\
2568export const answer = 1;
2569export function foo() { return 1; }
2570");
2571        write_file(root, "main.ts", "\
2572import { answer, foo } from './lib';
2573export function caller(other) {
2574    other.foo();
2575    return answer;
2576}
2577");
2578
2579        let (graph, _) = EntityGraph::build(
2580            root,
2581            &["lib.ts".into(), "main.ts".into()],
2582            &registry,
2583        );
2584
2585        let caller_id = graph.entities.keys()
2586            .find(|id| id.contains("caller"))
2587            .expect("caller entity should exist");
2588        let deps = graph.get_dependencies(caller_id);
2589        assert!(
2590            deps.iter().any(|d| d.name == "answer" && d.file_path == "lib.ts"),
2591            "unresolved other.foo() should not block bare answer import fallback. Deps: {:?}",
2592            deps.iter().map(|d| (&d.name, &d.file_path)).collect::<Vec<_>>()
2593        );
2594        assert!(
2595            !deps.iter().any(|d| d.name == "foo" && d.file_path == "lib.ts"),
2596            "other.foo() should not resolve through the named import. Deps: {:?}",
2597            deps.iter().map(|d| (&d.name, &d.file_path)).collect::<Vec<_>>()
2598        );
2599    }
2600
2601    #[test]
2602    fn test_js_ts_namespace_import_respects_receiver_alias() {
2603        let (dir, registry) = create_test_repo();
2604        let root = dir.path();
2605
2606        write_file(root, "lib.ts", "\
2607export function foo() { return 1; }
2608");
2609        write_file(root, "other.ts", "\
2610export function foo() { return 2; }
2611");
2612        write_file(root, "main.ts", "\
2613import * as lib from './lib';
2614export function caller(other) { return other.foo(); }
2615export function actual() { return lib.foo(); }
2616");
2617
2618        let (graph, _) = EntityGraph::build(
2619            root,
2620            &["lib.ts".into(), "other.ts".into(), "main.ts".into()],
2621            &registry,
2622        );
2623
2624        let caller_id = graph.entities.keys()
2625            .find(|id| id.contains("caller"))
2626            .expect("caller entity should exist");
2627        let caller_deps = graph.get_dependencies(caller_id);
2628        assert!(
2629            !caller_deps.iter().any(|d| d.name == "foo"),
2630            "other.foo() should not resolve via namespace import lib. Deps: {:?}",
2631            caller_deps.iter().map(|d| (&d.name, &d.file_path)).collect::<Vec<_>>()
2632        );
2633
2634        let actual_id = graph.entities.keys()
2635            .find(|id| id.contains("actual"))
2636            .expect("actual entity should exist");
2637        let actual_deps = graph.get_dependencies(actual_id);
2638        assert!(
2639            actual_deps.iter().any(|d| d.name == "foo" && d.file_path == "lib.ts"),
2640            "lib.foo() should resolve to lib.ts. Deps: {:?}",
2641            actual_deps.iter().map(|d| (&d.name, &d.file_path)).collect::<Vec<_>>()
2642        );
2643        assert!(
2644            !actual_deps.iter().any(|d| d.name == "foo" && d.file_path == "other.ts"),
2645            "lib.foo() should not resolve to other.ts. Deps: {:?}",
2646            actual_deps.iter().map(|d| (&d.name, &d.file_path)).collect::<Vec<_>>()
2647        );
2648    }
2649
2650    #[test]
2651    fn test_js_ts_local_binding_shadows_imported_class_receiver() {
2652        let (dir, registry) = create_test_repo();
2653        let root = dir.path();
2654
2655        write_file(root, "lib.ts", "\
2656export class Service {
2657    static run() { return 1; }
2658}
2659");
2660        write_file(root, "main.ts", "\
2661import { Service } from './lib';
2662export function caller(Service) { return Service.run(); }
2663");
2664
2665        let (graph, _) = EntityGraph::build(
2666            root,
2667            &["lib.ts".into(), "main.ts".into()],
2668            &registry,
2669        );
2670
2671        let caller_id = graph.entities.keys()
2672            .find(|id| id.contains("caller"))
2673            .expect("caller entity should exist");
2674        let deps = graph.get_dependencies(caller_id);
2675        assert!(
2676            !deps.iter().any(|d| d.name == "run" && d.file_path == "lib.ts"),
2677            "local parameter Service should shadow imported class receiver. Deps: {:?}",
2678            deps.iter().map(|d| (&d.name, &d.file_path)).collect::<Vec<_>>()
2679        );
2680        assert!(
2681            !deps.iter().any(|d| d.name == "Service" && d.file_path == "lib.ts"),
2682            "local parameter Service should shadow imported class name. Deps: {:?}",
2683            deps.iter().map(|d| (&d.name, &d.file_path)).collect::<Vec<_>>()
2684        );
2685    }
2686
2687    #[test]
2688    fn test_js_ts_local_binding_shadows_namespace_receiver() {
2689        let (dir, registry) = create_test_repo();
2690        let root = dir.path();
2691
2692        write_file(root, "lib.ts", "\
2693export function foo() { return 1; }
2694");
2695        write_file(root, "main.ts", "\
2696import * as lib from './lib';
2697export function caller(lib) { return lib.foo(); }
2698");
2699
2700        let (graph, _) = EntityGraph::build(
2701            root,
2702            &["lib.ts".into(), "main.ts".into()],
2703            &registry,
2704        );
2705
2706        let caller_id = graph.entities.keys()
2707            .find(|id| id.contains("caller"))
2708            .expect("caller entity should exist");
2709        let deps = graph.get_dependencies(caller_id);
2710        assert!(
2711            !deps.iter().any(|d| d.name == "foo" && d.file_path == "lib.ts"),
2712            "local parameter lib should shadow namespace import receiver. Deps: {:?}",
2713            deps.iter().map(|d| (&d.name, &d.file_path)).collect::<Vec<_>>()
2714        );
2715    }
2716
2717    #[test]
2718    fn test_js_ts_local_binding_shadows_named_import_call() {
2719        let (dir, registry) = create_test_repo();
2720        let root = dir.path();
2721
2722        write_file(root, "lib.ts", "\
2723export function foo() { return 1; }
2724");
2725        write_file(root, "main.ts", "\
2726import { foo } from './lib';
2727export function caller(foo) { return foo(); }
2728");
2729
2730        let (graph, _) = EntityGraph::build(
2731            root,
2732            &["lib.ts".into(), "main.ts".into()],
2733            &registry,
2734        );
2735
2736        let caller_id = graph.entities.keys()
2737            .find(|id| id.contains("caller"))
2738            .expect("caller entity should exist");
2739        let deps = graph.get_dependencies(caller_id);
2740        assert!(
2741            !deps.iter().any(|d| d.name == "foo" && d.file_path == "lib.ts"),
2742            "local parameter foo should shadow named import. Deps: {:?}",
2743            deps.iter().map(|d| (&d.name, &d.file_path)).collect::<Vec<_>>()
2744        );
2745    }
2746
2747    #[test]
2748    fn test_dot_chain_no_false_edges() {
2749        let (dir, registry) = create_test_repo();
2750        let root = dir.path();
2751
2752        // Two classes with same method name "process".
2753        // self.process() in ClassA should NOT create edge to ClassB::process.
2754        write_file(root, "a.py", "\
2755class ClassA:
2756    def run(self):
2757        return self.process()
2758
2759    def process(self):
2760        return 1
2761");
2762        write_file(root, "b.py", "\
2763class ClassB:
2764    def process(self):
2765        return 2
2766");
2767
2768        let (graph, _) = EntityGraph::build(
2769            root,
2770            &["a.py".into(), "b.py".into()],
2771            &registry,
2772        );
2773
2774        let run_id = graph.entities.keys()
2775            .find(|id| id.contains("run"))
2776            .expect("run entity should exist");
2777        let deps = graph.get_dependencies(run_id);
2778        // Should have edge to ClassA::process, NOT ClassB::process
2779        for dep in &deps {
2780            if dep.name == "process" {
2781                assert!(
2782                    dep.file_path == "a.py",
2783                    "run's process dep should be in a.py, not {}",
2784                    dep.file_path
2785                );
2786            }
2787        }
2788    }
2789
2790    #[test]
2791    fn test_dot_chain_fallback() {
2792        let (dir, registry) = create_test_repo();
2793        let root = dir.path();
2794
2795        // someVar.unknownMethod() - "someVar" is not a class,
2796        // so the chain is unresolved and words fall through to bag-of-words.
2797        // "helper" should still resolve via bag-of-words.
2798        write_file(root, "app.ts", "\
2799export function helper() { return 1; }
2800export function caller() {
2801    const val = helper();
2802    return val;
2803}
2804");
2805
2806        let (graph, _) = EntityGraph::build(root, &["app.ts".into()], &registry);
2807
2808        let caller_id = graph.entities.keys()
2809            .find(|id| id.contains("caller"))
2810            .expect("caller entity should exist");
2811        let deps = graph.get_dependencies(caller_id);
2812        assert!(
2813            deps.iter().any(|d| d.name == "helper"),
2814            "caller should still resolve helper via bag-of-words. Deps: {:?}",
2815            deps.iter().map(|d| &d.name).collect::<Vec<_>>()
2816        );
2817    }
2818
2819}