1use std::collections::{HashMap, HashSet};
11use std::path::Path;
12use std::sync::{Arc, LazyLock};
13
14#[cfg(feature = "parallel")]
15use rayon::prelude::*;
16use regex::Regex;
17use serde::{Deserialize, Serialize};
18
/// Expands to `<expr>.par_iter()` when the `parallel` feature is enabled and
/// to `<expr>.iter()` otherwise.
///
/// Exactly one of the two `cfg`-gated blocks survives compilation, so the
/// macro always evaluates to a single iterator expression.
macro_rules! maybe_par_iter {
    ($items:expr) => {{
        #[cfg(feature = "parallel")]
        {
            $items.par_iter()
        }
        #[cfg(not(feature = "parallel"))]
        {
            $items.iter()
        }
    }};
}
28
29use crate::git::types::{FileChange, FileStatus};
30use crate::model::entity::SemanticEntity;
31use crate::parser::registry::ParserRegistry;
32use crate::parser::scope_resolve;
33
34#[derive(Debug, Clone, Serialize, Deserialize)]
36#[serde(rename_all = "camelCase")]
37pub struct EntityRef {
38 pub from_entity: String,
39 pub to_entity: String,
40 pub ref_type: RefType,
41}
42
43#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
45#[serde(rename_all = "lowercase")]
46pub enum RefType {
47 Calls,
49 TypeRef,
51 Imports,
53}
54
55#[derive(Debug)]
57pub struct EntityGraph {
58 pub entities: HashMap<String, EntityInfo>,
60 pub edges: Vec<EntityRef>,
62 pub dependents: HashMap<String, Vec<String>>,
64 pub dependencies: HashMap<String, Vec<String>>,
66}
67
68#[derive(Debug, Clone, Serialize, Deserialize)]
70#[serde(rename_all = "camelCase")]
71pub struct EntityInfo {
72 pub id: String,
73 pub name: String,
74 pub entity_type: String,
75 pub file_path: String,
76 #[serde(skip_serializing_if = "Option::is_none")]
77 pub parent_id: Option<String>,
78 pub start_line: usize,
79 pub end_line: usize,
80}
81
82impl EntityGraph {
83 pub fn from_parts(entities: HashMap<String, EntityInfo>, edges: Vec<EntityRef>) -> Self {
85 let mut dependents: HashMap<String, Vec<String>> = HashMap::new();
86 let mut dependencies: HashMap<String, Vec<String>> = HashMap::new();
87 for edge in &edges {
88 dependents
89 .entry(edge.to_entity.clone())
90 .or_default()
91 .push(edge.from_entity.clone());
92 dependencies
93 .entry(edge.from_entity.clone())
94 .or_default()
95 .push(edge.to_entity.clone());
96 }
97 EntityGraph {
98 entities,
99 edges,
100 dependents,
101 dependencies,
102 }
103 }
104
105 pub fn build(
111 root: &Path,
112 file_paths: &[String],
113 registry: &ParserRegistry,
114 ) -> (Self, Vec<SemanticEntity>) {
115 let per_file: Vec<(Vec<SemanticEntity>, Option<(String, String, tree_sitter::Tree)>)> = maybe_par_iter!(file_paths)
118 .filter_map(|file_path| {
119 let full_path = root.join(file_path);
120 let content = std::fs::read_to_string(&full_path).ok()?;
121 let (entities, tree) = registry.extract_entities_with_tree(file_path, &content)?;
122 let parsed = tree.map(|t| (file_path.clone(), content, t));
123 Some((entities, parsed))
124 })
125 .collect();
126
127 let mut all_entities: Vec<SemanticEntity> = Vec::new();
128 let mut parsed_files: Vec<(String, String, tree_sitter::Tree)> = Vec::new();
129 for (entities, parsed) in per_file {
130 all_entities.extend(entities);
131 if let Some(p) = parsed {
132 parsed_files.push(p);
133 }
134 }
135
136 let mut symbol_table: HashMap<String, Vec<String>> = HashMap::with_capacity(all_entities.len());
139 let mut entity_map: HashMap<String, EntityInfo> = HashMap::with_capacity(all_entities.len());
140 let mut parent_child_pairs: HashSet<(&str, &str)> = HashSet::new();
141 let mut class_child_names: HashSet<(&str, &str)> = HashSet::new();
142 let mut class_entity_names: HashSet<&str> = HashSet::new();
143 let mut id_to_name: HashMap<&str, &str> = HashMap::with_capacity(all_entities.len());
144 let mut scope_entity_ranges: HashMap<String, Vec<(usize, usize, String)>> = HashMap::new();
145
146 for entity in &all_entities {
147 symbol_table
148 .entry(entity.name.clone())
149 .or_default()
150 .push(entity.id.clone());
151
152 entity_map.insert(
153 entity.id.clone(),
154 EntityInfo {
155 id: entity.id.clone(),
156 name: entity.name.clone(),
157 entity_type: entity.entity_type.clone(),
158 file_path: entity.file_path.clone(),
159 parent_id: entity.parent_id.clone(),
160 start_line: entity.start_line,
161 end_line: entity.end_line,
162 },
163 );
164
165 if let Some(ref pid) = entity.parent_id {
166 parent_child_pairs.insert((pid.as_str(), entity.id.as_str()));
167 class_child_names.insert((pid.as_str(), entity.name.as_str()));
168 }
169
170 if matches!(entity.entity_type.as_str(), "class" | "struct" | "interface" | "class_type") {
171 class_entity_names.insert(entity.name.as_str());
172 }
173
174 id_to_name.insert(entity.id.as_str(), entity.name.as_str());
175
176 scope_entity_ranges.entry(entity.file_path.clone()).or_default()
177 .push((entity.start_line, entity.end_line, entity.id.clone()));
178 }
179
180 let mut enclosing_class: HashMap<&str, &str> = HashMap::new();
183 let mut class_members: HashMap<&str, Vec<(&str, &str)>> = HashMap::new();
184 let mut scope_class_members: HashMap<String, Vec<(String, String)>> = HashMap::new();
185
186 for entity in &all_entities {
187 if let Some(ref pid) = entity.parent_id {
188 if let Some(&parent_name) = id_to_name.get(pid.as_str()) {
189 if class_entity_names.contains(parent_name) {
190 enclosing_class.insert(entity.id.as_str(), parent_name);
191 class_members
192 .entry(parent_name)
193 .or_default()
194 .push((entity.name.as_str(), entity.id.as_str()));
195 }
196 }
197 if let Some(parent) = entity_map.get(pid.as_str()) {
199 if matches!(parent.entity_type.as_str(), "class" | "struct" | "interface" | "impl") {
200 scope_class_members.entry(parent.name.clone()).or_default()
201 .push((entity.name.clone(), entity.id.clone()));
202 }
203 }
204 }
205 if entity.entity_type == "method" && entity.file_path.ends_with(".go") {
207 if let Some(struct_name) = scope_resolve::extract_go_receiver_type(&entity.content) {
208 scope_class_members.entry(struct_name).or_default()
209 .push((entity.name.clone(), entity.id.clone()));
210 }
211 }
212 }
213
214 let import_table = build_import_table(root, file_paths, &symbol_table, &entity_map, Some(&parsed_files));
217 let owned_go_pkg_index: HashMap<String, Vec<(String, String)>> = if file_paths.iter().any(|f| f.ends_with(".go")) {
219 let mut idx: HashMap<String, Vec<(String, String)>> = HashMap::new();
220 for (name, target_ids) in symbol_table.iter() {
221 for target_id in target_ids {
222 if let Some(entity) = entity_map.get(target_id) {
223 let file_stem = entity.file_path.rsplit('/').next().unwrap_or(&entity.file_path);
224 let file_stem = strip_file_ext(file_stem);
225 idx.entry(file_stem.to_string())
226 .or_default()
227 .push((name.clone(), target_id.clone()));
228 if let Some(parent_start) = entity.file_path.rfind('/') {
229 let parent_path = &entity.file_path[..parent_start];
230 if let Some(dir_name_start) = parent_path.rfind('/') {
231 let dir_name = &parent_path[dir_name_start + 1..];
232 if dir_name != file_stem {
233 idx.entry(dir_name.to_string())
234 .or_default()
235 .push((name.clone(), target_id.clone()));
236 }
237 } else if !parent_path.is_empty() && parent_path != file_stem {
238 idx.entry(parent_path.to_string())
239 .or_default()
240 .push((name.clone(), target_id.clone()));
241 }
242 }
243 }
244 }
245 }
246 idx
247 } else {
248 HashMap::new()
249 };
250
251 let symbol_table = Arc::new(symbol_table);
253
254 let pre_built = scope_resolve::PreBuiltLookups {
255 symbol_table: Arc::clone(&symbol_table),
256 class_members: scope_class_members,
257 entity_ranges: scope_entity_ranges,
258 go_pkg_index: owned_go_pkg_index,
259 };
260
261 let has_scope_lang = file_paths.iter().any(|f| {
263 let ext = f.rfind('.').map(|i| &f[i..]).unwrap_or("");
264 crate::parser::plugins::code::languages::get_language_config(ext)
265 .and_then(|c| c.scope_resolve)
266 .is_some()
267 });
268 let (scope_edges, scope_resolved_entities) = if has_scope_lang {
269 let result = scope_resolve::resolve_with_scopes_full(root, file_paths, &all_entities, &entity_map, Some(parsed_files), Some(pre_built));
270 let resolved_entity_ids: HashSet<String> = result.edges.iter()
271 .map(|(from, _, _)| from.clone())
272 .collect();
273 (result.edges, resolved_entity_ids)
274 } else {
275 (vec![], HashSet::new())
276 };
277
278 let resolved_refs: Vec<(String, String, RefType)> = maybe_par_iter!(all_entities)
284 .flat_map(|entity| {
285 if scope_resolved_entities.contains(&entity.id) {
287 return vec![];
288 }
289
290 let ext = entity.file_path.rfind('.').map(|i| &entity.file_path[i..]).unwrap_or("");
293 if crate::parser::plugins::code::languages::get_language_config(ext).is_none() {
294 return vec![];
295 }
296
297 let mut entity_edges = Vec::new();
298 let mut consumed_words: HashSet<String> = HashSet::new();
299
300 let stripped = strip_comments_and_strings(&entity.content);
302
303 let dot_chains = extract_dot_chains(&stripped);
305
306 for (receiver, member) in &dot_chains {
307 if *receiver == "self" || *receiver == "this" {
308 if let Some(class_name) = enclosing_class.get(entity.id.as_str()) {
310 if let Some(members) = class_members.get(class_name) {
311 for (n, tid) in members {
312 if *n == *member && *tid != entity.id.as_str() {
313 entity_edges.push((
314 entity.id.clone(),
315 tid.to_string(),
316 RefType::Calls,
317 ));
318 consumed_words.insert(member.to_string());
319 break;
320 }
321 }
322 }
323 }
324 } else if class_entity_names.contains(*receiver) {
325 if let Some(members) = class_members.get(*receiver) {
327 for (n, tid) in members {
328 if *n == *member {
329 entity_edges.push((
330 entity.id.clone(),
331 tid.to_string(),
332 RefType::Calls,
333 ));
334 consumed_words.insert(member.to_string());
335 consumed_words.insert(receiver.to_string());
336 break;
337 }
338 }
339 }
340 }
341 }
343
344 let refs = extract_references_with_stripped(&entity.content, &entity.name, &stripped);
347 for ref_name in refs {
348 if consumed_words.contains(ref_name) {
349 continue;
350 }
351
352 if class_child_names.contains(&(entity.id.as_str(), ref_name)) {
354 continue;
355 }
356
357 let import_key = (entity.file_path.clone(), ref_name.to_string());
360 if let Some(import_target_id) = import_table.get(&import_key) {
361 if import_target_id != &entity.id
362 && !parent_child_pairs.contains(&(entity.id.as_str(), import_target_id.as_str()))
363 && !parent_child_pairs.contains(&(import_target_id.as_str(), entity.id.as_str()))
364 {
365 let ref_type = infer_ref_type(&entity.content, &ref_name);
366 entity_edges.push((
367 entity.id.clone(),
368 import_target_id.clone(),
369 ref_type,
370 ));
371 }
372 continue;
373 }
374
375 if let Some(target_ids) = symbol_table.get(ref_name) {
376 let target = target_ids
379 .iter()
380 .find(|id| {
381 *id != &entity.id
382 && entity_map
383 .get(*id)
384 .map_or(false, |e| e.file_path == entity.file_path)
385 });
386
387 if let Some(target_id) = target {
388 if parent_child_pairs.contains(&(entity.id.as_str(), target_id.as_str()))
390 || parent_child_pairs.contains(&(target_id.as_str(), entity.id.as_str()))
391 {
392 continue;
393 }
394 let ref_type = infer_ref_type(&entity.content, &ref_name);
395 entity_edges.push((
396 entity.id.clone(),
397 target_id.clone(),
398 ref_type,
399 ));
400 }
401 }
402 }
403 entity_edges
404 })
405 .collect();
406
407 let mut combined: Vec<(String, String, RefType)> = scope_edges;
409 combined.extend(resolved_refs);
410 let mut seen_edges: HashSet<(String, String)> = HashSet::with_capacity(combined.len());
411 let mut all_resolved: Vec<(String, String, RefType)> = Vec::with_capacity(combined.len());
412 for edge in combined {
413 if seen_edges.insert((edge.0.clone(), edge.1.clone())) {
414 all_resolved.push(edge);
415 }
416 }
417
418 let mut edges: Vec<EntityRef> = Vec::with_capacity(all_resolved.len());
420 let mut dependents: HashMap<String, Vec<String>> = HashMap::new();
421 let mut dependencies: HashMap<String, Vec<String>> = HashMap::new();
422
423 for (from_entity, to_entity, ref_type) in all_resolved {
424 dependents
425 .entry(to_entity.clone())
426 .or_default()
427 .push(from_entity.clone());
428 dependencies
429 .entry(from_entity.clone())
430 .or_default()
431 .push(to_entity.clone());
432 edges.push(EntityRef {
433 from_entity,
434 to_entity,
435 ref_type,
436 });
437 }
438
439 let graph = EntityGraph {
440 entities: entity_map,
441 edges,
442 dependents,
443 dependencies,
444 };
445
446 (graph, all_entities)
447 }
448
449 pub fn build_incremental(
455 root: &Path,
456 stale_files: &[String],
457 all_file_paths: &[String],
458 cached_entities: Vec<SemanticEntity>,
459 cached_edges: Vec<EntityRef>,
460 stale_file_cached_entities: Vec<SemanticEntity>,
461 registry: &ParserRegistry,
462 ) -> (Self, Vec<SemanticEntity>) {
463 let stale_set: HashSet<&str> = stale_files.iter().map(|s| s.as_str()).collect();
465
466 let per_file: Vec<(Vec<SemanticEntity>, Option<(String, String, tree_sitter::Tree)>)> = maybe_par_iter!(stale_files)
468 .filter_map(|file_path| {
469 let full_path = root.join(file_path);
470 let content = std::fs::read_to_string(&full_path).ok()?;
471 let (entities, tree) = registry.extract_entities_with_tree(file_path, &content)?;
472 let parsed = tree.map(|t| (file_path.clone(), content, t));
473 Some((entities, parsed))
474 })
475 .collect();
476
477 let mut new_entities: Vec<SemanticEntity> = Vec::new();
478 let mut parsed_files: Vec<(String, String, tree_sitter::Tree)> = Vec::new();
479 for (entities, parsed) in per_file {
480 new_entities.extend(entities);
481 if let Some(p) = parsed {
482 parsed_files.push(p);
483 }
484 }
485
486 let cached_hashes: HashMap<&str, &str> = stale_file_cached_entities
489 .iter()
490 .map(|e| (e.id.as_str(), e.content_hash.as_str()))
491 .collect();
492
493 let mut truly_changed_ids: HashSet<String> = HashSet::new();
495 let mut content_clean_ids: HashSet<String> = HashSet::new();
496 for entity in &new_entities {
497 match cached_hashes.get(entity.id.as_str()) {
498 Some(old_hash) if *old_hash == entity.content_hash.as_str() => {
499 content_clean_ids.insert(entity.id.clone());
500 }
501 _ => {
502 truly_changed_ids.insert(entity.id.clone());
504 }
505 }
506 }
507
508 let new_entity_ids: HashSet<&str> = new_entities.iter().map(|e| e.id.as_str()).collect();
510 let deleted_ids: HashSet<&str> = stale_file_cached_entities
511 .iter()
512 .filter(|e| !new_entity_ids.contains(e.id.as_str()))
513 .map(|e| e.id.as_str())
514 .collect();
515
516 let all_entities: Vec<SemanticEntity> = cached_entities
518 .into_iter()
519 .chain(new_entities.into_iter())
520 .collect();
521
522 let mut affected_clean_ids: HashSet<String> = HashSet::new();
524 for edge in &cached_edges {
525 let to_truly_changed = truly_changed_ids.contains(&edge.to_entity)
526 || deleted_ids.contains(edge.to_entity.as_str());
527 if to_truly_changed && !stale_set.contains(
528 all_entities.iter()
529 .find(|e| e.id == edge.from_entity)
530 .map(|e| e.file_path.as_str())
531 .unwrap_or("")
532 ) {
533 affected_clean_ids.insert(edge.from_entity.clone());
534 }
535 }
536
537 let stale_entity_ids: HashSet<&str> = all_entities
539 .iter()
540 .filter(|e| stale_set.contains(e.file_path.as_str()))
541 .map(|e| e.id.as_str())
542 .collect();
543
544 let kept_edges: Vec<EntityRef> = cached_edges
549 .into_iter()
550 .filter(|e| {
551 let from_stale = stale_entity_ids.contains(e.from_entity.as_str());
552 let to_stale = stale_entity_ids.contains(e.to_entity.as_str());
553
554 if !from_stale && !to_stale && !affected_clean_ids.contains(&e.from_entity) {
555 return true;
557 }
558 false
559 })
560 .collect();
561
562 let needs_resolution: HashSet<&str> = all_entities
566 .iter()
567 .filter(|e| {
568 truly_changed_ids.contains(&e.id)
569 || content_clean_ids.contains(&e.id)
570 || affected_clean_ids.contains(&e.id)
571 })
572 .map(|e| e.id.as_str())
573 .collect();
574
575 let mut symbol_table: HashMap<String, Vec<String>> = HashMap::with_capacity(all_entities.len());
580 let mut entity_map: HashMap<String, EntityInfo> = HashMap::with_capacity(all_entities.len());
581
582 for entity in &all_entities {
583 symbol_table
584 .entry(entity.name.clone())
585 .or_default()
586 .push(entity.id.clone());
587 entity_map.insert(
588 entity.id.clone(),
589 EntityInfo {
590 id: entity.id.clone(),
591 name: entity.name.clone(),
592 entity_type: entity.entity_type.clone(),
593 file_path: entity.file_path.clone(),
594 parent_id: entity.parent_id.clone(),
595 start_line: entity.start_line,
596 end_line: entity.end_line,
597 },
598 );
599 }
600
601 let parent_child_pairs: HashSet<(&str, &str)> = all_entities
603 .iter()
604 .filter_map(|e| {
605 e.parent_id.as_ref().map(|pid| (pid.as_str(), e.id.as_str()))
606 })
607 .collect();
608
609 let class_child_names: HashSet<(&str, &str)> = all_entities
610 .iter()
611 .filter_map(|e| {
612 e.parent_id.as_ref().map(|pid| (pid.as_str(), e.name.as_str()))
613 })
614 .collect();
615
616 let class_entity_names: HashSet<&str> = all_entities
617 .iter()
618 .filter(|e| matches!(e.entity_type.as_str(), "class" | "struct" | "interface" | "class_type"))
619 .map(|e| e.name.as_str())
620 .collect();
621
622 let id_to_name: HashMap<&str, &str> = all_entities
623 .iter()
624 .map(|e| (e.id.as_str(), e.name.as_str()))
625 .collect();
626
627 let mut enclosing_class: HashMap<&str, &str> = HashMap::new();
628 let mut class_members: HashMap<&str, Vec<(&str, &str)>> = HashMap::new();
629
630 for entity in &all_entities {
631 if let Some(ref pid) = entity.parent_id {
632 if let Some(&parent_name) = id_to_name.get(pid.as_str()) {
633 if class_entity_names.contains(parent_name) {
634 enclosing_class.insert(entity.id.as_str(), parent_name);
635 class_members
636 .entry(parent_name)
637 .or_default()
638 .push((entity.name.as_str(), entity.id.as_str()));
639 }
640 }
641 }
642 }
643
644 let import_table = build_import_table(root, all_file_paths, &symbol_table, &entity_map, Some(&parsed_files));
646
647 let resolve_file_paths: Vec<String> = all_file_paths
649 .iter()
650 .filter(|f| {
651 stale_set.contains(f.as_str()) || all_entities.iter().any(|e| {
653 e.file_path == **f && affected_clean_ids.contains(&e.id)
654 })
655 })
656 .cloned()
657 .collect();
658
659 let has_scope_lang = resolve_file_paths.iter().any(|f| {
660 let ext = f.rfind('.').map(|i| &f[i..]).unwrap_or("");
661 crate::parser::plugins::code::languages::get_language_config(ext)
662 .and_then(|c| c.scope_resolve)
663 .is_some()
664 });
665 let (scope_edges, scope_resolved_entities) = if has_scope_lang {
666 let resolve_set: HashSet<&str> = resolve_file_paths.iter().map(|s| s.as_str()).collect();
668 let relevant_parsed: Vec<(String, String, tree_sitter::Tree)> = parsed_files
669 .into_iter()
670 .filter(|(fp, _, _)| resolve_set.contains(fp.as_str()))
671 .collect();
672 let pre = if relevant_parsed.is_empty() { None } else { Some(relevant_parsed) };
673 let result = scope_resolve::resolve_with_scopes_full(root, &resolve_file_paths, &all_entities, &entity_map, pre, None);
674 let resolved_entity_ids: HashSet<String> = result.edges.iter()
675 .map(|(from, _, _)| from.clone())
676 .collect();
677 (result.edges, resolved_entity_ids)
678 } else {
679 (vec![], HashSet::new())
680 };
681
682 let resolved_refs: Vec<(String, String, RefType)> = maybe_par_iter!(all_entities)
684 .filter(|e| needs_resolution.contains(e.id.as_str()))
685 .flat_map(|entity| {
686 if scope_resolved_entities.contains(&entity.id) {
687 return vec![];
688 }
689
690 let ext = entity.file_path.rfind('.').map(|i| &entity.file_path[i..]).unwrap_or("");
692 if crate::parser::plugins::code::languages::get_language_config(ext).is_none() {
693 return vec![];
694 }
695
696 let mut entity_edges = Vec::new();
697 let mut consumed_words: HashSet<String> = HashSet::new();
698
699 let stripped = strip_comments_and_strings(&entity.content);
701
702 let dot_chains = extract_dot_chains(&stripped);
704
705 for (receiver, member) in &dot_chains {
706 if *receiver == "self" || *receiver == "this" {
707 if let Some(class_name) = enclosing_class.get(entity.id.as_str()) {
708 if let Some(members) = class_members.get(class_name) {
709 for (n, tid) in members {
710 if *n == *member && *tid != entity.id.as_str() {
711 entity_edges.push((
712 entity.id.clone(),
713 tid.to_string(),
714 RefType::Calls,
715 ));
716 consumed_words.insert(member.to_string());
717 break;
718 }
719 }
720 }
721 }
722 } else if class_entity_names.contains(*receiver) {
723 if let Some(members) = class_members.get(*receiver) {
724 for (n, tid) in members {
725 if *n == *member {
726 entity_edges.push((
727 entity.id.clone(),
728 tid.to_string(),
729 RefType::Calls,
730 ));
731 consumed_words.insert(member.to_string());
732 consumed_words.insert(receiver.to_string());
733 break;
734 }
735 }
736 }
737 }
738 }
739
740 let refs = extract_references_with_stripped(&entity.content, &entity.name, &stripped);
742 for ref_name in refs {
743 if consumed_words.contains(ref_name) {
744 continue;
745 }
746 if class_child_names.contains(&(entity.id.as_str(), ref_name)) {
747 continue;
748 }
749
750 let import_key = (entity.file_path.clone(), ref_name.to_string());
751 if let Some(import_target_id) = import_table.get(&import_key) {
752 if import_target_id != &entity.id
753 && !parent_child_pairs.contains(&(entity.id.as_str(), import_target_id.as_str()))
754 && !parent_child_pairs.contains(&(import_target_id.as_str(), entity.id.as_str()))
755 {
756 let ref_type = infer_ref_type(&entity.content, &ref_name);
757 entity_edges.push((
758 entity.id.clone(),
759 import_target_id.clone(),
760 ref_type,
761 ));
762 }
763 continue;
764 }
765
766 if let Some(target_ids) = symbol_table.get(ref_name) {
767 let target = target_ids
768 .iter()
769 .find(|id| {
770 *id != &entity.id
771 && entity_map
772 .get(*id)
773 .map_or(false, |e| e.file_path == entity.file_path)
774 });
775
776 if let Some(target_id) = target {
777 if parent_child_pairs.contains(&(entity.id.as_str(), target_id.as_str()))
778 || parent_child_pairs.contains(&(target_id.as_str(), entity.id.as_str()))
779 {
780 continue;
781 }
782 let ref_type = infer_ref_type(&entity.content, &ref_name);
783 entity_edges.push((
784 entity.id.clone(),
785 target_id.clone(),
786 ref_type,
787 ));
788 }
789 }
790 }
791 entity_edges
792 })
793 .collect();
794
795 let mut combined: Vec<(String, String, RefType)> = scope_edges;
797 combined.extend(resolved_refs);
798 let mut seen_edges: HashSet<(String, String)> = HashSet::with_capacity(combined.len());
799 let mut all_resolved: Vec<(String, String, RefType)> = Vec::with_capacity(combined.len());
800 for edge in combined {
801 if seen_edges.insert((edge.0.clone(), edge.1.clone())) {
802 all_resolved.push(edge);
803 }
804 }
805
806 let mut edges: Vec<EntityRef> = Vec::with_capacity(kept_edges.len() + all_resolved.len());
808 let mut dependents: HashMap<String, Vec<String>> = HashMap::new();
809 let mut dependencies: HashMap<String, Vec<String>> = HashMap::new();
810
811 let mut all_edge_pairs: HashSet<(String, String)> = HashSet::new();
813
814 for edge in kept_edges {
816 all_edge_pairs.insert((edge.from_entity.clone(), edge.to_entity.clone()));
817 dependents
818 .entry(edge.to_entity.clone())
819 .or_default()
820 .push(edge.from_entity.clone());
821 dependencies
822 .entry(edge.from_entity.clone())
823 .or_default()
824 .push(edge.to_entity.clone());
825 edges.push(edge);
826 }
827
828 for (from_entity, to_entity, ref_type) in all_resolved {
830 if !all_edge_pairs.insert((from_entity.clone(), to_entity.clone())) {
831 continue;
832 }
833 dependents
834 .entry(to_entity.clone())
835 .or_default()
836 .push(from_entity.clone());
837 dependencies
838 .entry(from_entity.clone())
839 .or_default()
840 .push(to_entity.clone());
841 edges.push(EntityRef {
842 from_entity,
843 to_entity,
844 ref_type,
845 });
846 }
847
848 let graph = EntityGraph {
849 entities: entity_map,
850 edges,
851 dependents,
852 dependencies,
853 };
854
855 (graph, all_entities)
856 }
857
858 pub fn get_dependents(&self, entity_id: &str) -> Vec<&EntityInfo> {
860 self.dependents
861 .get(entity_id)
862 .map(|ids| {
863 ids.iter()
864 .filter_map(|id| self.entities.get(id))
865 .collect()
866 })
867 .unwrap_or_default()
868 }
869
870 pub fn get_dependencies(&self, entity_id: &str) -> Vec<&EntityInfo> {
872 self.dependencies
873 .get(entity_id)
874 .map(|ids| {
875 ids.iter()
876 .filter_map(|id| self.entities.get(id))
877 .collect()
878 })
879 .unwrap_or_default()
880 }
881
882 pub fn impact_analysis(&self, entity_id: &str) -> Vec<&EntityInfo> {
885 self.impact_analysis_capped(entity_id, 10_000)
886 }
887
888 pub fn impact_analysis_bounded(&self, entity_id: &str, max_depth: usize) -> Vec<(&EntityInfo, usize)> {
891 let mut visited: HashSet<&str> = HashSet::new();
892 let mut queue: std::collections::VecDeque<(&str, usize)> = std::collections::VecDeque::new();
893 let mut result = Vec::new();
894
895 let start_key = match self.entities.get_key_value(entity_id) {
896 Some((k, _)) => k.as_str(),
897 None => return result,
898 };
899
900 queue.push_back((start_key, 0));
901 visited.insert(start_key);
902
903 while let Some((current, depth)) = queue.pop_front() {
904 if let Some(deps) = self.dependents.get(current) {
905 let next_depth = depth + 1;
906 if max_depth > 0 && next_depth > max_depth {
907 continue;
908 }
909 for dep in deps {
910 if visited.insert(dep.as_str()) {
911 if let Some(info) = self.entities.get(dep.as_str()) {
912 result.push((info, next_depth));
913 }
914 queue.push_back((dep.as_str(), next_depth));
915 }
916 }
917 }
918 }
919
920 result
921 }
922
923 pub fn impact_analysis_capped(&self, entity_id: &str, max_visited: usize) -> Vec<&EntityInfo> {
926 let mut visited: HashSet<&str> = HashSet::new();
927 let mut queue: std::collections::VecDeque<&str> = std::collections::VecDeque::new();
928 let mut result = Vec::new();
929
930 let start_key = match self.entities.get_key_value(entity_id) {
931 Some((k, _)) => k.as_str(),
932 None => return result,
933 };
934
935 queue.push_back(start_key);
936 visited.insert(start_key);
937
938 while let Some(current) = queue.pop_front() {
939 if result.len() >= max_visited {
940 break;
941 }
942 if let Some(deps) = self.dependents.get(current) {
943 for dep in deps {
944 if visited.insert(dep.as_str()) {
945 if let Some(info) = self.entities.get(dep.as_str()) {
946 result.push(info);
947 }
948 queue.push_back(dep.as_str());
949 if result.len() >= max_visited {
950 break;
951 }
952 }
953 }
954 }
955 }
956
957 result
958 }
959
960 pub fn impact_count(&self, entity_id: &str, max_count: usize) -> usize {
963 let mut visited: HashSet<&str> = HashSet::new();
964 let mut queue: std::collections::VecDeque<&str> = std::collections::VecDeque::new();
965 let mut count = 0;
966
967 let start_key = match self.entities.get_key_value(entity_id) {
969 Some((k, _)) => k.as_str(),
970 None => return 0,
971 };
972
973 queue.push_back(start_key);
974 visited.insert(start_key);
975
976 while let Some(current) = queue.pop_front() {
977 if count >= max_count {
978 break;
979 }
980 if let Some(deps) = self.dependents.get(current) {
981 for dep in deps {
982 if visited.insert(dep.as_str()) {
983 count += 1;
984 queue.push_back(dep.as_str());
985 if count >= max_count {
986 break;
987 }
988 }
989 }
990 }
991 }
992
993 count
994 }
995
996 pub fn filter_test_entities(&self, entities: &[crate::model::entity::SemanticEntity]) -> HashSet<String> {
999 let mut test_ids = HashSet::new();
1000 for entity in entities {
1001 if is_test_entity(entity) {
1002 test_ids.insert(entity.id.clone());
1003 }
1004 }
1005 test_ids
1006 }
1007
1008 pub fn test_impact(
1011 &self,
1012 entity_id: &str,
1013 all_entities: &[crate::model::entity::SemanticEntity],
1014 ) -> Vec<&EntityInfo> {
1015 let test_ids = self.filter_test_entities(all_entities);
1016 let impact = self.impact_analysis(entity_id);
1017 impact
1018 .into_iter()
1019 .filter(|info| test_ids.contains(&info.id))
1020 .collect()
1021 }
1022
1023 pub fn update_from_changes(
1034 &mut self,
1035 changed_files: &[FileChange],
1036 root: &Path,
1037 registry: &ParserRegistry,
1038 ) {
1039 let mut affected_files: HashSet<String> = HashSet::new();
1040 let mut new_entities: Vec<SemanticEntity> = Vec::new();
1041
1042 for change in changed_files {
1043 affected_files.insert(change.file_path.clone());
1044 if let Some(ref old_path) = change.old_file_path {
1045 affected_files.insert(old_path.clone());
1046 }
1047
1048 match change.status {
1049 FileStatus::Deleted => {
1050 self.remove_entities_for_file(&change.file_path);
1051 }
1052 FileStatus::Renamed => {
1053 if let Some(ref old_path) = change.old_file_path {
1055 self.remove_entities_for_file(old_path);
1056 }
1057 if let Some(entities) = self.extract_file_entities(
1059 &change.file_path,
1060 change.after_content.as_deref(),
1061 root,
1062 registry,
1063 ) {
1064 new_entities.extend(entities);
1065 }
1066 }
1067 FileStatus::Added | FileStatus::Modified => {
1068 self.remove_entities_for_file(&change.file_path);
1070 if let Some(entities) = self.extract_file_entities(
1072 &change.file_path,
1073 change.after_content.as_deref(),
1074 root,
1075 registry,
1076 ) {
1077 new_entities.extend(entities);
1078 }
1079 }
1080 }
1081 }
1082
1083 for entity in &new_entities {
1085 self.entities.insert(
1086 entity.id.clone(),
1087 EntityInfo {
1088 id: entity.id.clone(),
1089 name: entity.name.clone(),
1090 entity_type: entity.entity_type.clone(),
1091 file_path: entity.file_path.clone(),
1092 parent_id: entity.parent_id.clone(),
1093 start_line: entity.start_line,
1094 end_line: entity.end_line,
1095 },
1096 );
1097 }
1098
1099 let symbol_table = self.build_symbol_table();
1101
1102 for entity in &new_entities {
1104 self.resolve_entity_references(entity, &symbol_table);
1105 }
1106
1107 let changed_entity_names: HashSet<String> = new_entities
1110 .iter()
1111 .map(|e| e.name.clone())
1112 .collect();
1113
1114 let entities_to_recheck: Vec<String> = self
1116 .entities
1117 .values()
1118 .filter(|e| !affected_files.contains(&e.file_path))
1119 .filter(|e| {
1120 self.dependencies
1121 .get(&e.id)
1122 .map_or(false, |deps| {
1123 deps.iter().any(|dep_id| {
1124 self.entities
1125 .get(dep_id)
1126 .map_or(false, |dep| changed_entity_names.contains(&dep.name))
1127 })
1128 })
1129 })
1130 .map(|e| e.id.clone())
1131 .collect();
1132
1133 let _ = entities_to_recheck; }
1140
1141 fn extract_file_entities(
1143 &self,
1144 file_path: &str,
1145 content: Option<&str>,
1146 root: &Path,
1147 registry: &ParserRegistry,
1148 ) -> Option<Vec<SemanticEntity>> {
1149 let content = if let Some(c) = content {
1150 c.to_string()
1151 } else {
1152 let full_path = root.join(file_path);
1153 std::fs::read_to_string(&full_path).ok()?
1154 };
1155
1156 Some(registry.extract_entities(file_path, &content))
1157 }
1158
1159 fn remove_entities_for_file(&mut self, file_path: &str) {
1161 let ids_to_remove: Vec<String> = self
1163 .entities
1164 .values()
1165 .filter(|e| e.file_path == file_path)
1166 .map(|e| e.id.clone())
1167 .collect();
1168
1169 let id_set: HashSet<&str> = ids_to_remove.iter().map(|s| s.as_str()).collect();
1170
1171 for id in &ids_to_remove {
1173 self.entities.remove(id);
1174 }
1175
1176 self.edges
1178 .retain(|e| !id_set.contains(e.from_entity.as_str()) && !id_set.contains(e.to_entity.as_str()));
1179
1180 for id in &ids_to_remove {
1182 if let Some(deps) = self.dependencies.remove(id) {
1184 for dep in &deps {
1186 if let Some(dependents) = self.dependents.get_mut(dep) {
1187 dependents.retain(|d| d != id);
1188 }
1189 }
1190 }
1191 if let Some(deps) = self.dependents.remove(id) {
1193 for dep in &deps {
1195 if let Some(dependencies) = self.dependencies.get_mut(dep) {
1196 dependencies.retain(|d| d != id);
1197 }
1198 }
1199 }
1200 }
1201 }
1202
1203 fn build_symbol_table(&self) -> HashMap<String, Vec<String>> {
1205 let mut symbol_table: HashMap<String, Vec<String>> = HashMap::new();
1206 for entity in self.entities.values() {
1207 symbol_table
1208 .entry(entity.name.clone())
1209 .or_default()
1210 .push(entity.id.clone());
1211 }
1212 symbol_table
1213 }
1214
1215 fn resolve_entity_references(
1217 &mut self,
1218 entity: &SemanticEntity,
1219 symbol_table: &HashMap<String, Vec<String>>,
1220 ) {
1221 let refs = extract_references_from_content(&entity.content, &entity.name);
1222
1223 for ref_name in refs {
1224 if let Some(target_ids) = symbol_table.get(ref_name) {
1225 let target = target_ids
1226 .iter()
1227 .find(|id| {
1228 *id != &entity.id
1229 && self
1230 .entities
1231 .get(*id)
1232 .map_or(false, |e| e.file_path == entity.file_path)
1233 })
1234 .or_else(|| target_ids.iter().find(|id| *id != &entity.id));
1235
1236 if let Some(target_id) = target {
1237 let ref_type = infer_ref_type(&entity.content, &ref_name);
1238 self.edges.push(EntityRef {
1239 from_entity: entity.id.clone(),
1240 to_entity: target_id.clone(),
1241 ref_type,
1242 });
1243 self.dependents
1244 .entry(target_id.clone())
1245 .or_default()
1246 .push(entity.id.clone());
1247 self.dependencies
1248 .entry(entity.id.clone())
1249 .or_default()
1250 .push(target_id.clone());
1251 }
1252 }
1253 }
1254 }
1255}
1256
1257fn is_test_entity(entity: &crate::model::entity::SemanticEntity) -> bool {
1259 let name = &entity.name;
1260 let path = &entity.file_path;
1261 let content = &entity.content;
1262
1263 if name.starts_with("test_") || name.starts_with("Test") || name.ends_with("_test") || name.ends_with("Test") {
1265 return true;
1266 }
1267 if name.starts_with("it_") || name.starts_with("describe_") || name.starts_with("spec_") {
1268 return true;
1269 }
1270
1271 let path_lower = path.to_lowercase();
1273 let in_test_file = path_lower.contains("/test/")
1274 || path_lower.contains("/tests/")
1275 || path_lower.contains("/spec/")
1276 || path_lower.contains("_test.")
1277 || path_lower.contains(".test.")
1278 || path_lower.contains("_spec.")
1279 || path_lower.contains(".spec.");
1280
1281 let has_test_marker = content.contains("#[test]")
1283 || content.contains("#[cfg(test)]")
1284 || content.contains("@Test")
1285 || content.contains("@pytest")
1286 || content.contains("@test")
1287 || content.contains("describe(")
1288 || content.contains("it(")
1289 || content.contains("test(");
1290
1291 in_test_file && has_test_marker
1292}
1293
/// Builds the import table: `(importing file path, locally visible name) -> entity id`.
///
/// Every path in `file_paths` (except `.go` files, which are skipped) is scanned
/// textually for three kinds of import statements:
/// - `from <module> import <names>` lines (single-line or parenthesised multi-line),
/// - JS/TS `import { a, b as c } from '...'` and `import X from '...'`,
/// - Rust `use a::b::Name;` and `use a::b::{Name, Other as Alias};`.
///
/// An imported name is recorded only when `symbol_table` knows it AND one of the
/// candidate entities lives in a file whose stem (file name without extension)
/// equals the last segment of the imported module path.
///
/// File contents are taken from `pre_parsed_content` when present there, otherwise
/// read from `root.join(file_path)`; files that cannot be read are skipped.
fn build_import_table(
    root: &Path,
    file_paths: &[String],
    symbol_table: &HashMap<String, Vec<String>>,
    entity_map: &HashMap<String, EntityInfo>,
    pre_parsed_content: Option<&[(String, String, tree_sitter::Tree)]>,
) -> HashMap<(String, String), String> {
    // Index already-loaded contents by file path so those files are not re-read.
    let content_map: HashMap<&str, &str> = pre_parsed_content
        .map(|files| {
            files.iter().map(|(fp, content, _)| (fp.as_str(), content.as_str())).collect()
        })
        .unwrap_or_default();

    // Each file produces its own list of entries; `maybe_par_iter!` iterates in
    // parallel when the `parallel` feature is enabled and sequentially otherwise.
    let per_file_imports: Vec<Vec<((String, String), String)>> = maybe_par_iter!(file_paths)
        .filter_map(|file_path| {
            // Go files are not scanned here.
            if file_path.ends_with(".go") {
                return None;
            }

            // `owned_content` only exists to keep a disk-read String alive for
            // as long as `content` borrows from it.
            let owned_content: Option<String>;
            let content: &str = if let Some(c) = content_map.get(file_path.as_str()) {
                c
            } else {
                let full_path = root.join(file_path);
                owned_content = std::fs::read_to_string(&full_path).ok();
                match owned_content.as_deref() {
                    Some(c) => c,
                    None => return None,
                }
            };

            let mut local_imports: Vec<((String, String), String)> = Vec::new();

            // Pass 1: collapse `from X import (...)` statements that span several
            // physical lines into one logical line each.
            let mut logical_lines: Vec<String> = Vec::new();
            let mut current_line = String::new();
            let mut in_parens = false;

            for line in content.lines() {
                let trimmed = line.trim();
                if in_parens {
                    // Continuation line: drop trailing `)`/`,` and any `#` comment,
                    // then append what is left as one more comma-separated name.
                    let clean = trimmed.trim_end_matches(|c: char| c == ')' || c == ',');
                    let clean = clean.split('#').next().unwrap_or(clean).trim();
                    if !clean.is_empty() && clean != "(" {
                        current_line.push_str(", ");
                        current_line.push_str(clean);
                    }
                    if trimmed.contains(')') {
                        in_parens = false;
                        logical_lines.push(std::mem::take(&mut current_line));
                    }
                } else if trimmed.starts_with("from ") && trimmed.contains(" import ") {
                    if trimmed.contains('(') && !trimmed.contains(')') {
                        // Opening line of a multi-line import: keep the part before
                        // `(` and any names that already follow it on this line.
                        in_parens = true;
                        let before_paren = trimmed.split('(').next().unwrap_or(trimmed);
                        current_line = before_paren.trim().to_string();
                        if let Some(after) = trimmed.split('(').nth(1) {
                            let after = after.trim().trim_end_matches(')').trim();
                            if !after.is_empty() {
                                current_line.push(' ');
                                current_line.push_str(after);
                            }
                        }
                    } else {
                        logical_lines.push(trimmed.to_string());
                    }
                }
            }

            // Pass 2: resolve each `from <module> import <names>` logical line.
            for logical_line in &logical_lines {
                if let Some(rest) = logical_line.strip_prefix("from ") {
                    // " import " and " import," are both 8 bytes long. The comma
                    // form arises from the multi-line join above, which appends
                    // ", name" directly after the trimmed "... import".
                    let import_match = rest.find(" import ")
                        .map(|pos| (pos, 8))
                        .or_else(|| rest.find(" import,").map(|pos| (pos, 8)));
                    if let Some((import_pos, skip)) = import_match {
                        let module_path = &rest[..import_pos];
                        let names_str = &rest[import_pos + skip..];

                        // Last dotted segment of the module path, ignoring leading
                        // dots (relative imports).
                        let source_module = module_path
                            .trim_start_matches('.')
                            .rsplit('.')
                            .next()
                            .unwrap_or(module_path.trim_start_matches('.'));

                        for name_part in names_str.split(',') {
                            let name_part = name_part.trim();
                            // Only the first whitespace-separated word is used, so an
                            // `as alias` suffix is ignored and the original name is keyed.
                            let imported_name = name_part.split_whitespace().next().unwrap_or(name_part);
                            let imported_name = imported_name.trim_matches(|c: char| c == '(' || c == ')' || c == ',');
                            if imported_name.is_empty() {
                                continue;
                            }

                            if let Some(target_ids) = symbol_table.get(imported_name) {
                                // Accept the first candidate whose file stem equals the
                                // module name. Note this branch strips only
                                // .py/.ts/.js/.rs, unlike `strip_file_ext` used below.
                                let target = target_ids.iter().find(|id| {
                                    entity_map.get(*id).map_or(false, |e| {
                                        let stem = e.file_path.rsplit('/').next().unwrap_or(&e.file_path);
                                        let stem = stem.strip_suffix(".py")
                                            .or_else(|| stem.strip_suffix(".ts"))
                                            .or_else(|| stem.strip_suffix(".js"))
                                            .or_else(|| stem.strip_suffix(".rs"))
                                            .unwrap_or(stem);
                                        stem == source_module
                                    })
                                });
                                if let Some(target_id) = target {
                                    local_imports.push((
                                        (file_path.clone(), imported_name.to_string()),
                                        target_id.clone(),
                                    ));
                                }
                            }
                        }
                    }
                }
            }

            // JS/TS: ES-module `import` statements, matched over the whole file text.
            let is_js_ts = file_path.ends_with(".js") || file_path.ends_with(".ts")
                || file_path.ends_with(".jsx") || file_path.ends_with(".tsx");

            if is_js_ts {
                // `import { a, b as c } from '...'`
                static JS_NAMED_RE: LazyLock<Regex> = LazyLock::new(|| {
                    Regex::new(r#"import\s*\{([^}]+)\}\s*from\s*['"]([^'"]+)['"]"#).unwrap()
                });
                // `import X from '...'` / `import type X from '...'`
                static JS_DEFAULT_RE: LazyLock<Regex> = LazyLock::new(|| {
                    Regex::new(r#"import\s+(?:type\s+)?([A-Za-z_]\w*)\s+from\s*['"]([^'"]+)['"]"#).unwrap()
                });

                for cap in JS_NAMED_RE.captures_iter(content) {
                    let names_str = cap.get(1).unwrap().as_str();
                    let module_path = cap.get(2).unwrap().as_str();
                    // Module name = last path segment of the specifier, minus a JS/TS extension.
                    let source_module = module_path.rsplit('/').next().unwrap_or(module_path);
                    let source_module = strip_js_ext(source_module);

                    for name_part in names_str.split(',') {
                        let name_part = name_part.trim();
                        if name_part.is_empty() { continue; }

                        // `orig as local`: look up by the exported name, key by the
                        // local alias. A leading `type ` modifier is dropped.
                        let (original_name, local_name) = if let Some(pos) = name_part.find(" as ") {
                            let orig = name_part[..pos].trim();
                            let local = name_part[pos + 4..].trim();
                            let orig = orig.strip_prefix("type ").unwrap_or(orig);
                            (orig, local)
                        } else {
                            let name = name_part.strip_prefix("type ").unwrap_or(name_part);
                            (name, name)
                        };

                        if original_name.is_empty() || local_name.is_empty() { continue; }

                        if let Some(target_ids) = symbol_table.get(original_name) {
                            let target = target_ids.iter().find(|id| {
                                entity_map.get(*id).map_or(false, |e| {
                                    let stem = e.file_path.rsplit('/').next().unwrap_or(&e.file_path);
                                    let stem = strip_file_ext(stem);
                                    stem == source_module
                                })
                            });
                            if let Some(target_id) = target {
                                local_imports.push((
                                    (file_path.clone(), local_name.to_string()),
                                    target_id.clone(),
                                ));
                            }
                        }
                    }
                }

                // Default imports: the local binding name is itself used for the
                // symbol lookup, so it only resolves when an entity of that exact
                // name exists in the matching module file.
                for cap in JS_DEFAULT_RE.captures_iter(content) {
                    let local_name = cap.get(1).unwrap().as_str();
                    let module_path = cap.get(2).unwrap().as_str();
                    let source_module = module_path.rsplit('/').next().unwrap_or(module_path);
                    let source_module = strip_js_ext(source_module);

                    if let Some(target_ids) = symbol_table.get(local_name) {
                        let target = target_ids.iter().find(|id| {
                            entity_map.get(*id).map_or(false, |e| {
                                let stem = e.file_path.rsplit('/').next().unwrap_or(&e.file_path);
                                let stem = strip_file_ext(stem);
                                stem == source_module
                            })
                        });
                        if let Some(target_id) = target {
                            local_imports.push((
                                (file_path.clone(), local_name.to_string()),
                                target_id.clone(),
                            ));
                        }
                    }
                }
            }

            // Rust: `use` declarations, optionally prefixed by crate::/super::/self::.
            let is_rust = file_path.ends_with(".rs");
            if is_rust {
                // `use a::b::Name;`
                static RUST_USE_SIMPLE_RE: LazyLock<Regex> = LazyLock::new(|| {
                    Regex::new(r"(?m)^\s*use\s+(?:(?:crate|super|self)::)?([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\s*;").unwrap()
                });
                // `use a::b::{Name, Other as Alias};` (single, non-nested brace group)
                static RUST_USE_GROUP_RE: LazyLock<Regex> = LazyLock::new(|| {
                    Regex::new(r"(?m)^\s*use\s+(?:(?:crate|super|self)::)?([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)::\{([^}]+)\}\s*;").unwrap()
                });

                // Collected in a map first so aliases can look up the entry that
                // was just resolved for their original name.
                let mut local_import_table: HashMap<(String, String), String> = HashMap::new();

                for cap in RUST_USE_SIMPLE_RE.captures_iter(content) {
                    let full_path_str = cap.get(1).unwrap().as_str();
                    let parts: Vec<&str> = full_path_str.split("::").collect();
                    if parts.is_empty() { continue; }

                    // Last segment is the imported item; the one before it (or the
                    // sole segment) is treated as the defining module.
                    let imported_name = parts[parts.len() - 1];
                    let source_module = if parts.len() >= 2 {
                        parts[parts.len() - 2]
                    } else {
                        parts[0]
                    };

                    resolve_rust_import(
                        file_path, imported_name, source_module,
                        symbol_table, entity_map, &mut local_import_table,
                    );
                }

                for cap in RUST_USE_GROUP_RE.captures_iter(content) {
                    let module_path = cap.get(1).unwrap().as_str();
                    let names_str = cap.get(2).unwrap().as_str();

                    // The segment right before `::{...}` is the defining module.
                    let source_module = module_path.rsplit("::").next().unwrap_or(module_path);

                    for name_part in names_str.split(',') {
                        let name_part = name_part.trim();
                        let (original, local) = if let Some(pos) = name_part.find(" as ") {
                            (&name_part[..pos], name_part[pos + 4..].trim())
                        } else {
                            (name_part, name_part)
                        };
                        let original = original.trim();
                        let local = local.trim();
                        if original.is_empty() || local.is_empty() { continue; }

                        resolve_rust_import(
                            file_path, original, source_module,
                            symbol_table, entity_map, &mut local_import_table,
                        );
                        if local != original {
                            // Aliased import: if the original name resolved, register
                            // the alias as a second key for the same target.
                            if let Some(target) = local_import_table.get(&(file_path.clone(), original.to_string())).cloned() {
                                local_import_table.insert(
                                    (file_path.clone(), local.to_string()),
                                    target,
                                );
                            }
                        }
                    }
                }

                for (key, val) in local_import_table {
                    local_imports.push((key, val));
                }
            }

            Some(local_imports)
        })
        .collect();

    // Merge the per-file lists; a later entry for the same key overwrites an earlier one.
    let mut import_table: HashMap<(String, String), String> = HashMap::new();
    for local_imports in per_file_imports {
        for (key, val) in local_imports {
            import_table.insert(key, val);
        }
    }

    import_table
}
1605
1606fn resolve_rust_import(
1609 file_path: &str,
1610 imported_name: &str,
1611 source_module: &str,
1612 symbol_table: &HashMap<String, Vec<String>>,
1613 entity_map: &HashMap<String, EntityInfo>,
1614 import_table: &mut HashMap<(String, String), String>,
1615) {
1616 if let Some(target_ids) = symbol_table.get(imported_name) {
1617 let target = target_ids.iter().find(|id| {
1618 entity_map.get(*id).map_or(false, |e| {
1619 let stem = e.file_path.rsplit('/').next().unwrap_or(&e.file_path);
1620 let stem = strip_file_ext(stem);
1621 stem == source_module
1622 })
1623 });
1624 if let Some(target_id) = target {
1625 import_table.insert(
1626 (file_path.to_string(), imported_name.to_string()),
1627 target_id.clone(),
1628 );
1629 }
1630 }
1631}
1632
/// Removes one trailing `.js`, `.ts`, `.jsx` or `.tsx` extension from `s`,
/// returning `s` unchanged when it ends in none of them.
fn strip_js_ext(s: &str) -> &str {
    for ext in [".js", ".ts", ".jsx", ".tsx"] {
        if let Some(stem) = s.strip_suffix(ext) {
            return stem;
        }
    }
    s
}
1641
/// Removes one trailing source-file extension (`.py`, `.ts`, `.js`, `.tsx`,
/// `.jsx`, `.rs`) from `s`, returning `s` unchanged when none applies.
fn strip_file_ext(s: &str) -> &str {
    for ext in [".py", ".ts", ".js", ".tsx", ".jsx", ".rs"] {
        if let Some(stem) = s.strip_suffix(ext) {
            return stem;
        }
    }
    s
}
1652
/// Blanks out comments and string literals while preserving byte offsets.
///
/// Returns a string with the same byte length as `content` in which every byte
/// belonging to a recognised comment or string literal is replaced by an ASCII
/// space; all other bytes are copied through unchanged. The scanner is
/// language-agnostic and recognises:
/// - triple-quoted strings (`"""..."""` and `'''...'''`),
/// - double- and single-quoted strings, honouring backslash escapes,
/// - line comments introduced by `#` or `//` (the terminating newline is kept),
/// - block comments `/* ... */`.
///
/// Any unterminated string or block comment extends to the end of the input.
fn strip_comments_and_strings(content: &str) -> String {
    const TRIPLE_DOUBLE: &[u8] = b"\"\"\"";
    const TRIPLE_SINGLE: &[u8] = b"'''";
    const BLOCK_OPEN: &[u8] = b"/*";
    const BLOCK_CLOSE: &[u8] = b"*/";
    const LINE_COMMENT: &[u8] = b"//";

    /// Offset of the first occurrence of the (non-empty) `needle` in `haystack`.
    fn find_bytes(haystack: &[u8], needle: &[u8]) -> Option<usize> {
        haystack
            .windows(needle.len())
            .position(|window| window == needle)
    }

    let bytes = content.as_bytes();
    let len = bytes.len();
    // Start from all spaces; only bytes that are real code get copied in.
    let mut result = vec![b' '; len];
    let mut i = 0;

    while i < len {
        let rest = &bytes[i..];

        // Constructs delimited by a multi-byte opener/closer pair.
        let delimited: Option<(&[u8], &[u8])> = if rest.starts_with(TRIPLE_DOUBLE) {
            Some((TRIPLE_DOUBLE, TRIPLE_DOUBLE))
        } else if rest.starts_with(TRIPLE_SINGLE) {
            Some((TRIPLE_SINGLE, TRIPLE_SINGLE))
        } else if rest.starts_with(BLOCK_OPEN) {
            Some((BLOCK_OPEN, BLOCK_CLOSE))
        } else {
            None
        };
        if let Some((opener, closer)) = delimited {
            let body_start = i + opener.len();
            i = match find_bytes(&bytes[body_start..], closer) {
                Some(offset) => body_start + offset + closer.len(),
                // Unterminated: everything up to the end belongs to the construct.
                // Stopping early would leak its last bytes into the output as code.
                None => len,
            };
            continue;
        }

        let current = bytes[i];

        // Ordinary quoted string: skip to the matching unescaped quote.
        if current == b'"' || current == b'\'' {
            i += 1;
            while i < len {
                let b = bytes[i];
                if b == b'\\' {
                    // Skip the escaped byte as well.
                    i += 2;
                } else {
                    i += 1;
                    if b == current {
                        break;
                    }
                }
            }
            continue;
        }

        // Line comment: blank up to, but not including, the newline.
        if current == b'#' || rest.starts_with(LINE_COMMENT) {
            while i < len && bytes[i] != b'\n' {
                i += 1;
            }
            continue;
        }

        result[i] = current;
        i += 1;
    }

    String::from_utf8_lossy(&result).into_owned()
}
1731
1732fn extract_dot_chains<'a>(content: &'a str) -> Vec<(&'a str, &'a str)> {
1735 static DOT_CHAIN_RE: LazyLock<Regex> = LazyLock::new(|| {
1736 Regex::new(r"\b([A-Za-z_]\w*)\.([A-Za-z_]\w*)").unwrap()
1737 });
1738
1739 let mut chains = Vec::new();
1740 let mut seen: HashSet<(&str, &str)> = HashSet::new();
1741 for cap in DOT_CHAIN_RE.captures_iter(content) {
1742 let receiver = cap.get(1).unwrap().as_str();
1743 let member = cap.get(2).unwrap().as_str();
1744 if seen.insert((receiver, member)) {
1745 chains.push((receiver, member));
1746 }
1747 }
1748 chains
1749}
1750
1751fn extract_references_from_content<'a>(content: &'a str, own_name: &str) -> Vec<&'a str> {
1755 let stripped = strip_comments_and_strings(content);
1756 extract_references_with_stripped(content, own_name, &stripped)
1757}
1758
1759fn extract_references_with_stripped<'a>(content: &'a str, own_name: &str, stripped: &str) -> Vec<&'a str> {
1763 let stripped_words: HashSet<&str> = stripped
1764 .split(|c: char| !c.is_alphanumeric() && c != '_')
1765 .filter(|w| !w.is_empty())
1766 .collect();
1767
1768 let mut refs = Vec::new();
1769 let mut seen: HashSet<&str> = HashSet::new();
1770
1771 for word in content.split(|c: char| !c.is_alphanumeric() && c != '_') {
1772 if word.is_empty() || word == own_name {
1773 continue;
1774 }
1775 if is_keyword(word) || word.len() < 2 {
1776 continue;
1777 }
1778 if word.starts_with(|c: char| c.is_lowercase()) && word.len() < 3 {
1780 continue;
1781 }
1782 if !word.starts_with(|c: char| c.is_alphabetic() || c == '_') {
1783 continue;
1784 }
1785 if is_common_local_name(word) {
1787 continue;
1788 }
1789 if !stripped_words.contains(word) {
1791 continue;
1792 }
1793 if seen.insert(word) {
1794 refs.push(word);
1795 }
1796 }
1797
1798 refs
1799}
1800
/// Generic identifiers that are excluded from reference extraction (see
/// `is_common_local_name`). Built once on first use.
static COMMON_LOCAL_NAMES: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
    HashSet::from([
        "result", "results", "data", "config", "value", "values",
        "item", "items", "input", "output", "args", "opts",
        "name", "path", "file", "line", "count", "index",
        "temp", "prev", "next", "curr", "current", "node",
        "left", "right", "root", "head", "tail", "body",
        "text", "content", "source", "target", "entry",
        "error", "errors", "message", "response", "request",
        "context", "state", "props", "event", "handler",
        "callback", "options", "params", "query", "list",
        "base", "info", "meta", "kind", "mode", "flag",
        "size", "length", "width", "height", "start", "stop",
        "begin", "done", "found", "status", "code",
    ])
});
1817
1818fn is_common_local_name(word: &str) -> bool {
1821 COMMON_LOCAL_NAMES.contains(word)
1822}
1823
1824fn infer_ref_type(content: &str, ref_name: &str) -> RefType {
1826 let bytes = content.as_bytes();
1829 let name_bytes = ref_name.as_bytes();
1830 let mut search_start = 0;
1831 while let Some(rel_pos) = content[search_start..].find(ref_name) {
1832 let pos = search_start + rel_pos;
1833 let after = pos + name_bytes.len();
1834 if after < bytes.len() && bytes[after] == b'(' {
1836 let is_boundary = pos == 0 || {
1838 let prev = bytes[pos - 1];
1839 !prev.is_ascii_alphanumeric() && prev != b'_'
1840 };
1841 if is_boundary {
1842 return RefType::Calls;
1843 }
1844 }
1845 search_start = pos + 1;
1847 while search_start < content.len() && !content.is_char_boundary(search_start) {
1848 search_start += 1;
1849 }
1850 }
1851
1852 for line in content.lines() {
1854 let trimmed = line.trim();
1855 if (trimmed.starts_with("import ") || trimmed.starts_with("use ")
1856 || trimmed.starts_with("from ") || trimmed.starts_with("require("))
1857 && trimmed.contains(ref_name)
1858 {
1859 return RefType::Imports;
1860 }
1861 }
1862
1863 RefType::TypeRef
1865}
1866
/// Words that are never treated as references to project entities: language
/// keywords, common builtins and primitive/standard type names from several
/// languages (see `is_keyword`). Built once on first use.
static KEYWORDS: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
    HashSet::from([
        // JavaScript/TypeScript-style keywords and modifiers
        "if", "else", "for", "while", "do", "switch", "case", "break",
        "continue", "return", "try", "catch", "finally", "throw",
        "new", "delete", "typeof", "instanceof", "in", "of",
        "true", "false", "null", "undefined", "void", "this",
        "super", "class", "extends", "implements", "interface",
        "enum", "const", "let", "var", "function", "async",
        "await", "yield", "import", "export", "default", "from",
        "as", "static", "public", "private", "protected",
        "abstract", "final", "override",
        // Rust
        "fn", "pub", "mod", "use", "struct", "impl", "trait",
        "where", "type", "self", "Self", "mut", "ref", "match",
        "loop", "move", "unsafe", "extern", "crate", "dyn",
        // Python
        "def", "elif", "except", "raise", "with",
        "pass", "lambda", "nonlocal", "global", "assert",
        "True", "False", "and", "or", "not", "is",
        // Go keywords and builtins
        "func", "package", "range", "select", "chan", "go",
        "defer", "map", "make", "append", "len", "cap",
        // C / C++ keywords, common library names and preprocessor words
        "auto", "register", "volatile", "sizeof", "typedef",
        "template", "typename", "namespace", "virtual", "inline",
        "constexpr", "nullptr", "noexcept", "explicit", "friend",
        "operator", "using", "cout", "endl", "cerr", "cin",
        "printf", "scanf", "malloc", "free", "NULL", "include",
        "ifdef", "ifndef", "endif", "define", "pragma",
        // Ruby
        "end", "then", "elsif", "unless", "until",
        "begin", "rescue", "ensure", "when", "require",
        "attr_accessor", "attr_reader", "attr_writer",
        "puts", "nil", "module", "defined",
        // C#
        "internal", "sealed", "readonly",
        "partial", "delegate", "event", "params", "out",
        "object", "decimal", "sbyte", "ushort", "uint",
        "ulong", "nint", "nuint", "dynamic",
        "get", "set", "value", "init", "record",
        // Primitive and ubiquitous standard-library type names
        "string", "number", "boolean", "int", "float", "double",
        "bool", "char", "byte", "i8", "i16", "i32", "i64",
        "u8", "u16", "u32", "u64", "f32", "f64", "usize",
        "isize", "str", "String", "Vec", "Option", "Result",
        "Box", "Arc", "Rc", "HashMap", "HashSet", "Some",
        "Ok", "Err",
    ])
});
1917
1918fn is_keyword(word: &str) -> bool {
1919 KEYWORDS.contains(word)
1920}
1921
#[cfg(test)]
mod tests {
    use super::*;
    use crate::git::types::{FileChange, FileStatus};
    use std::io::Write;
    use tempfile::TempDir;

    /// Creates an empty temporary directory plus the default parser registry.
    fn create_test_repo() -> (TempDir, ParserRegistry) {
        let dir = TempDir::new().unwrap();
        let registry = crate::parser::plugins::create_default_registry();
        (dir, registry)
    }

    /// Writes `content` to `dir/name`, creating parent directories as needed.
    fn write_file(dir: &Path, name: &str, content: &str) {
        let path = dir.join(name);
        if let Some(parent) = path.parent() {
            std::fs::create_dir_all(parent).unwrap();
        }
        let mut f = std::fs::File::create(path).unwrap();
        f.write_all(content.as_bytes()).unwrap();
    }

    #[test]
    fn test_incremental_add_file() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        // Initial graph: foo (a.ts) calls bar (b.ts).
        write_file(root, "a.ts", "export function foo() { return bar(); }\n");
        write_file(root, "b.ts", "export function bar() { return 1; }\n");

        let (mut graph, _) = EntityGraph::build(root, &["a.ts".into(), "b.ts".into()], &registry);
        assert_eq!(graph.entities.len(), 2);

        // Add c.ts on disk; no inline content is passed, so it is read from `root`.
        write_file(root, "c.ts", "export function baz() { return foo(); }\n");
        graph.update_from_changes(
            &[FileChange {
                file_path: "c.ts".into(),
                status: FileStatus::Added,
                old_file_path: None,
                before_content: None,
                after_content: None,
            }],
            root,
            &registry,
        );

        assert_eq!(graph.entities.len(), 3);
        assert!(graph.entities.contains_key("c.ts::function::baz"));
        let baz_deps = graph.get_dependencies("c.ts::function::baz");
        assert!(
            baz_deps.iter().any(|d| d.name == "foo"),
            "baz should depend on foo. Deps: {:?}",
            baz_deps.iter().map(|d| &d.name).collect::<Vec<_>>()
        );
    }

    #[test]
    fn test_incremental_delete_file() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "a.ts", "export function foo() { return bar(); }\n");
        write_file(root, "b.ts", "export function bar() { return 1; }\n");

        let (mut graph, _) = EntityGraph::build(root, &["a.ts".into(), "b.ts".into()], &registry);
        assert_eq!(graph.entities.len(), 2);

        // Report b.ts as deleted.
        graph.update_from_changes(
            &[FileChange {
                file_path: "b.ts".into(),
                status: FileStatus::Deleted,
                old_file_path: None,
                before_content: None,
                after_content: None,
            }],
            root,
            &registry,
        );

        assert_eq!(graph.entities.len(), 1);
        assert!(!graph.entities.contains_key("b.ts::function::bar"));
        // The edge foo -> bar must be gone along with bar.
        let foo_deps = graph.get_dependencies("a.ts::function::foo");
        assert!(
            foo_deps.is_empty(),
            "foo's deps should be empty after bar deleted. Deps: {:?}",
            foo_deps.iter().map(|d| &d.name).collect::<Vec<_>>()
        );
    }

    #[test]
    fn test_incremental_modify_file() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "a.ts", "export function foo() { return bar(); }\n");
        write_file(root, "b.ts", "export function bar() { return 1; }\nexport function baz() { return 2; }\n");

        let (mut graph, _) = EntityGraph::build(root, &["a.ts".into(), "b.ts".into()], &registry);
        assert_eq!(graph.entities.len(), 3);

        // Rewrite a.ts so foo calls baz instead of bar.
        write_file(root, "a.ts", "export function foo() { return baz(); }\n");
        graph.update_from_changes(
            &[FileChange {
                file_path: "a.ts".into(),
                status: FileStatus::Modified,
                old_file_path: None,
                before_content: None,
                after_content: None,
            }],
            root,
            &registry,
        );

        assert_eq!(graph.entities.len(), 3);
        let foo_deps = graph.get_dependencies("a.ts::function::foo");
        let dep_names: Vec<&str> = foo_deps.iter().map(|d| d.name.as_str()).collect();
        assert!(dep_names.contains(&"baz"), "foo should depend on baz after modification. Deps: {:?}", dep_names);
        assert!(!dep_names.contains(&"bar"), "foo should no longer depend on bar. Deps: {:?}", dep_names);
    }

    #[test]
    fn test_incremental_with_content() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "a.ts", "export function foo() { return 1; }\n");
        let (mut graph, _) = EntityGraph::build(root, &["a.ts".into()], &registry);
        assert_eq!(graph.entities.len(), 1);

        // b.ts is never written to disk; its text arrives via `after_content`.
        graph.update_from_changes(
            &[FileChange {
                file_path: "b.ts".into(),
                status: FileStatus::Added,
                old_file_path: None,
                before_content: None,
                after_content: Some("export function bar() { return foo(); }\n".into()),
            }],
            root,
            &registry,
        );

        assert_eq!(graph.entities.len(), 2);
        let bar_deps = graph.get_dependencies("b.ts::function::bar");
        assert!(bar_deps.iter().any(|d| d.name == "foo"));
    }

    #[test]
    fn test_extract_references() {
        let content = "function processData(input) {\n  const result = validateInput(input);\n  return transform(result);\n}";
        let refs = extract_references_from_content(content, "processData");
        assert!(refs.contains(&"validateInput"));
        assert!(refs.contains(&"transform"));
        // An entity never references itself.
        assert!(!refs.contains(&"processData"));
    }

    #[test]
    fn test_extract_references_skips_keywords() {
        let content = "function foo() { if (true) { return false; } }";
        let refs = extract_references_from_content(content, "foo");
        assert!(!refs.contains(&"if"));
        assert!(!refs.contains(&"true"));
        assert!(!refs.contains(&"return"));
        assert!(!refs.contains(&"false"));
    }

    #[test]
    fn test_infer_ref_type_call() {
        assert_eq!(
            infer_ref_type("validateInput(data)", "validateInput"),
            RefType::Calls,
        );
    }

    #[test]
    fn test_infer_ref_type_type() {
        assert_eq!(
            infer_ref_type("let x: MyType = something", "MyType"),
            RefType::TypeRef,
        );
    }

    #[test]
    fn test_infer_ref_type_multibyte_utf8() {
        // Multi-byte characters before the match must not break byte-offset slicing.
        assert_eq!(
            infer_ref_type("let café = foo(x)", "foo"),
            RefType::Calls,
        );
        assert_eq!(
            infer_ref_type("class HandicapfrPublicationFieldsEnum:\n    É = 1\n    bar()", "bar"),
            RefType::Calls,
        );
        // No match at all in multi-byte content.
        assert_eq!(
            infer_ref_type("// 日本語コメント\nlet x = 1", "missing"),
            RefType::TypeRef,
        );
    }

    #[test]
    fn test_dot_chain_self_resolution() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "service.py", "\
class MyService:
    def process(self):
        return self.validate()

    def validate(self):
        return True
");

        let (graph, _) = EntityGraph::build(root, &["service.py".into()], &registry);

        let process_id = graph.entities.keys()
            .find(|id| id.contains("process"))
            .expect("process entity should exist");
        let deps = graph.get_dependencies(process_id);
        assert!(
            deps.iter().any(|d| d.name == "validate"),
            "process should depend on validate via self.validate(). Deps: {:?}",
            deps.iter().map(|d| &d.name).collect::<Vec<_>>()
        );
    }

    #[test]
    fn test_dot_chain_this_resolution() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "service.ts", "\
class UserService {
    process() {
        return this.validate();
    }
    validate() {
        return true;
    }
}
");

        let (graph, _) = EntityGraph::build(root, &["service.ts".into()], &registry);

        let process_id = graph.entities.keys()
            .find(|id| id.contains("process"))
            .expect("process entity should exist");
        let deps = graph.get_dependencies(process_id);
        assert!(
            deps.iter().any(|d| d.name == "validate"),
            "process should depend on validate via this.validate(). Deps: {:?}",
            deps.iter().map(|d| &d.name).collect::<Vec<_>>()
        );
    }

    #[test]
    fn test_dot_chain_class_static() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "utils.ts", "\
class MathUtils {
    static compute() { return 1; }
}
function caller() { return MathUtils.compute(); }
");

        let (graph, _) = EntityGraph::build(root, &["utils.ts".into()], &registry);

        let caller_id = graph.entities.keys()
            .find(|id| id.contains("caller"))
            .expect("caller entity should exist");
        let deps = graph.get_dependencies(caller_id);
        assert!(
            deps.iter().any(|d| d.name == "compute"),
            "caller should depend on compute via MathUtils.compute(). Deps: {:?}",
            deps.iter().map(|d| &d.name).collect::<Vec<_>>()
        );
    }

    #[test]
    fn test_js_ts_import_resolution() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "helper.ts", "\
export function helper() { return 1; }
");
        write_file(root, "main.ts", "\
import { helper } from './helper';
export function main() { return helper(); }
");

        let (graph, _) = EntityGraph::build(
            root,
            &["helper.ts".into(), "main.ts".into()],
            &registry,
        );

        let main_id = graph.entities.keys()
            .find(|id| id.contains("main"))
            .expect("main entity should exist");
        let deps = graph.get_dependencies(main_id);
        assert!(
            deps.iter().any(|d| d.name == "helper"),
            "main should depend on helper via JS import. Deps: {:?}",
            deps.iter().map(|d| &d.name).collect::<Vec<_>>()
        );
    }

    #[test]
    fn test_dot_chain_no_false_edges() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        // Two classes in different files both define `process`; `self.process()`
        // in ClassA must not resolve to ClassB's method.
        write_file(root, "a.py", "\
class ClassA:
    def run(self):
        return self.process()

    def process(self):
        return 1
");
        write_file(root, "b.py", "\
class ClassB:
    def process(self):
        return 2
");

        let (graph, _) = EntityGraph::build(
            root,
            &["a.py".into(), "b.py".into()],
            &registry,
        );

        let run_id = graph.entities.keys()
            .find(|id| id.contains("run"))
            .expect("run entity should exist");
        let deps = graph.get_dependencies(run_id);
        for dep in &deps {
            // Any `process` dependency has to be the one from a.py.
            if dep.name == "process" {
                assert!(
                    dep.file_path == "a.py",
                    "run's process dep should be in a.py, not {}",
                    dep.file_path
                );
            }
        }
    }

    #[test]
    fn test_dot_chain_fallback() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        // A plain call with no dot chain must still resolve.
        write_file(root, "app.ts", "\
export function helper() { return 1; }
export function caller() {
    const val = helper();
    return val;
}
");

        let (graph, _) = EntityGraph::build(root, &["app.ts".into()], &registry);

        let caller_id = graph.entities.keys()
            .find(|id| id.contains("caller"))
            .expect("caller entity should exist");
        let deps = graph.get_dependencies(caller_id);
        assert!(
            deps.iter().any(|d| d.name == "helper"),
            "caller should still resolve helper via bag-of-words. Deps: {:?}",
            deps.iter().map(|d| &d.name).collect::<Vec<_>>()
        );
    }

}