1use std::collections::{HashMap, HashSet};
11use std::path::Path;
12use std::sync::{Arc, LazyLock};
13
14use rayon::prelude::*;
15use regex::Regex;
16use serde::{Deserialize, Serialize};
17
18use crate::git::types::{FileChange, FileStatus};
19use crate::model::entity::SemanticEntity;
20use crate::parser::registry::ParserRegistry;
21use crate::parser::scope_resolve;
22
23#[derive(Debug, Clone, Serialize, Deserialize)]
25#[serde(rename_all = "camelCase")]
26pub struct EntityRef {
27 pub from_entity: String,
28 pub to_entity: String,
29 pub ref_type: RefType,
30}
31
32#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
34#[serde(rename_all = "lowercase")]
35pub enum RefType {
36 Calls,
38 TypeRef,
40 Imports,
42}
43
44#[derive(Debug)]
46pub struct EntityGraph {
47 pub entities: HashMap<String, EntityInfo>,
49 pub edges: Vec<EntityRef>,
51 pub dependents: HashMap<String, Vec<String>>,
53 pub dependencies: HashMap<String, Vec<String>>,
55}
56
57#[derive(Debug, Clone, Serialize, Deserialize)]
59#[serde(rename_all = "camelCase")]
60pub struct EntityInfo {
61 pub id: String,
62 pub name: String,
63 pub entity_type: String,
64 pub file_path: String,
65 #[serde(skip_serializing_if = "Option::is_none")]
66 pub parent_id: Option<String>,
67 pub start_line: usize,
68 pub end_line: usize,
69}
70
71impl EntityGraph {
72 pub fn from_parts(entities: HashMap<String, EntityInfo>, edges: Vec<EntityRef>) -> Self {
74 let mut dependents: HashMap<String, Vec<String>> = HashMap::new();
75 let mut dependencies: HashMap<String, Vec<String>> = HashMap::new();
76 for edge in &edges {
77 dependents
78 .entry(edge.to_entity.clone())
79 .or_default()
80 .push(edge.from_entity.clone());
81 dependencies
82 .entry(edge.from_entity.clone())
83 .or_default()
84 .push(edge.to_entity.clone());
85 }
86 EntityGraph {
87 entities,
88 edges,
89 dependents,
90 dependencies,
91 }
92 }
93
94 pub fn build(
100 root: &Path,
101 file_paths: &[String],
102 registry: &ParserRegistry,
103 ) -> (Self, Vec<SemanticEntity>) {
104 let per_file: Vec<(Vec<SemanticEntity>, Option<(String, String, tree_sitter::Tree)>)> = file_paths
107 .par_iter()
108 .filter_map(|file_path| {
109 let full_path = root.join(file_path);
110 let content = std::fs::read_to_string(&full_path).ok()?;
111 let (entities, tree) = registry.extract_entities_with_tree(file_path, &content)?;
112 let parsed = tree.map(|t| (file_path.clone(), content, t));
113 Some((entities, parsed))
114 })
115 .collect();
116
117 let mut all_entities: Vec<SemanticEntity> = Vec::new();
118 let mut parsed_files: Vec<(String, String, tree_sitter::Tree)> = Vec::new();
119 for (entities, parsed) in per_file {
120 all_entities.extend(entities);
121 if let Some(p) = parsed {
122 parsed_files.push(p);
123 }
124 }
125
126 let mut symbol_table: HashMap<String, Vec<String>> = HashMap::with_capacity(all_entities.len());
129 let mut entity_map: HashMap<String, EntityInfo> = HashMap::with_capacity(all_entities.len());
130 let mut parent_child_pairs: HashSet<(&str, &str)> = HashSet::new();
131 let mut class_child_names: HashSet<(&str, &str)> = HashSet::new();
132 let mut class_entity_names: HashSet<&str> = HashSet::new();
133 let mut id_to_name: HashMap<&str, &str> = HashMap::with_capacity(all_entities.len());
134 let mut scope_entity_ranges: HashMap<String, Vec<(usize, usize, String)>> = HashMap::new();
135
136 for entity in &all_entities {
137 symbol_table
138 .entry(entity.name.clone())
139 .or_default()
140 .push(entity.id.clone());
141
142 entity_map.insert(
143 entity.id.clone(),
144 EntityInfo {
145 id: entity.id.clone(),
146 name: entity.name.clone(),
147 entity_type: entity.entity_type.clone(),
148 file_path: entity.file_path.clone(),
149 parent_id: entity.parent_id.clone(),
150 start_line: entity.start_line,
151 end_line: entity.end_line,
152 },
153 );
154
155 if let Some(ref pid) = entity.parent_id {
156 parent_child_pairs.insert((pid.as_str(), entity.id.as_str()));
157 class_child_names.insert((pid.as_str(), entity.name.as_str()));
158 }
159
160 if matches!(entity.entity_type.as_str(), "class" | "struct" | "interface" | "class_type") {
161 class_entity_names.insert(entity.name.as_str());
162 }
163
164 id_to_name.insert(entity.id.as_str(), entity.name.as_str());
165
166 scope_entity_ranges.entry(entity.file_path.clone()).or_default()
167 .push((entity.start_line, entity.end_line, entity.id.clone()));
168 }
169
170 let mut enclosing_class: HashMap<&str, &str> = HashMap::new();
173 let mut class_members: HashMap<&str, Vec<(&str, &str)>> = HashMap::new();
174 let mut scope_class_members: HashMap<String, Vec<(String, String)>> = HashMap::new();
175
176 for entity in &all_entities {
177 if let Some(ref pid) = entity.parent_id {
178 if let Some(&parent_name) = id_to_name.get(pid.as_str()) {
179 if class_entity_names.contains(parent_name) {
180 enclosing_class.insert(entity.id.as_str(), parent_name);
181 class_members
182 .entry(parent_name)
183 .or_default()
184 .push((entity.name.as_str(), entity.id.as_str()));
185 }
186 }
187 if let Some(parent) = entity_map.get(pid.as_str()) {
189 if matches!(parent.entity_type.as_str(), "class" | "struct" | "interface" | "impl") {
190 scope_class_members.entry(parent.name.clone()).or_default()
191 .push((entity.name.clone(), entity.id.clone()));
192 }
193 }
194 }
195 if entity.entity_type == "method" && entity.file_path.ends_with(".go") {
197 if let Some(struct_name) = scope_resolve::extract_go_receiver_type(&entity.content) {
198 scope_class_members.entry(struct_name).or_default()
199 .push((entity.name.clone(), entity.id.clone()));
200 }
201 }
202 }
203
204 let import_table = build_import_table(root, file_paths, &symbol_table, &entity_map, Some(&parsed_files));
207 let owned_go_pkg_index: HashMap<String, Vec<(String, String)>> = if file_paths.iter().any(|f| f.ends_with(".go")) {
209 let mut idx: HashMap<String, Vec<(String, String)>> = HashMap::new();
210 for (name, target_ids) in symbol_table.iter() {
211 for target_id in target_ids {
212 if let Some(entity) = entity_map.get(target_id) {
213 let file_stem = entity.file_path.rsplit('/').next().unwrap_or(&entity.file_path);
214 let file_stem = strip_file_ext(file_stem);
215 idx.entry(file_stem.to_string())
216 .or_default()
217 .push((name.clone(), target_id.clone()));
218 if let Some(parent_start) = entity.file_path.rfind('/') {
219 let parent_path = &entity.file_path[..parent_start];
220 if let Some(dir_name_start) = parent_path.rfind('/') {
221 let dir_name = &parent_path[dir_name_start + 1..];
222 if dir_name != file_stem {
223 idx.entry(dir_name.to_string())
224 .or_default()
225 .push((name.clone(), target_id.clone()));
226 }
227 } else if !parent_path.is_empty() && parent_path != file_stem {
228 idx.entry(parent_path.to_string())
229 .or_default()
230 .push((name.clone(), target_id.clone()));
231 }
232 }
233 }
234 }
235 }
236 idx
237 } else {
238 HashMap::new()
239 };
240
241 let symbol_table = Arc::new(symbol_table);
243
244 let pre_built = scope_resolve::PreBuiltLookups {
245 symbol_table: Arc::clone(&symbol_table),
246 class_members: scope_class_members,
247 entity_ranges: scope_entity_ranges,
248 go_pkg_index: owned_go_pkg_index,
249 };
250
251 let has_scope_lang = file_paths.iter().any(|f| {
253 let ext = f.rfind('.').map(|i| &f[i..]).unwrap_or("");
254 crate::parser::plugins::code::languages::get_language_config(ext)
255 .and_then(|c| c.scope_resolve)
256 .is_some()
257 });
258 let (scope_edges, scope_resolved_entities) = if has_scope_lang {
259 let result = scope_resolve::resolve_with_scopes_full(root, file_paths, &all_entities, &entity_map, Some(parsed_files), Some(pre_built));
260 let resolved_entity_ids: HashSet<String> = result.edges.iter()
261 .map(|(from, _, _)| from.clone())
262 .collect();
263 (result.edges, resolved_entity_ids)
264 } else {
265 (vec![], HashSet::new())
266 };
267
268 let resolved_refs: Vec<(String, String, RefType)> = all_entities
274 .par_iter()
275 .flat_map(|entity| {
276 if scope_resolved_entities.contains(&entity.id) {
278 return vec![];
279 }
280
281 let ext = entity.file_path.rfind('.').map(|i| &entity.file_path[i..]).unwrap_or("");
284 if crate::parser::plugins::code::languages::get_language_config(ext).is_none() {
285 return vec![];
286 }
287
288 let mut entity_edges = Vec::new();
289 let mut consumed_words: HashSet<String> = HashSet::new();
290
291 let stripped = strip_comments_and_strings(&entity.content);
293
294 let dot_chains = extract_dot_chains(&stripped);
296
297 for (receiver, member) in &dot_chains {
298 if *receiver == "self" || *receiver == "this" {
299 if let Some(class_name) = enclosing_class.get(entity.id.as_str()) {
301 if let Some(members) = class_members.get(class_name) {
302 for (n, tid) in members {
303 if *n == *member && *tid != entity.id.as_str() {
304 entity_edges.push((
305 entity.id.clone(),
306 tid.to_string(),
307 RefType::Calls,
308 ));
309 consumed_words.insert(member.to_string());
310 break;
311 }
312 }
313 }
314 }
315 } else if class_entity_names.contains(*receiver) {
316 if let Some(members) = class_members.get(*receiver) {
318 for (n, tid) in members {
319 if *n == *member {
320 entity_edges.push((
321 entity.id.clone(),
322 tid.to_string(),
323 RefType::Calls,
324 ));
325 consumed_words.insert(member.to_string());
326 consumed_words.insert(receiver.to_string());
327 break;
328 }
329 }
330 }
331 }
332 }
334
335 let refs = extract_references_with_stripped(&entity.content, &entity.name, &stripped);
338 for ref_name in refs {
339 if consumed_words.contains(ref_name) {
340 continue;
341 }
342
343 if class_child_names.contains(&(entity.id.as_str(), ref_name)) {
345 continue;
346 }
347
348 let import_key = (entity.file_path.clone(), ref_name.to_string());
351 if let Some(import_target_id) = import_table.get(&import_key) {
352 if import_target_id != &entity.id
353 && !parent_child_pairs.contains(&(entity.id.as_str(), import_target_id.as_str()))
354 && !parent_child_pairs.contains(&(import_target_id.as_str(), entity.id.as_str()))
355 {
356 let ref_type = infer_ref_type(&entity.content, &ref_name);
357 entity_edges.push((
358 entity.id.clone(),
359 import_target_id.clone(),
360 ref_type,
361 ));
362 }
363 continue;
364 }
365
366 if let Some(target_ids) = symbol_table.get(ref_name) {
367 let target = target_ids
370 .iter()
371 .find(|id| {
372 *id != &entity.id
373 && entity_map
374 .get(*id)
375 .map_or(false, |e| e.file_path == entity.file_path)
376 });
377
378 if let Some(target_id) = target {
379 if parent_child_pairs.contains(&(entity.id.as_str(), target_id.as_str()))
381 || parent_child_pairs.contains(&(target_id.as_str(), entity.id.as_str()))
382 {
383 continue;
384 }
385 let ref_type = infer_ref_type(&entity.content, &ref_name);
386 entity_edges.push((
387 entity.id.clone(),
388 target_id.clone(),
389 ref_type,
390 ));
391 }
392 }
393 }
394 entity_edges
395 })
396 .collect();
397
398 let mut combined: Vec<(String, String, RefType)> = scope_edges;
400 combined.extend(resolved_refs);
401 let mut seen_edges: HashSet<(String, String)> = HashSet::with_capacity(combined.len());
402 let mut all_resolved: Vec<(String, String, RefType)> = Vec::with_capacity(combined.len());
403 for edge in combined {
404 if seen_edges.insert((edge.0.clone(), edge.1.clone())) {
405 all_resolved.push(edge);
406 }
407 }
408
409 let mut edges: Vec<EntityRef> = Vec::with_capacity(all_resolved.len());
411 let mut dependents: HashMap<String, Vec<String>> = HashMap::new();
412 let mut dependencies: HashMap<String, Vec<String>> = HashMap::new();
413
414 for (from_entity, to_entity, ref_type) in all_resolved {
415 dependents
416 .entry(to_entity.clone())
417 .or_default()
418 .push(from_entity.clone());
419 dependencies
420 .entry(from_entity.clone())
421 .or_default()
422 .push(to_entity.clone());
423 edges.push(EntityRef {
424 from_entity,
425 to_entity,
426 ref_type,
427 });
428 }
429
430 let graph = EntityGraph {
431 entities: entity_map,
432 edges,
433 dependents,
434 dependencies,
435 };
436
437 (graph, all_entities)
438 }
439
440 pub fn build_incremental(
446 root: &Path,
447 stale_files: &[String],
448 all_file_paths: &[String],
449 cached_entities: Vec<SemanticEntity>,
450 cached_edges: Vec<EntityRef>,
451 stale_file_cached_entities: Vec<SemanticEntity>,
452 registry: &ParserRegistry,
453 ) -> (Self, Vec<SemanticEntity>) {
454 let stale_set: HashSet<&str> = stale_files.iter().map(|s| s.as_str()).collect();
456
457 let per_file: Vec<(Vec<SemanticEntity>, Option<(String, String, tree_sitter::Tree)>)> = stale_files
459 .par_iter()
460 .filter_map(|file_path| {
461 let full_path = root.join(file_path);
462 let content = std::fs::read_to_string(&full_path).ok()?;
463 let (entities, tree) = registry.extract_entities_with_tree(file_path, &content)?;
464 let parsed = tree.map(|t| (file_path.clone(), content, t));
465 Some((entities, parsed))
466 })
467 .collect();
468
469 let mut new_entities: Vec<SemanticEntity> = Vec::new();
470 let mut parsed_files: Vec<(String, String, tree_sitter::Tree)> = Vec::new();
471 for (entities, parsed) in per_file {
472 new_entities.extend(entities);
473 if let Some(p) = parsed {
474 parsed_files.push(p);
475 }
476 }
477
478 let cached_hashes: HashMap<&str, &str> = stale_file_cached_entities
481 .iter()
482 .map(|e| (e.id.as_str(), e.content_hash.as_str()))
483 .collect();
484
485 let mut truly_changed_ids: HashSet<String> = HashSet::new();
487 let mut content_clean_ids: HashSet<String> = HashSet::new();
488 for entity in &new_entities {
489 match cached_hashes.get(entity.id.as_str()) {
490 Some(old_hash) if *old_hash == entity.content_hash.as_str() => {
491 content_clean_ids.insert(entity.id.clone());
492 }
493 _ => {
494 truly_changed_ids.insert(entity.id.clone());
496 }
497 }
498 }
499
500 let new_entity_ids: HashSet<&str> = new_entities.iter().map(|e| e.id.as_str()).collect();
502 let deleted_ids: HashSet<&str> = stale_file_cached_entities
503 .iter()
504 .filter(|e| !new_entity_ids.contains(e.id.as_str()))
505 .map(|e| e.id.as_str())
506 .collect();
507
508 let all_entities: Vec<SemanticEntity> = cached_entities
510 .into_iter()
511 .chain(new_entities.into_iter())
512 .collect();
513
514 let mut affected_clean_ids: HashSet<String> = HashSet::new();
516 for edge in &cached_edges {
517 let to_truly_changed = truly_changed_ids.contains(&edge.to_entity)
518 || deleted_ids.contains(edge.to_entity.as_str());
519 if to_truly_changed && !stale_set.contains(
520 all_entities.iter()
521 .find(|e| e.id == edge.from_entity)
522 .map(|e| e.file_path.as_str())
523 .unwrap_or("")
524 ) {
525 affected_clean_ids.insert(edge.from_entity.clone());
526 }
527 }
528
529 let stale_entity_ids: HashSet<&str> = all_entities
531 .iter()
532 .filter(|e| stale_set.contains(e.file_path.as_str()))
533 .map(|e| e.id.as_str())
534 .collect();
535
536 let kept_edges: Vec<EntityRef> = cached_edges
540 .into_iter()
541 .filter(|e| {
542 let from_stale = stale_entity_ids.contains(e.from_entity.as_str());
543 let to_stale = stale_entity_ids.contains(e.to_entity.as_str());
544
545 if !from_stale && !to_stale && !affected_clean_ids.contains(&e.from_entity) {
546 return true;
548 }
549 if content_clean_ids.contains(&e.from_entity)
550 && !truly_changed_ids.contains(&e.to_entity)
551 && !deleted_ids.contains(e.to_entity.as_str())
552 && !affected_clean_ids.contains(&e.from_entity)
553 {
554 return true;
556 }
557 false
558 })
559 .collect();
560
561 let needs_resolution: HashSet<&str> = all_entities
564 .iter()
565 .filter(|e| {
566 truly_changed_ids.contains(&e.id)
567 || affected_clean_ids.contains(&e.id)
568 })
569 .map(|e| e.id.as_str())
570 .collect();
571
572 let mut symbol_table: HashMap<String, Vec<String>> = HashMap::with_capacity(all_entities.len());
577 let mut entity_map: HashMap<String, EntityInfo> = HashMap::with_capacity(all_entities.len());
578
579 for entity in &all_entities {
580 symbol_table
581 .entry(entity.name.clone())
582 .or_default()
583 .push(entity.id.clone());
584 entity_map.insert(
585 entity.id.clone(),
586 EntityInfo {
587 id: entity.id.clone(),
588 name: entity.name.clone(),
589 entity_type: entity.entity_type.clone(),
590 file_path: entity.file_path.clone(),
591 parent_id: entity.parent_id.clone(),
592 start_line: entity.start_line,
593 end_line: entity.end_line,
594 },
595 );
596 }
597
598 let parent_child_pairs: HashSet<(&str, &str)> = all_entities
600 .iter()
601 .filter_map(|e| {
602 e.parent_id.as_ref().map(|pid| (pid.as_str(), e.id.as_str()))
603 })
604 .collect();
605
606 let class_child_names: HashSet<(&str, &str)> = all_entities
607 .iter()
608 .filter_map(|e| {
609 e.parent_id.as_ref().map(|pid| (pid.as_str(), e.name.as_str()))
610 })
611 .collect();
612
613 let class_entity_names: HashSet<&str> = all_entities
614 .iter()
615 .filter(|e| matches!(e.entity_type.as_str(), "class" | "struct" | "interface" | "class_type"))
616 .map(|e| e.name.as_str())
617 .collect();
618
619 let id_to_name: HashMap<&str, &str> = all_entities
620 .iter()
621 .map(|e| (e.id.as_str(), e.name.as_str()))
622 .collect();
623
624 let mut enclosing_class: HashMap<&str, &str> = HashMap::new();
625 let mut class_members: HashMap<&str, Vec<(&str, &str)>> = HashMap::new();
626
627 for entity in &all_entities {
628 if let Some(ref pid) = entity.parent_id {
629 if let Some(&parent_name) = id_to_name.get(pid.as_str()) {
630 if class_entity_names.contains(parent_name) {
631 enclosing_class.insert(entity.id.as_str(), parent_name);
632 class_members
633 .entry(parent_name)
634 .or_default()
635 .push((entity.name.as_str(), entity.id.as_str()));
636 }
637 }
638 }
639 }
640
641 let import_table = build_import_table(root, all_file_paths, &symbol_table, &entity_map, Some(&parsed_files));
643
644 let resolve_file_paths: Vec<String> = all_file_paths
646 .iter()
647 .filter(|f| {
648 stale_set.contains(f.as_str()) || all_entities.iter().any(|e| {
650 e.file_path == **f && affected_clean_ids.contains(&e.id)
651 })
652 })
653 .cloned()
654 .collect();
655
656 let has_scope_lang = resolve_file_paths.iter().any(|f| {
657 let ext = f.rfind('.').map(|i| &f[i..]).unwrap_or("");
658 crate::parser::plugins::code::languages::get_language_config(ext)
659 .and_then(|c| c.scope_resolve)
660 .is_some()
661 });
662 let (scope_edges, scope_resolved_entities) = if has_scope_lang {
663 let resolve_set: HashSet<&str> = resolve_file_paths.iter().map(|s| s.as_str()).collect();
665 let relevant_parsed: Vec<(String, String, tree_sitter::Tree)> = parsed_files
666 .into_iter()
667 .filter(|(fp, _, _)| resolve_set.contains(fp.as_str()))
668 .collect();
669 let pre = if relevant_parsed.is_empty() { None } else { Some(relevant_parsed) };
670 let result = scope_resolve::resolve_with_scopes_full(root, &resolve_file_paths, &all_entities, &entity_map, pre, None);
671 let resolved_entity_ids: HashSet<String> = result.edges.iter()
672 .map(|(from, _, _)| from.clone())
673 .collect();
674 (result.edges, resolved_entity_ids)
675 } else {
676 (vec![], HashSet::new())
677 };
678
679 let resolved_refs: Vec<(String, String, RefType)> = all_entities
681 .par_iter()
682 .filter(|e| needs_resolution.contains(e.id.as_str()))
683 .flat_map(|entity| {
684 if scope_resolved_entities.contains(&entity.id) {
685 return vec![];
686 }
687
688 let ext = entity.file_path.rfind('.').map(|i| &entity.file_path[i..]).unwrap_or("");
690 if crate::parser::plugins::code::languages::get_language_config(ext).is_none() {
691 return vec![];
692 }
693
694 let mut entity_edges = Vec::new();
695 let mut consumed_words: HashSet<String> = HashSet::new();
696
697 let stripped = strip_comments_and_strings(&entity.content);
699
700 let dot_chains = extract_dot_chains(&stripped);
702
703 for (receiver, member) in &dot_chains {
704 if *receiver == "self" || *receiver == "this" {
705 if let Some(class_name) = enclosing_class.get(entity.id.as_str()) {
706 if let Some(members) = class_members.get(class_name) {
707 for (n, tid) in members {
708 if *n == *member && *tid != entity.id.as_str() {
709 entity_edges.push((
710 entity.id.clone(),
711 tid.to_string(),
712 RefType::Calls,
713 ));
714 consumed_words.insert(member.to_string());
715 break;
716 }
717 }
718 }
719 }
720 } else if class_entity_names.contains(*receiver) {
721 if let Some(members) = class_members.get(*receiver) {
722 for (n, tid) in members {
723 if *n == *member {
724 entity_edges.push((
725 entity.id.clone(),
726 tid.to_string(),
727 RefType::Calls,
728 ));
729 consumed_words.insert(member.to_string());
730 consumed_words.insert(receiver.to_string());
731 break;
732 }
733 }
734 }
735 }
736 }
737
738 let refs = extract_references_with_stripped(&entity.content, &entity.name, &stripped);
740 for ref_name in refs {
741 if consumed_words.contains(ref_name) {
742 continue;
743 }
744 if class_child_names.contains(&(entity.id.as_str(), ref_name)) {
745 continue;
746 }
747
748 let import_key = (entity.file_path.clone(), ref_name.to_string());
749 if let Some(import_target_id) = import_table.get(&import_key) {
750 if import_target_id != &entity.id
751 && !parent_child_pairs.contains(&(entity.id.as_str(), import_target_id.as_str()))
752 && !parent_child_pairs.contains(&(import_target_id.as_str(), entity.id.as_str()))
753 {
754 let ref_type = infer_ref_type(&entity.content, &ref_name);
755 entity_edges.push((
756 entity.id.clone(),
757 import_target_id.clone(),
758 ref_type,
759 ));
760 }
761 continue;
762 }
763
764 if let Some(target_ids) = symbol_table.get(ref_name) {
765 let target = target_ids
766 .iter()
767 .find(|id| {
768 *id != &entity.id
769 && entity_map
770 .get(*id)
771 .map_or(false, |e| e.file_path == entity.file_path)
772 });
773
774 if let Some(target_id) = target {
775 if parent_child_pairs.contains(&(entity.id.as_str(), target_id.as_str()))
776 || parent_child_pairs.contains(&(target_id.as_str(), entity.id.as_str()))
777 {
778 continue;
779 }
780 let ref_type = infer_ref_type(&entity.content, &ref_name);
781 entity_edges.push((
782 entity.id.clone(),
783 target_id.clone(),
784 ref_type,
785 ));
786 }
787 }
788 }
789 entity_edges
790 })
791 .collect();
792
793 let mut combined: Vec<(String, String, RefType)> = scope_edges;
795 combined.extend(resolved_refs);
796 let mut seen_edges: HashSet<(String, String)> = HashSet::with_capacity(combined.len());
797 let mut all_resolved: Vec<(String, String, RefType)> = Vec::with_capacity(combined.len());
798 for edge in combined {
799 if seen_edges.insert((edge.0.clone(), edge.1.clone())) {
800 all_resolved.push(edge);
801 }
802 }
803
804 let mut edges: Vec<EntityRef> = Vec::with_capacity(kept_edges.len() + all_resolved.len());
806 let mut dependents: HashMap<String, Vec<String>> = HashMap::new();
807 let mut dependencies: HashMap<String, Vec<String>> = HashMap::new();
808
809 let mut all_edge_pairs: HashSet<(String, String)> = HashSet::new();
811
812 for edge in kept_edges {
814 all_edge_pairs.insert((edge.from_entity.clone(), edge.to_entity.clone()));
815 dependents
816 .entry(edge.to_entity.clone())
817 .or_default()
818 .push(edge.from_entity.clone());
819 dependencies
820 .entry(edge.from_entity.clone())
821 .or_default()
822 .push(edge.to_entity.clone());
823 edges.push(edge);
824 }
825
826 for (from_entity, to_entity, ref_type) in all_resolved {
828 if !all_edge_pairs.insert((from_entity.clone(), to_entity.clone())) {
829 continue;
830 }
831 dependents
832 .entry(to_entity.clone())
833 .or_default()
834 .push(from_entity.clone());
835 dependencies
836 .entry(from_entity.clone())
837 .or_default()
838 .push(to_entity.clone());
839 edges.push(EntityRef {
840 from_entity,
841 to_entity,
842 ref_type,
843 });
844 }
845
846 let graph = EntityGraph {
847 entities: entity_map,
848 edges,
849 dependents,
850 dependencies,
851 };
852
853 (graph, all_entities)
854 }
855
856 pub fn get_dependents(&self, entity_id: &str) -> Vec<&EntityInfo> {
858 self.dependents
859 .get(entity_id)
860 .map(|ids| {
861 ids.iter()
862 .filter_map(|id| self.entities.get(id))
863 .collect()
864 })
865 .unwrap_or_default()
866 }
867
868 pub fn get_dependencies(&self, entity_id: &str) -> Vec<&EntityInfo> {
870 self.dependencies
871 .get(entity_id)
872 .map(|ids| {
873 ids.iter()
874 .filter_map(|id| self.entities.get(id))
875 .collect()
876 })
877 .unwrap_or_default()
878 }
879
880 pub fn impact_analysis(&self, entity_id: &str) -> Vec<&EntityInfo> {
883 self.impact_analysis_capped(entity_id, 10_000)
884 }
885
886 pub fn impact_analysis_bounded(&self, entity_id: &str, max_depth: usize) -> Vec<(&EntityInfo, usize)> {
889 let mut visited: HashSet<&str> = HashSet::new();
890 let mut queue: std::collections::VecDeque<(&str, usize)> = std::collections::VecDeque::new();
891 let mut result = Vec::new();
892
893 let start_key = match self.entities.get_key_value(entity_id) {
894 Some((k, _)) => k.as_str(),
895 None => return result,
896 };
897
898 queue.push_back((start_key, 0));
899 visited.insert(start_key);
900
901 while let Some((current, depth)) = queue.pop_front() {
902 if let Some(deps) = self.dependents.get(current) {
903 let next_depth = depth + 1;
904 if max_depth > 0 && next_depth > max_depth {
905 continue;
906 }
907 for dep in deps {
908 if visited.insert(dep.as_str()) {
909 if let Some(info) = self.entities.get(dep.as_str()) {
910 result.push((info, next_depth));
911 }
912 queue.push_back((dep.as_str(), next_depth));
913 }
914 }
915 }
916 }
917
918 result
919 }
920
921 pub fn impact_analysis_capped(&self, entity_id: &str, max_visited: usize) -> Vec<&EntityInfo> {
924 let mut visited: HashSet<&str> = HashSet::new();
925 let mut queue: std::collections::VecDeque<&str> = std::collections::VecDeque::new();
926 let mut result = Vec::new();
927
928 let start_key = match self.entities.get_key_value(entity_id) {
929 Some((k, _)) => k.as_str(),
930 None => return result,
931 };
932
933 queue.push_back(start_key);
934 visited.insert(start_key);
935
936 while let Some(current) = queue.pop_front() {
937 if result.len() >= max_visited {
938 break;
939 }
940 if let Some(deps) = self.dependents.get(current) {
941 for dep in deps {
942 if visited.insert(dep.as_str()) {
943 if let Some(info) = self.entities.get(dep.as_str()) {
944 result.push(info);
945 }
946 queue.push_back(dep.as_str());
947 if result.len() >= max_visited {
948 break;
949 }
950 }
951 }
952 }
953 }
954
955 result
956 }
957
958 pub fn impact_count(&self, entity_id: &str, max_count: usize) -> usize {
961 let mut visited: HashSet<&str> = HashSet::new();
962 let mut queue: std::collections::VecDeque<&str> = std::collections::VecDeque::new();
963 let mut count = 0;
964
965 let start_key = match self.entities.get_key_value(entity_id) {
967 Some((k, _)) => k.as_str(),
968 None => return 0,
969 };
970
971 queue.push_back(start_key);
972 visited.insert(start_key);
973
974 while let Some(current) = queue.pop_front() {
975 if count >= max_count {
976 break;
977 }
978 if let Some(deps) = self.dependents.get(current) {
979 for dep in deps {
980 if visited.insert(dep.as_str()) {
981 count += 1;
982 queue.push_back(dep.as_str());
983 if count >= max_count {
984 break;
985 }
986 }
987 }
988 }
989 }
990
991 count
992 }
993
994 pub fn filter_test_entities(&self, entities: &[crate::model::entity::SemanticEntity]) -> HashSet<String> {
997 let mut test_ids = HashSet::new();
998 for entity in entities {
999 if is_test_entity(entity) {
1000 test_ids.insert(entity.id.clone());
1001 }
1002 }
1003 test_ids
1004 }
1005
1006 pub fn test_impact(
1009 &self,
1010 entity_id: &str,
1011 all_entities: &[crate::model::entity::SemanticEntity],
1012 ) -> Vec<&EntityInfo> {
1013 let test_ids = self.filter_test_entities(all_entities);
1014 let impact = self.impact_analysis(entity_id);
1015 impact
1016 .into_iter()
1017 .filter(|info| test_ids.contains(&info.id))
1018 .collect()
1019 }
1020
1021 pub fn update_from_changes(
1032 &mut self,
1033 changed_files: &[FileChange],
1034 root: &Path,
1035 registry: &ParserRegistry,
1036 ) {
1037 let mut affected_files: HashSet<String> = HashSet::new();
1038 let mut new_entities: Vec<SemanticEntity> = Vec::new();
1039
1040 for change in changed_files {
1041 affected_files.insert(change.file_path.clone());
1042 if let Some(ref old_path) = change.old_file_path {
1043 affected_files.insert(old_path.clone());
1044 }
1045
1046 match change.status {
1047 FileStatus::Deleted => {
1048 self.remove_entities_for_file(&change.file_path);
1049 }
1050 FileStatus::Renamed => {
1051 if let Some(ref old_path) = change.old_file_path {
1053 self.remove_entities_for_file(old_path);
1054 }
1055 if let Some(entities) = self.extract_file_entities(
1057 &change.file_path,
1058 change.after_content.as_deref(),
1059 root,
1060 registry,
1061 ) {
1062 new_entities.extend(entities);
1063 }
1064 }
1065 FileStatus::Added | FileStatus::Modified => {
1066 self.remove_entities_for_file(&change.file_path);
1068 if let Some(entities) = self.extract_file_entities(
1070 &change.file_path,
1071 change.after_content.as_deref(),
1072 root,
1073 registry,
1074 ) {
1075 new_entities.extend(entities);
1076 }
1077 }
1078 }
1079 }
1080
1081 for entity in &new_entities {
1083 self.entities.insert(
1084 entity.id.clone(),
1085 EntityInfo {
1086 id: entity.id.clone(),
1087 name: entity.name.clone(),
1088 entity_type: entity.entity_type.clone(),
1089 file_path: entity.file_path.clone(),
1090 parent_id: entity.parent_id.clone(),
1091 start_line: entity.start_line,
1092 end_line: entity.end_line,
1093 },
1094 );
1095 }
1096
1097 let symbol_table = self.build_symbol_table();
1099
1100 for entity in &new_entities {
1102 self.resolve_entity_references(entity, &symbol_table);
1103 }
1104
1105 let changed_entity_names: HashSet<String> = new_entities
1108 .iter()
1109 .map(|e| e.name.clone())
1110 .collect();
1111
1112 let entities_to_recheck: Vec<String> = self
1114 .entities
1115 .values()
1116 .filter(|e| !affected_files.contains(&e.file_path))
1117 .filter(|e| {
1118 self.dependencies
1119 .get(&e.id)
1120 .map_or(false, |deps| {
1121 deps.iter().any(|dep_id| {
1122 self.entities
1123 .get(dep_id)
1124 .map_or(false, |dep| changed_entity_names.contains(&dep.name))
1125 })
1126 })
1127 })
1128 .map(|e| e.id.clone())
1129 .collect();
1130
1131 let _ = entities_to_recheck; }
1138
1139 fn extract_file_entities(
1141 &self,
1142 file_path: &str,
1143 content: Option<&str>,
1144 root: &Path,
1145 registry: &ParserRegistry,
1146 ) -> Option<Vec<SemanticEntity>> {
1147 let content = if let Some(c) = content {
1148 c.to_string()
1149 } else {
1150 let full_path = root.join(file_path);
1151 std::fs::read_to_string(&full_path).ok()?
1152 };
1153
1154 Some(registry.extract_entities(file_path, &content))
1155 }
1156
1157 fn remove_entities_for_file(&mut self, file_path: &str) {
1159 let ids_to_remove: Vec<String> = self
1161 .entities
1162 .values()
1163 .filter(|e| e.file_path == file_path)
1164 .map(|e| e.id.clone())
1165 .collect();
1166
1167 let id_set: HashSet<&str> = ids_to_remove.iter().map(|s| s.as_str()).collect();
1168
1169 for id in &ids_to_remove {
1171 self.entities.remove(id);
1172 }
1173
1174 self.edges
1176 .retain(|e| !id_set.contains(e.from_entity.as_str()) && !id_set.contains(e.to_entity.as_str()));
1177
1178 for id in &ids_to_remove {
1180 if let Some(deps) = self.dependencies.remove(id) {
1182 for dep in &deps {
1184 if let Some(dependents) = self.dependents.get_mut(dep) {
1185 dependents.retain(|d| d != id);
1186 }
1187 }
1188 }
1189 if let Some(deps) = self.dependents.remove(id) {
1191 for dep in &deps {
1193 if let Some(dependencies) = self.dependencies.get_mut(dep) {
1194 dependencies.retain(|d| d != id);
1195 }
1196 }
1197 }
1198 }
1199 }
1200
1201 fn build_symbol_table(&self) -> HashMap<String, Vec<String>> {
1203 let mut symbol_table: HashMap<String, Vec<String>> = HashMap::new();
1204 for entity in self.entities.values() {
1205 symbol_table
1206 .entry(entity.name.clone())
1207 .or_default()
1208 .push(entity.id.clone());
1209 }
1210 symbol_table
1211 }
1212
1213 fn resolve_entity_references(
1215 &mut self,
1216 entity: &SemanticEntity,
1217 symbol_table: &HashMap<String, Vec<String>>,
1218 ) {
1219 let refs = extract_references_from_content(&entity.content, &entity.name);
1220
1221 for ref_name in refs {
1222 if let Some(target_ids) = symbol_table.get(ref_name) {
1223 let target = target_ids
1224 .iter()
1225 .find(|id| {
1226 *id != &entity.id
1227 && self
1228 .entities
1229 .get(*id)
1230 .map_or(false, |e| e.file_path == entity.file_path)
1231 })
1232 .or_else(|| target_ids.iter().find(|id| *id != &entity.id));
1233
1234 if let Some(target_id) = target {
1235 let ref_type = infer_ref_type(&entity.content, &ref_name);
1236 self.edges.push(EntityRef {
1237 from_entity: entity.id.clone(),
1238 to_entity: target_id.clone(),
1239 ref_type,
1240 });
1241 self.dependents
1242 .entry(target_id.clone())
1243 .or_default()
1244 .push(entity.id.clone());
1245 self.dependencies
1246 .entry(entity.id.clone())
1247 .or_default()
1248 .push(target_id.clone());
1249 }
1250 }
1251 }
1252 }
1253}
1254
1255fn is_test_entity(entity: &crate::model::entity::SemanticEntity) -> bool {
1257 let name = &entity.name;
1258 let path = &entity.file_path;
1259 let content = &entity.content;
1260
1261 if name.starts_with("test_") || name.starts_with("Test") || name.ends_with("_test") || name.ends_with("Test") {
1263 return true;
1264 }
1265 if name.starts_with("it_") || name.starts_with("describe_") || name.starts_with("spec_") {
1266 return true;
1267 }
1268
1269 let path_lower = path.to_lowercase();
1271 let in_test_file = path_lower.contains("/test/")
1272 || path_lower.contains("/tests/")
1273 || path_lower.contains("/spec/")
1274 || path_lower.contains("_test.")
1275 || path_lower.contains(".test.")
1276 || path_lower.contains("_spec.")
1277 || path_lower.contains(".spec.");
1278
1279 let has_test_marker = content.contains("#[test]")
1281 || content.contains("#[cfg(test)]")
1282 || content.contains("@Test")
1283 || content.contains("@pytest")
1284 || content.contains("@test")
1285 || content.contains("describe(")
1286 || content.contains("it(")
1287 || content.contains("test(");
1288
1289 in_test_file && has_test_marker
1290}
1291
1292fn build_import_table(
1297 root: &Path,
1298 file_paths: &[String],
1299 symbol_table: &HashMap<String, Vec<String>>,
1300 entity_map: &HashMap<String, EntityInfo>,
1301 pre_parsed_content: Option<&[(String, String, tree_sitter::Tree)]>,
1302) -> HashMap<(String, String), String> {
1303 let content_map: HashMap<&str, &str> = pre_parsed_content
1305 .map(|files| {
1306 files.iter().map(|(fp, content, _)| (fp.as_str(), content.as_str())).collect()
1307 })
1308 .unwrap_or_default();
1309
1310 let per_file_imports: Vec<Vec<((String, String), String)>> = file_paths
1315 .par_iter()
1316 .filter_map(|file_path| {
1317 if file_path.ends_with(".go") {
1319 return None;
1320 }
1321
1322 let owned_content: Option<String>;
1324 let content: &str = if let Some(c) = content_map.get(file_path.as_str()) {
1325 c
1326 } else {
1327 let full_path = root.join(file_path);
1328 owned_content = std::fs::read_to_string(&full_path).ok();
1329 match owned_content.as_deref() {
1330 Some(c) => c,
1331 None => return None,
1332 }
1333 };
1334
1335 let mut local_imports: Vec<((String, String), String)> = Vec::new();
1336
1337 let mut logical_lines: Vec<String> = Vec::new();
1340 let mut current_line = String::new();
1341 let mut in_parens = false;
1342
1343 for line in content.lines() {
1344 let trimmed = line.trim();
1345 if in_parens {
1346 let clean = trimmed.trim_end_matches(|c: char| c == ')' || c == ',');
1348 let clean = clean.split('#').next().unwrap_or(clean).trim();
1349 if !clean.is_empty() && clean != "(" {
1350 current_line.push_str(", ");
1351 current_line.push_str(clean);
1352 }
1353 if trimmed.contains(')') {
1354 in_parens = false;
1355 logical_lines.push(std::mem::take(&mut current_line));
1356 }
1357 } else if trimmed.starts_with("from ") && trimmed.contains(" import ") {
1358 if trimmed.contains('(') && !trimmed.contains(')') {
1359 in_parens = true;
1361 let before_paren = trimmed.split('(').next().unwrap_or(trimmed);
1363 current_line = before_paren.trim().to_string();
1364 if let Some(after) = trimmed.split('(').nth(1) {
1366 let after = after.trim().trim_end_matches(')').trim();
1367 if !after.is_empty() {
1368 current_line.push(' ');
1369 current_line.push_str(after);
1370 }
1371 }
1372 } else {
1373 logical_lines.push(trimmed.to_string());
1374 }
1375 }
1376 }
1377
1378 for logical_line in &logical_lines {
1379 if let Some(rest) = logical_line.strip_prefix("from ") {
1380 let import_match = rest.find(" import ")
1382 .map(|pos| (pos, 8))
1383 .or_else(|| rest.find(" import,").map(|pos| (pos, 8)));
1384 if let Some((import_pos, skip)) = import_match {
1385 let module_path = &rest[..import_pos];
1386 let names_str = &rest[import_pos + skip..];
1387
1388 let source_module = module_path
1389 .trim_start_matches('.')
1390 .rsplit('.')
1391 .next()
1392 .unwrap_or(module_path.trim_start_matches('.'));
1393
1394 for name_part in names_str.split(',') {
1395 let name_part = name_part.trim();
1396 let imported_name = name_part.split_whitespace().next().unwrap_or(name_part);
1397 let imported_name = imported_name.trim_matches(|c: char| c == '(' || c == ')' || c == ',');
1399 if imported_name.is_empty() {
1400 continue;
1401 }
1402
1403 if let Some(target_ids) = symbol_table.get(imported_name) {
1404 let target = target_ids.iter().find(|id| {
1405 entity_map.get(*id).map_or(false, |e| {
1406 let stem = e.file_path.rsplit('/').next().unwrap_or(&e.file_path);
1407 let stem = stem.strip_suffix(".py")
1408 .or_else(|| stem.strip_suffix(".ts"))
1409 .or_else(|| stem.strip_suffix(".js"))
1410 .or_else(|| stem.strip_suffix(".rs"))
1411 .unwrap_or(stem);
1412 stem == source_module
1413 })
1414 });
1415 if let Some(target_id) = target {
1416 local_imports.push((
1417 (file_path.clone(), imported_name.to_string()),
1418 target_id.clone(),
1419 ));
1420 }
1421 }
1422 }
1423 }
1424 }
1425 }
1426
1427 let is_js_ts = file_path.ends_with(".js") || file_path.ends_with(".ts")
1430 || file_path.ends_with(".jsx") || file_path.ends_with(".tsx");
1431
1432 if is_js_ts {
1433 static JS_NAMED_RE: LazyLock<Regex> = LazyLock::new(|| {
1434 Regex::new(r#"import\s*\{([^}]+)\}\s*from\s*['"]([^'"]+)['"]"#).unwrap()
1435 });
1436 static JS_DEFAULT_RE: LazyLock<Regex> = LazyLock::new(|| {
1437 Regex::new(r#"import\s+(?:type\s+)?([A-Za-z_]\w*)\s+from\s*['"]([^'"]+)['"]"#).unwrap()
1438 });
1439
1440 for cap in JS_NAMED_RE.captures_iter(content) {
1441 let names_str = cap.get(1).unwrap().as_str();
1442 let module_path = cap.get(2).unwrap().as_str();
1443 let source_module = module_path.rsplit('/').next().unwrap_or(module_path);
1444 let source_module = strip_js_ext(source_module);
1445
1446 for name_part in names_str.split(',') {
1447 let name_part = name_part.trim();
1448 if name_part.is_empty() { continue; }
1449
1450 let (original_name, local_name) = if let Some(pos) = name_part.find(" as ") {
1452 let orig = name_part[..pos].trim();
1453 let local = name_part[pos + 4..].trim();
1454 let orig = orig.strip_prefix("type ").unwrap_or(orig);
1455 (orig, local)
1456 } else {
1457 let name = name_part.strip_prefix("type ").unwrap_or(name_part);
1458 (name, name)
1459 };
1460
1461 if original_name.is_empty() || local_name.is_empty() { continue; }
1462
1463 if let Some(target_ids) = symbol_table.get(original_name) {
1464 let target = target_ids.iter().find(|id| {
1465 entity_map.get(*id).map_or(false, |e| {
1466 let stem = e.file_path.rsplit('/').next().unwrap_or(&e.file_path);
1467 let stem = strip_file_ext(stem);
1468 stem == source_module
1469 })
1470 });
1471 if let Some(target_id) = target {
1472 local_imports.push((
1473 (file_path.clone(), local_name.to_string()),
1474 target_id.clone(),
1475 ));
1476 }
1477 }
1478 }
1479 }
1480
1481 for cap in JS_DEFAULT_RE.captures_iter(content) {
1482 let local_name = cap.get(1).unwrap().as_str();
1483 let module_path = cap.get(2).unwrap().as_str();
1484 let source_module = module_path.rsplit('/').next().unwrap_or(module_path);
1485 let source_module = strip_js_ext(source_module);
1486
1487 if let Some(target_ids) = symbol_table.get(local_name) {
1488 let target = target_ids.iter().find(|id| {
1489 entity_map.get(*id).map_or(false, |e| {
1490 let stem = e.file_path.rsplit('/').next().unwrap_or(&e.file_path);
1491 let stem = strip_file_ext(stem);
1492 stem == source_module
1493 })
1494 });
1495 if let Some(target_id) = target {
1496 local_imports.push((
1497 (file_path.clone(), local_name.to_string()),
1498 target_id.clone(),
1499 ));
1500 }
1501 }
1502 }
1503 }
1504
1505 let is_rust = file_path.ends_with(".rs");
1508 if is_rust {
1509 static RUST_USE_SIMPLE_RE: LazyLock<Regex> = LazyLock::new(|| {
1510 Regex::new(r"(?m)^\s*use\s+(?:(?:crate|super|self)::)?([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\s*;").unwrap()
1514 });
1515 static RUST_USE_GROUP_RE: LazyLock<Regex> = LazyLock::new(|| {
1516 Regex::new(r"(?m)^\s*use\s+(?:(?:crate|super|self)::)?([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)::\{([^}]+)\}\s*;").unwrap()
1519 });
1520
1521 let mut local_import_table: HashMap<(String, String), String> = HashMap::new();
1523
1524 for cap in RUST_USE_SIMPLE_RE.captures_iter(content) {
1527 let full_path_str = cap.get(1).unwrap().as_str();
1528 let parts: Vec<&str> = full_path_str.split("::").collect();
1529 if parts.is_empty() { continue; }
1530
1531 let imported_name = parts[parts.len() - 1];
1533 let source_module = if parts.len() >= 2 {
1535 parts[parts.len() - 2]
1536 } else {
1537 parts[0]
1538 };
1539
1540 resolve_rust_import(
1541 file_path, imported_name, source_module,
1542 symbol_table, entity_map, &mut local_import_table,
1543 );
1544 }
1545
1546 for cap in RUST_USE_GROUP_RE.captures_iter(content) {
1547 let module_path = cap.get(1).unwrap().as_str();
1548 let names_str = cap.get(2).unwrap().as_str();
1549
1550 let source_module = module_path.rsplit("::").next().unwrap_or(module_path);
1552
1553 for name_part in names_str.split(',') {
1554 let name_part = name_part.trim();
1555 let (original, local) = if let Some(pos) = name_part.find(" as ") {
1557 (&name_part[..pos], name_part[pos + 4..].trim())
1558 } else {
1559 (name_part, name_part)
1560 };
1561 let original = original.trim();
1562 let local = local.trim();
1563 if original.is_empty() || local.is_empty() { continue; }
1564
1565 resolve_rust_import(
1566 file_path, original, source_module,
1567 symbol_table, entity_map, &mut local_import_table,
1568 );
1569 if local != original {
1571 if let Some(target) = local_import_table.get(&(file_path.clone(), original.to_string())).cloned() {
1572 local_import_table.insert(
1573 (file_path.clone(), local.to_string()),
1574 target,
1575 );
1576 }
1577 }
1578 }
1579 }
1580
1581 for (key, val) in local_import_table {
1583 local_imports.push((key, val));
1584 }
1585 }
1586
1587 Some(local_imports)
1591 })
1592 .collect();
1593
1594 let mut import_table: HashMap<(String, String), String> = HashMap::new();
1596 for local_imports in per_file_imports {
1597 for (key, val) in local_imports {
1598 import_table.insert(key, val);
1599 }
1600 }
1601
1602 import_table
1603}
1604
1605fn resolve_rust_import(
1608 file_path: &str,
1609 imported_name: &str,
1610 source_module: &str,
1611 symbol_table: &HashMap<String, Vec<String>>,
1612 entity_map: &HashMap<String, EntityInfo>,
1613 import_table: &mut HashMap<(String, String), String>,
1614) {
1615 if let Some(target_ids) = symbol_table.get(imported_name) {
1616 let target = target_ids.iter().find(|id| {
1617 entity_map.get(*id).map_or(false, |e| {
1618 let stem = e.file_path.rsplit('/').next().unwrap_or(&e.file_path);
1619 let stem = strip_file_ext(stem);
1620 stem == source_module
1621 })
1622 });
1623 if let Some(target_id) = target {
1624 import_table.insert(
1625 (file_path.to_string(), imported_name.to_string()),
1626 target_id.clone(),
1627 );
1628 }
1629 }
1630}
1631
/// Removes a trailing `.js`, `.ts`, `.jsx` or `.tsx` from `s`.
///
/// Returns `s` unchanged when it ends with none of these extensions.
fn strip_js_ext(s: &str) -> &str {
    const JS_EXTENSIONS: [&str; 4] = [".js", ".ts", ".jsx", ".tsx"];

    JS_EXTENSIONS
        .iter()
        .find_map(|ext| s.strip_suffix(*ext))
        .unwrap_or(s)
}
1640
/// Removes a trailing `.py`, `.ts`, `.js`, `.tsx`, `.jsx` or `.rs` from `s`.
///
/// Returns `s` unchanged when it ends with none of these extensions.
fn strip_file_ext(s: &str) -> &str {
    const SOURCE_EXTENSIONS: [&str; 6] = [".py", ".ts", ".js", ".tsx", ".jsx", ".rs"];

    SOURCE_EXTENSIONS
        .iter()
        .find_map(|ext| s.strip_suffix(*ext))
        .unwrap_or(s)
}
1651
/// Blanks out string literals and comments, leaving only "live" code text.
///
/// The output has exactly the same byte length as `content`: every byte that belongs to
/// a string or comment is replaced by a space (including newlines inside multi-line
/// strings and block comments), all other bytes are copied through.
///
/// Recognised, language-agnostically and in this priority order:
/// * triple-quoted strings (`"""…"""`, `'''…'''`),
/// * double- and single-quoted strings with backslash escapes,
/// * `#` line comments and `//` line comments (up to, not including, the newline),
/// * `/* … */` block comments.
///
/// An unterminated triple-quoted string or block comment swallows the rest of the input.
fn strip_comments_and_strings(content: &str) -> String {
    let bytes = content.as_bytes();
    let len = bytes.len();
    let mut result = vec![b' '; len];
    let mut i = 0;

    while i < len {
        let rest = &bytes[i..];
        let current = bytes[i];

        // Triple-quoted string: skip to just past the closing delimiter. Without a
        // closing delimiter everything up to the end is string content; stopping short
        // would leak the trailing bytes (and could split a multi-byte character).
        if rest.starts_with(b"\"\"\"") || rest.starts_with(b"'''") {
            let delim = &rest[..3];
            i = rest[3..]
                .windows(3)
                .position(|w| w == delim)
                .map_or(len, |off| i + 3 + off + 3);
            continue;
        }

        // Ordinary quoted string: a backslash skips the byte that follows it.
        if current == b'"' || current == b'\'' {
            i += 1;
            while i < len {
                let c = bytes[i];
                if c == b'\\' {
                    i += 2;
                } else {
                    i += 1;
                    if c == current {
                        break;
                    }
                }
            }
            continue;
        }

        // Line comment: the newline itself is kept so line structure survives.
        if current == b'#' || rest.starts_with(b"//") {
            i = rest
                .iter()
                .position(|&c| c == b'\n')
                .map_or(len, |off| i + off);
            continue;
        }

        // Block comment: same end-of-input rule as for triple-quoted strings.
        if rest.starts_with(b"/*") {
            i = rest[2..]
                .windows(2)
                .position(|w| w == &b"*/"[..])
                .map_or(len, |off| i + 2 + off + 2);
            continue;
        }

        result[i] = current;
        i += 1;
    }

    String::from_utf8_lossy(&result).into_owned()
}
1730
1731fn extract_dot_chains<'a>(content: &'a str) -> Vec<(&'a str, &'a str)> {
1734 static DOT_CHAIN_RE: LazyLock<Regex> = LazyLock::new(|| {
1735 Regex::new(r"\b([A-Za-z_]\w*)\.([A-Za-z_]\w*)").unwrap()
1736 });
1737
1738 let mut chains = Vec::new();
1739 let mut seen: HashSet<(&str, &str)> = HashSet::new();
1740 for cap in DOT_CHAIN_RE.captures_iter(content) {
1741 let receiver = cap.get(1).unwrap().as_str();
1742 let member = cap.get(2).unwrap().as_str();
1743 if seen.insert((receiver, member)) {
1744 chains.push((receiver, member));
1745 }
1746 }
1747 chains
1748}
1749
1750fn extract_references_from_content<'a>(content: &'a str, own_name: &str) -> Vec<&'a str> {
1754 let stripped = strip_comments_and_strings(content);
1755 extract_references_with_stripped(content, own_name, &stripped)
1756}
1757
1758fn extract_references_with_stripped<'a>(content: &'a str, own_name: &str, stripped: &str) -> Vec<&'a str> {
1762 let stripped_words: HashSet<&str> = stripped
1763 .split(|c: char| !c.is_alphanumeric() && c != '_')
1764 .filter(|w| !w.is_empty())
1765 .collect();
1766
1767 let mut refs = Vec::new();
1768 let mut seen: HashSet<&str> = HashSet::new();
1769
1770 for word in content.split(|c: char| !c.is_alphanumeric() && c != '_') {
1771 if word.is_empty() || word == own_name {
1772 continue;
1773 }
1774 if is_keyword(word) || word.len() < 2 {
1775 continue;
1776 }
1777 if word.starts_with(|c: char| c.is_lowercase()) && word.len() < 3 {
1779 continue;
1780 }
1781 if !word.starts_with(|c: char| c.is_alphabetic() || c == '_') {
1782 continue;
1783 }
1784 if is_common_local_name(word) {
1786 continue;
1787 }
1788 if !stripped_words.contains(word) {
1790 continue;
1791 }
1792 if seen.insert(word) {
1793 refs.push(word);
1794 }
1795 }
1796
1797 refs
1798}
1799
/// Generic variable/parameter names that are ignored when extracting references.
static COMMON_LOCAL_NAMES: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
    HashSet::from([
        "result", "results", "data", "config", "value", "values",
        "item", "items", "input", "output", "args", "opts",
        "name", "path", "file", "line", "count", "index",
        "temp", "prev", "next", "curr", "current", "node",
        "left", "right", "root", "head", "tail", "body",
        "text", "content", "source", "target", "entry",
        "error", "errors", "message", "response", "request",
        "context", "state", "props", "event", "handler",
        "callback", "options", "params", "query", "list",
        "base", "info", "meta", "kind", "mode", "flag",
        "size", "length", "width", "height", "start", "stop",
        "begin", "done", "found", "status", "code",
    ])
});

/// Returns `true` when `word` is one of the `COMMON_LOCAL_NAMES` (exact, case-sensitive
/// match).
fn is_common_local_name(word: &str) -> bool {
    let names: &HashSet<&'static str> = &COMMON_LOCAL_NAMES;
    names.contains(word)
}
1822
1823fn infer_ref_type(content: &str, ref_name: &str) -> RefType {
1825 let bytes = content.as_bytes();
1828 let name_bytes = ref_name.as_bytes();
1829 let mut search_start = 0;
1830 while let Some(rel_pos) = content[search_start..].find(ref_name) {
1831 let pos = search_start + rel_pos;
1832 let after = pos + name_bytes.len();
1833 if after < bytes.len() && bytes[after] == b'(' {
1835 let is_boundary = pos == 0 || {
1837 let prev = bytes[pos - 1];
1838 !prev.is_ascii_alphanumeric() && prev != b'_'
1839 };
1840 if is_boundary {
1841 return RefType::Calls;
1842 }
1843 }
1844 search_start = pos + 1;
1846 while search_start < content.len() && !content.is_char_boundary(search_start) {
1847 search_start += 1;
1848 }
1849 }
1850
1851 for line in content.lines() {
1853 let trimmed = line.trim();
1854 if (trimmed.starts_with("import ") || trimmed.starts_with("use ")
1855 || trimmed.starts_with("from ") || trimmed.starts_with("require("))
1856 && trimmed.contains(ref_name)
1857 {
1858 return RefType::Imports;
1859 }
1860 }
1861
1862 RefType::TypeRef
1864}
1865
/// Words that are never treated as references: language keywords plus a handful of
/// very common built-in functions and type names, pooled across several languages.
static KEYWORDS: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
    // JavaScript / TypeScript / Java-style keywords and modifiers.
    const CURLY_BRACE_FAMILY: &[&str] = &[
        "if", "else", "for", "while", "do", "switch", "case", "break",
        "continue", "return", "try", "catch", "finally", "throw",
        "new", "delete", "typeof", "instanceof", "in", "of",
        "true", "false", "null", "undefined", "void", "this",
        "super", "class", "extends", "implements", "interface",
        "enum", "const", "let", "var", "function", "async",
        "await", "yield", "import", "export", "default", "from",
        "as", "static", "public", "private", "protected",
        "abstract", "final", "override",
    ];
    // Rust.
    const RUST: &[&str] = &[
        "fn", "pub", "mod", "use", "struct", "impl", "trait",
        "where", "type", "self", "Self", "mut", "ref", "match",
        "loop", "move", "unsafe", "extern", "crate", "dyn",
    ];
    // Python.
    const PYTHON: &[&str] = &[
        "def", "elif", "except", "raise", "with",
        "pass", "lambda", "nonlocal", "global", "assert",
        "True", "False", "and", "or", "not", "is",
    ];
    // Go keywords and built-ins.
    const GO: &[&str] = &[
        "func", "package", "range", "select", "chan", "go",
        "defer", "map", "make", "append", "len", "cap",
    ];
    // C / C++ keywords, preprocessor words and common library names.
    const C_FAMILY: &[&str] = &[
        "auto", "register", "volatile", "sizeof", "typedef",
        "template", "typename", "namespace", "virtual", "inline",
        "constexpr", "nullptr", "noexcept", "explicit", "friend",
        "operator", "using", "cout", "endl", "cerr", "cin",
        "printf", "scanf", "malloc", "free", "NULL", "include",
        "ifdef", "ifndef", "endif", "define", "pragma",
    ];
    // Ruby.
    const RUBY: &[&str] = &[
        "end", "then", "elsif", "unless", "until",
        "begin", "rescue", "ensure", "when", "require",
        "attr_accessor", "attr_reader", "attr_writer",
        "puts", "nil", "module", "defined",
    ];
    // C#.
    const CSHARP: &[&str] = &[
        "internal", "sealed", "readonly",
        "partial", "delegate", "event", "params", "out",
        "object", "decimal", "sbyte", "ushort", "uint",
        "ulong", "nint", "nuint", "dynamic",
        "get", "set", "value", "init", "record",
    ];
    // Primitive and ubiquitous standard-library type names.
    const COMMON_TYPES: &[&str] = &[
        "string", "number", "boolean", "int", "float", "double",
        "bool", "char", "byte", "i8", "i16", "i32", "i64",
        "u8", "u16", "u32", "u64", "f32", "f64", "usize",
        "isize", "str", "String", "Vec", "Option", "Result",
        "Box", "Arc", "Rc", "HashMap", "HashSet", "Some",
        "Ok", "Err",
    ];

    [
        CURLY_BRACE_FAMILY,
        RUST,
        PYTHON,
        GO,
        C_FAMILY,
        RUBY,
        CSHARP,
        COMMON_TYPES,
    ]
    .iter()
    .flat_map(|group| group.iter().copied())
    .collect()
});

/// Returns `true` when `word` is in `KEYWORDS` (exact, case-sensitive match).
fn is_keyword(word: &str) -> bool {
    let table: &HashSet<&'static str> = &KEYWORDS;
    table.contains(word)
}
1920
#[cfg(test)]
mod tests {
    use super::*;
    use crate::git::types::{FileChange, FileStatus};
    use std::io::Write;
    use tempfile::TempDir;

    /// Creates an empty temporary directory and a default parser registry.
    fn create_test_repo() -> (TempDir, ParserRegistry) {
        let dir = TempDir::new().unwrap();
        let registry = crate::parser::plugins::create_default_registry();
        (dir, registry)
    }

    /// Writes `content` to `dir/name`, creating parent directories as needed.
    fn write_file(dir: &Path, name: &str, content: &str) {
        let path = dir.join(name);
        if let Some(parent) = path.parent() {
            std::fs::create_dir_all(parent).unwrap();
        }
        let mut f = std::fs::File::create(path).unwrap();
        f.write_all(content.as_bytes()).unwrap();
    }

    /// Adding a file incrementally registers its entities and their dependencies.
    #[test]
    fn test_incremental_add_file() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "a.ts", "export function foo() { return bar(); }\n");
        write_file(root, "b.ts", "export function bar() { return 1; }\n");

        let (mut graph, _) = EntityGraph::build(root, &["a.ts".into(), "b.ts".into()], &registry);
        assert_eq!(graph.entities.len(), 2);

        write_file(root, "c.ts", "export function baz() { return foo(); }\n");
        graph.update_from_changes(
            &[FileChange {
                file_path: "c.ts".into(),
                status: FileStatus::Added,
                old_file_path: None,
                before_content: None,
                // No inline content: the file written to disk above is the source.
                after_content: None,
            }],
            root,
            &registry,
        );

        assert_eq!(graph.entities.len(), 3);
        assert!(graph.entities.contains_key("c.ts::function::baz"));
        let baz_deps = graph.get_dependencies("c.ts::function::baz");
        assert!(
            baz_deps.iter().any(|d| d.name == "foo"),
            "baz should depend on foo. Deps: {:?}",
            baz_deps.iter().map(|d| &d.name).collect::<Vec<_>>()
        );
    }

    /// Deleting a file removes its entities and every edge that pointed at them.
    #[test]
    fn test_incremental_delete_file() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "a.ts", "export function foo() { return bar(); }\n");
        write_file(root, "b.ts", "export function bar() { return 1; }\n");

        let (mut graph, _) = EntityGraph::build(root, &["a.ts".into(), "b.ts".into()], &registry);
        assert_eq!(graph.entities.len(), 2);

        graph.update_from_changes(
            &[FileChange {
                file_path: "b.ts".into(),
                status: FileStatus::Deleted,
                old_file_path: None,
                before_content: None,
                after_content: None,
            }],
            root,
            &registry,
        );

        assert_eq!(graph.entities.len(), 1);
        assert!(!graph.entities.contains_key("b.ts::function::bar"));
        let foo_deps = graph.get_dependencies("a.ts::function::foo");
        assert!(
            foo_deps.is_empty(),
            "foo's deps should be empty after bar deleted. Deps: {:?}",
            foo_deps.iter().map(|d| &d.name).collect::<Vec<_>>()
        );
    }

    /// Modifying a file replaces the old edges of its entities with the new ones.
    #[test]
    fn test_incremental_modify_file() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "a.ts", "export function foo() { return bar(); }\n");
        write_file(root, "b.ts", "export function bar() { return 1; }\nexport function baz() { return 2; }\n");

        let (mut graph, _) = EntityGraph::build(root, &["a.ts".into(), "b.ts".into()], &registry);
        assert_eq!(graph.entities.len(), 3);

        write_file(root, "a.ts", "export function foo() { return baz(); }\n");
        graph.update_from_changes(
            &[FileChange {
                file_path: "a.ts".into(),
                status: FileStatus::Modified,
                old_file_path: None,
                before_content: None,
                after_content: None,
            }],
            root,
            &registry,
        );

        assert_eq!(graph.entities.len(), 3);
        let foo_deps = graph.get_dependencies("a.ts::function::foo");
        let dep_names: Vec<&str> = foo_deps.iter().map(|d| d.name.as_str()).collect();
        assert!(dep_names.contains(&"baz"), "foo should depend on baz after modification. Deps: {:?}", dep_names);
        assert!(!dep_names.contains(&"bar"), "foo should no longer depend on bar. Deps: {:?}", dep_names);
    }

    /// Inline `after_content` is used even though `b.ts` is never written to disk.
    #[test]
    fn test_incremental_with_content() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "a.ts", "export function foo() { return 1; }\n");
        let (mut graph, _) = EntityGraph::build(root, &["a.ts".into()], &registry);
        assert_eq!(graph.entities.len(), 1);

        graph.update_from_changes(
            &[FileChange {
                file_path: "b.ts".into(),
                status: FileStatus::Added,
                old_file_path: None,
                before_content: None,
                after_content: Some("export function bar() { return foo(); }\n".into()),
            }],
            root,
            &registry,
        );

        assert_eq!(graph.entities.len(), 2);
        let bar_deps = graph.get_dependencies("b.ts::function::bar");
        assert!(bar_deps.iter().any(|d| d.name == "foo"));
    }

    #[test]
    fn test_extract_references() {
        let content = "function processData(input) {\n  const result = validateInput(input);\n  return transform(result);\n}";
        let refs = extract_references_from_content(content, "processData");
        assert!(refs.contains(&"validateInput"));
        assert!(refs.contains(&"transform"));
        // The entity's own name is never reported as a reference.
        assert!(!refs.contains(&"processData"));
    }

    #[test]
    fn test_extract_references_skips_keywords() {
        let content = "function foo() { if (true) { return false; } }";
        let refs = extract_references_from_content(content, "foo");
        assert!(!refs.contains(&"if"));
        assert!(!refs.contains(&"true"));
        assert!(!refs.contains(&"return"));
        assert!(!refs.contains(&"false"));
    }

    #[test]
    fn test_infer_ref_type_call() {
        assert_eq!(
            infer_ref_type("validateInput(data)", "validateInput"),
            RefType::Calls,
        );
    }

    #[test]
    fn test_infer_ref_type_type() {
        assert_eq!(
            infer_ref_type("let x: MyType = something", "MyType"),
            RefType::TypeRef,
        );
    }

    /// Multi-byte characters in the content must not break the byte-offset search.
    #[test]
    fn test_infer_ref_type_multibyte_utf8() {
        assert_eq!(
            infer_ref_type("let café = foo(x)", "foo"),
            RefType::Calls,
        );
        assert_eq!(
            infer_ref_type("class HandicapfrPublicationFieldsEnum:\n    É = 1\n    bar()", "bar"),
            RefType::Calls,
        );
        assert_eq!(
            infer_ref_type("// 日本語コメント\nlet x = 1", "missing"),
            RefType::TypeRef,
        );
    }

    #[test]
    fn test_dot_chain_self_resolution() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "service.py", "\
class MyService:
    def process(self):
        return self.validate()

    def validate(self):
        return True
");

        let (graph, _) = EntityGraph::build(root, &["service.py".into()], &registry);

        let process_id = graph.entities.keys()
            .find(|id| id.contains("process"))
            .expect("process entity should exist");
        let deps = graph.get_dependencies(process_id);
        assert!(
            deps.iter().any(|d| d.name == "validate"),
            "process should depend on validate via self.validate(). Deps: {:?}",
            deps.iter().map(|d| &d.name).collect::<Vec<_>>()
        );
    }

    #[test]
    fn test_dot_chain_this_resolution() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "service.ts", "\
class UserService {
    process() {
        return this.validate();
    }
    validate() {
        return true;
    }
}
");

        let (graph, _) = EntityGraph::build(root, &["service.ts".into()], &registry);

        let process_id = graph.entities.keys()
            .find(|id| id.contains("process"))
            .expect("process entity should exist");
        let deps = graph.get_dependencies(process_id);
        assert!(
            deps.iter().any(|d| d.name == "validate"),
            "process should depend on validate via this.validate(). Deps: {:?}",
            deps.iter().map(|d| &d.name).collect::<Vec<_>>()
        );
    }

    #[test]
    fn test_dot_chain_class_static() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "utils.ts", "\
class MathUtils {
    static compute() { return 1; }
}
function caller() { return MathUtils.compute(); }
");

        let (graph, _) = EntityGraph::build(root, &["utils.ts".into()], &registry);

        let caller_id = graph.entities.keys()
            .find(|id| id.contains("caller"))
            .expect("caller entity should exist");
        let deps = graph.get_dependencies(caller_id);
        assert!(
            deps.iter().any(|d| d.name == "compute"),
            "caller should depend on compute via MathUtils.compute(). Deps: {:?}",
            deps.iter().map(|d| &d.name).collect::<Vec<_>>()
        );
    }

    #[test]
    fn test_js_ts_import_resolution() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "helper.ts", "\
export function helper() { return 1; }
");
        write_file(root, "main.ts", "\
import { helper } from './helper';
export function main() { return helper(); }
");

        let (graph, _) = EntityGraph::build(
            root,
            &["helper.ts".into(), "main.ts".into()],
            &registry,
        );

        let main_id = graph.entities.keys()
            .find(|id| id.contains("main"))
            .expect("main entity should exist");
        let deps = graph.get_dependencies(main_id);
        assert!(
            deps.iter().any(|d| d.name == "helper"),
            "main should depend on helper via JS import. Deps: {:?}",
            deps.iter().map(|d| &d.name).collect::<Vec<_>>()
        );
    }

    /// `self.process()` in `a.py` must not be linked to the unrelated `process` in `b.py`.
    #[test]
    fn test_dot_chain_no_false_edges() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "a.py", "\
class ClassA:
    def run(self):
        return self.process()

    def process(self):
        return 1
");
        write_file(root, "b.py", "\
class ClassB:
    def process(self):
        return 2
");

        let (graph, _) = EntityGraph::build(
            root,
            &["a.py".into(), "b.py".into()],
            &registry,
        );

        let run_id = graph.entities.keys()
            .find(|id| id.contains("run"))
            .expect("run entity should exist");
        let deps = graph.get_dependencies(run_id);
        for dep in &deps {
            if dep.name == "process" {
                assert!(
                    dep.file_path == "a.py",
                    "run's process dep should be in a.py, not {}",
                    dep.file_path
                );
            }
        }
    }

    /// Plain calls (no dot chain) are still resolved by name.
    #[test]
    fn test_dot_chain_fallback() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "app.ts", "\
export function helper() { return 1; }
export function caller() {
    const val = helper();
    return val;
}
");

        let (graph, _) = EntityGraph::build(root, &["app.ts".into()], &registry);

        let caller_id = graph.entities.keys()
            .find(|id| id.contains("caller"))
            .expect("caller entity should exist");
        let deps = graph.get_dependencies(caller_id);
        assert!(
            deps.iter().any(|d| d.name == "helper"),
            "caller should still resolve helper via bag-of-words. Deps: {:?}",
            deps.iter().map(|d| &d.name).collect::<Vec<_>>()
        );
    }

}