1use std::collections::{HashMap, HashSet};
11use std::path::Path;
12use std::sync::LazyLock;
13
14use rayon::prelude::*;
15use regex::Regex;
16use serde::{Deserialize, Serialize};
17
18use crate::git::types::{FileChange, FileStatus};
19use crate::model::entity::SemanticEntity;
20use crate::parser::registry::ParserRegistry;
21use crate::parser::scope_resolve;
22
/// A directed reference (graph edge) from one entity to another.
///
/// Both endpoints are entity ids, i.e. keys of [`EntityGraph::entities`].
/// Field names are serialized in camelCase (`fromEntity`, `toEntity`, `refType`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct EntityRef {
    /// Id of the entity whose content contains the reference.
    pub from_entity: String,
    /// Id of the entity being referenced.
    pub to_entity: String,
    /// Kind of reference this edge represents.
    pub ref_type: RefType,
}
31
/// Classification of an [`EntityRef`] edge.
///
/// Variant names are serialized in lowercase (`calls`, `typeref`, `imports`).
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum RefType {
    /// Call-style reference; also used for every `receiver.member` match
    /// resolved against class members while building the graph.
    Calls,
    /// Presumably a reference to a type rather than a call; it is only
    /// produced by helpers outside this view (TODO confirm).
    TypeRef,
    /// Presumably a reference introduced by an import statement; it is only
    /// produced by helpers outside this view (TODO confirm).
    Imports,
}
43
/// Dependency graph over semantic entities.
///
/// `dependents` and `dependencies` are adjacency lists derived from `edges`
/// and are kept alongside it so lookups in either direction are a single
/// map access.
#[derive(Debug)]
pub struct EntityGraph {
    /// Entity metadata keyed by entity id.
    pub entities: HashMap<String, EntityInfo>,
    /// Every resolved reference, as a flat edge list.
    pub edges: Vec<EntityRef>,
    /// Reverse adjacency: entity id -> ids of the entities that reference it.
    pub dependents: HashMap<String, Vec<String>>,
    /// Forward adjacency: entity id -> ids of the entities it references.
    pub dependencies: HashMap<String, Vec<String>>,
}
56
/// Lightweight, content-free description of an entity stored in the graph.
///
/// Every field is copied from the corresponding `SemanticEntity`; the
/// entity's source text is not retained. Field names are serialized in
/// camelCase.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct EntityInfo {
    /// Entity id; also the key under which this value is stored in the graph.
    pub id: String,
    /// Entity name as reported by the parser plugin.
    pub name: String,
    /// Entity kind as a free-form string (the builders test it against
    /// `"class"`, `"struct"`, `"interface"` and `"class_type"`).
    pub entity_type: String,
    /// Path of the file the entity was extracted from.
    pub file_path: String,
    /// Id of the enclosing entity, if any. Omitted from serialized output
    /// when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub parent_id: Option<String>,
    /// First line of the entity, as reported by the parser plugin.
    pub start_line: usize,
    /// Last line of the entity, as reported by the parser plugin.
    pub end_line: usize,
}
70
71impl EntityGraph {
72 pub fn from_parts(entities: HashMap<String, EntityInfo>, edges: Vec<EntityRef>) -> Self {
74 let mut dependents: HashMap<String, Vec<String>> = HashMap::new();
75 let mut dependencies: HashMap<String, Vec<String>> = HashMap::new();
76 for edge in &edges {
77 dependents
78 .entry(edge.to_entity.clone())
79 .or_default()
80 .push(edge.from_entity.clone());
81 dependencies
82 .entry(edge.from_entity.clone())
83 .or_default()
84 .push(edge.to_entity.clone());
85 }
86 EntityGraph {
87 entities,
88 edges,
89 dependents,
90 dependencies,
91 }
92 }
93
    /// Builds the complete entity graph for `file_paths` (relative to `root`).
    ///
    /// Steps, in order:
    /// 1. Extract entities from every file in parallel. Files that cannot be
    ///    read as UTF-8 text, or for which the registry returns no plugin,
    ///    are skipped silently.
    /// 2. Build lookup tables (name -> ids, id -> info, parent/child pairs,
    ///    class membership, import table).
    /// 3. If any path has one of the listed extensions, run
    ///    `scope_resolve::resolve_with_scopes` and take its edges.
    /// 4. For every entity that produced no scope edge, fall back to the
    ///    textual resolution below (dot chains, then bare references).
    /// 5. Merge, de-duplicate on `(from, to)` and build the adjacency maps.
    pub fn build(
        root: &Path,
        file_paths: &[String],
        registry: &ParserRegistry,
    ) -> Self {
        // Step 1: parse all files in parallel; `?` inside the closure drops
        // unreadable files and files without a matching plugin.
        let all_entities: Vec<SemanticEntity> = file_paths
            .par_iter()
            .filter_map(|file_path| {
                let full_path = root.join(file_path);
                let content = std::fs::read_to_string(&full_path).ok()?;
                let plugin = registry.get_plugin_with_content(file_path, &content)?;
                Some(plugin.extract_entities(&content, file_path))
            })
            .flatten()
            .collect();

        // symbol_table: entity name -> ids of every entity with that name.
        let mut symbol_table: HashMap<String, Vec<String>> = HashMap::with_capacity(all_entities.len());
        // entity_map: entity id -> content-free metadata.
        let mut entity_map: HashMap<String, EntityInfo> = HashMap::with_capacity(all_entities.len());

        for entity in &all_entities {
            symbol_table
                .entry(entity.name.clone())
                .or_default()
                .push(entity.id.clone());

            entity_map.insert(
                entity.id.clone(),
                EntityInfo {
                    id: entity.id.clone(),
                    name: entity.name.clone(),
                    entity_type: entity.entity_type.clone(),
                    file_path: entity.file_path.clone(),
                    parent_id: entity.parent_id.clone(),
                    start_line: entity.start_line,
                    end_line: entity.end_line,
                },
            );
        }

        // (parent id, child id) pairs; used to suppress edges between an
        // entity and its direct parent or child.
        let parent_child_pairs: HashSet<(&str, &str)> = all_entities
            .iter()
            .filter_map(|e| {
                e.parent_id.as_ref().map(|pid| (pid.as_str(), e.id.as_str()))
            })
            .collect();

        // (parent id, child name) pairs; used to skip references from an
        // entity to the name of one of its own children.
        let class_child_names: HashSet<(&str, &str)> = all_entities
            .iter()
            .filter_map(|e| {
                e.parent_id.as_ref().map(|pid| (pid.as_str(), e.name.as_str()))
            })
            .collect();

        // Names of all class-like entities.
        let class_entity_names: HashSet<&str> = all_entities
            .iter()
            .filter(|e| matches!(e.entity_type.as_str(), "class" | "struct" | "interface" | "class_type"))
            .map(|e| e.name.as_str())
            .collect();

        let id_to_name: HashMap<&str, &str> = all_entities
            .iter()
            .map(|e| (e.id.as_str(), e.name.as_str()))
            .collect();

        // enclosing_class: member id -> name of its class-like parent.
        let mut enclosing_class: HashMap<&str, &str> = HashMap::new();
        // class_members: class name -> (member name, member id). Keyed by
        // name, so class-like entities sharing a name share one member list.
        let mut class_members: HashMap<&str, Vec<(&str, &str)>> = HashMap::new();

        for entity in &all_entities {
            if let Some(ref pid) = entity.parent_id {
                if let Some(&parent_name) = id_to_name.get(pid.as_str()) {
                    // The class-likeness test is by parent *name*, not id.
                    if class_entity_names.contains(parent_name) {
                        enclosing_class.insert(entity.id.as_str(), parent_name);
                        class_members
                            .entry(parent_name)
                            .or_default()
                            .push((entity.name.as_str(), entity.id.as_str()));
                    }
                }
            }
        }

        // (importing file path, imported name) -> id of the imported entity.
        let import_table = build_import_table(root, file_paths, &symbol_table, &entity_map);

        // Scope-aware resolution is only attempted when at least one file
        // has one of these extensions.
        let has_scope_lang = file_paths.iter().any(|f| {
            f.ends_with(".py") || f.ends_with(".ts") || f.ends_with(".tsx")
                || f.ends_with(".js") || f.ends_with(".jsx")
                || f.ends_with(".rs") || f.ends_with(".go")
        });
        let (scope_edges, scope_resolved_entities) = if has_scope_lang {
            let result = scope_resolve::resolve_with_scopes(root, file_paths, &all_entities, &entity_map);
            // Ids that are the source of at least one scope-resolved edge.
            let resolved_entity_ids: HashSet<String> = result.edges.iter()
                .map(|(from, _, _)| from.clone())
                .collect();
            (result.edges, resolved_entity_ids)
        } else {
            (vec![], HashSet::new())
        };

        // Textual fallback, run in parallel per entity.
        let resolved_refs: Vec<(String, String, RefType)> = all_entities
            .par_iter()
            .flat_map(|entity| {
                // Entities already covered by the scope resolver are skipped
                // entirely so the two strategies do not mix for one entity.
                if scope_resolved_entities.contains(&entity.id) {
                    return vec![];
                }

                let mut entity_edges = Vec::new();
                // Words already matched through a dot chain; the bare-word
                // pass below must not match them again.
                let mut consumed_words: HashSet<String> = HashSet::new();

                let stripped = strip_comments_and_strings(&entity.content);
                let dot_chains = extract_dot_chains(&stripped);

                for (receiver, member) in &dot_chains {
                    if *receiver == "self" || *receiver == "this" {
                        // `self.x` / `this.x`: look for a sibling member named
                        // `x` in the enclosing class, excluding the entity itself.
                        if let Some(class_name) = enclosing_class.get(entity.id.as_str()) {
                            if let Some(members) = class_members.get(class_name) {
                                for (n, tid) in members {
                                    if *n == *member && *tid != entity.id.as_str() {
                                        entity_edges.push((
                                            entity.id.clone(),
                                            tid.to_string(),
                                            RefType::Calls,
                                        ));
                                        consumed_words.insert(member.to_string());
                                        break;
                                    }
                                }
                            }
                        }
                    } else if class_entity_names.contains(*receiver) {
                        // `ClassName.x`: first member named `x` of that class.
                        if let Some(members) = class_members.get(*receiver) {
                            for (n, tid) in members {
                                if *n == *member {
                                    entity_edges.push((
                                        entity.id.clone(),
                                        tid.to_string(),
                                        RefType::Calls,
                                    ));
                                    consumed_words.insert(member.to_string());
                                    consumed_words.insert(receiver.to_string());
                                    break;
                                }
                            }
                        }
                    }
                }

                // Bare-word pass. Note it runs on the raw content, not on
                // `stripped`.
                let refs = extract_references_from_content(&entity.content, &entity.name);
                for ref_name in refs {
                    if consumed_words.contains(ref_name) {
                        continue;
                    }

                    // The name belongs to a direct child of this entity.
                    if class_child_names.contains(&(entity.id.as_str(), ref_name)) {
                        continue;
                    }

                    // Imports win over same-file lookup: once the name is in
                    // the import table it never falls through, even when the
                    // edge itself is suppressed (self or parent/child).
                    let import_key = (entity.file_path.clone(), ref_name.to_string());
                    if let Some(import_target_id) = import_table.get(&import_key) {
                        if import_target_id != &entity.id
                            && !parent_child_pairs.contains(&(entity.id.as_str(), import_target_id.as_str()))
                            && !parent_child_pairs.contains(&(import_target_id.as_str(), entity.id.as_str()))
                        {
                            let ref_type = infer_ref_type(&entity.content, &ref_name);
                            entity_edges.push((
                                entity.id.clone(),
                                import_target_id.clone(),
                                ref_type,
                            ));
                        }
                        continue;
                    }

                    // Otherwise only a same-file entity with that name is
                    // accepted; there is no cross-file fallback here.
                    if let Some(target_ids) = symbol_table.get(ref_name) {
                        let target = target_ids
                            .iter()
                            .find(|id| {
                                *id != &entity.id
                                    && entity_map
                                        .get(*id)
                                        .map_or(false, |e| e.file_path == entity.file_path)
                            });

                        if let Some(target_id) = target {
                            if parent_child_pairs.contains(&(entity.id.as_str(), target_id.as_str()))
                                || parent_child_pairs.contains(&(target_id.as_str(), entity.id.as_str()))
                            {
                                continue;
                            }
                            let ref_type = infer_ref_type(&entity.content, &ref_name);
                            entity_edges.push((
                                entity.id.clone(),
                                target_id.clone(),
                                ref_type,
                            ));
                        }
                    }
                }
                entity_edges
            })
            .collect();

        // Scope edges come first, so on a duplicate `(from, to)` pair the
        // scope-resolved edge (and its ref type) is the one that survives.
        let mut all_resolved: Vec<(String, String, RefType)> = scope_edges;
        all_resolved.extend(resolved_refs);
        let mut seen_edges: HashSet<(String, String)> = HashSet::new();
        all_resolved.retain(|e| seen_edges.insert((e.0.clone(), e.1.clone())));

        let mut edges: Vec<EntityRef> = Vec::with_capacity(all_resolved.len());
        let mut dependents: HashMap<String, Vec<String>> = HashMap::new();
        let mut dependencies: HashMap<String, Vec<String>> = HashMap::new();

        // Materialize the edge list and both adjacency directions.
        for (from_entity, to_entity, ref_type) in all_resolved {
            dependents
                .entry(to_entity.clone())
                .or_default()
                .push(from_entity.clone());
            dependencies
                .entry(from_entity.clone())
                .or_default()
                .push(to_entity.clone());
            edges.push(EntityRef {
                from_entity,
                to_entity,
                ref_type,
            });
        }

        EntityGraph {
            entities: entity_map,
            edges,
            dependents,
            dependencies,
        }
    }
362
    /// Rebuilds the graph after a subset of files changed, reusing cached
    /// entities and edges where possible.
    ///
    /// * `stale_files` - files to re-parse from disk.
    /// * `all_file_paths` - every file of the project; used for the import
    ///   table and to select the files handed to the scope resolver.
    /// * `cached_entities` - previously extracted entities. They are chained
    ///   as-is with the freshly parsed ones.
    ///   NOTE(review): presumably the caller has already dropped entities of
    ///   stale files from this list; otherwise old and new versions would
    ///   both end up in the result - confirm against the caller.
    /// * `cached_edges` - previously resolved edges.
    ///
    /// Returns the new graph together with the combined entity list
    /// (cached followed by newly parsed).
    ///
    /// Only "stale" entities (those whose file is in `stale_files`) and
    /// "affected clean" entities (non-stale sources of a cached edge that
    /// points at a stale entity) go through the textual resolution again.
    /// Cached edges that touch neither group are kept unchanged.
    pub fn build_incremental(
        root: &Path,
        stale_files: &[String],
        all_file_paths: &[String],
        cached_entities: Vec<SemanticEntity>,
        cached_edges: Vec<EntityRef>,
        registry: &ParserRegistry,
    ) -> (Self, Vec<SemanticEntity>) {
        let stale_set: HashSet<&str> = stale_files.iter().map(|s| s.as_str()).collect();

        // Re-parse only the stale files; unreadable files and files without
        // a plugin are skipped silently.
        let new_entities: Vec<SemanticEntity> = stale_files
            .par_iter()
            .filter_map(|file_path| {
                let full_path = root.join(file_path);
                let content = std::fs::read_to_string(&full_path).ok()?;
                let plugin = registry.get_plugin_with_content(file_path, &content)?;
                Some(plugin.extract_entities(&content, file_path))
            })
            .flatten()
            .collect();

        let all_entities: Vec<SemanticEntity> = cached_entities
            .into_iter()
            .chain(new_entities.into_iter())
            .collect();

        // Ids of every entity (cached or new) living in a stale file.
        let stale_entity_ids: HashSet<&str> = all_entities
            .iter()
            .filter(|e| stale_set.contains(e.file_path.as_str()))
            .map(|e| e.id.as_str())
            .collect();

        // Clean entities that used to point at a stale entity: their
        // references may now resolve differently.
        let mut affected_clean_ids: HashSet<String> = HashSet::new();
        for edge in &cached_edges {
            if stale_entity_ids.contains(edge.to_entity.as_str()) {
                if !stale_entity_ids.contains(edge.from_entity.as_str()) {
                    affected_clean_ids.insert(edge.from_entity.clone());
                }
            }
        }

        // Keep a cached edge only if neither endpoint is stale and its
        // source is not scheduled for re-resolution.
        let kept_edges: Vec<EntityRef> = cached_edges
            .into_iter()
            .filter(|e| {
                !stale_entity_ids.contains(e.from_entity.as_str())
                    && !stale_entity_ids.contains(e.to_entity.as_str())
                    && !affected_clean_ids.contains(&e.from_entity)
            })
            .collect();

        // Entities that go through the textual resolution below.
        let needs_resolution: HashSet<&str> = all_entities
            .iter()
            .filter(|e| {
                stale_entity_ids.contains(e.id.as_str())
                    || affected_clean_ids.contains(&e.id)
            })
            .map(|e| e.id.as_str())
            .collect();

        // Lookup tables are rebuilt over *all* entities so that re-resolved
        // references can still target clean entities.
        // symbol_table: name -> ids; entity_map: id -> metadata.
        let mut symbol_table: HashMap<String, Vec<String>> = HashMap::with_capacity(all_entities.len());
        let mut entity_map: HashMap<String, EntityInfo> = HashMap::with_capacity(all_entities.len());

        for entity in &all_entities {
            symbol_table
                .entry(entity.name.clone())
                .or_default()
                .push(entity.id.clone());
            entity_map.insert(
                entity.id.clone(),
                EntityInfo {
                    id: entity.id.clone(),
                    name: entity.name.clone(),
                    entity_type: entity.entity_type.clone(),
                    file_path: entity.file_path.clone(),
                    parent_id: entity.parent_id.clone(),
                    start_line: entity.start_line,
                    end_line: entity.end_line,
                },
            );
        }

        // (parent id, child id) pairs; edges between them are suppressed.
        let parent_child_pairs: HashSet<(&str, &str)> = all_entities
            .iter()
            .filter_map(|e| {
                e.parent_id.as_ref().map(|pid| (pid.as_str(), e.id.as_str()))
            })
            .collect();

        // (parent id, child name) pairs; references from a parent to the
        // name of its own child are skipped.
        let class_child_names: HashSet<(&str, &str)> = all_entities
            .iter()
            .filter_map(|e| {
                e.parent_id.as_ref().map(|pid| (pid.as_str(), e.name.as_str()))
            })
            .collect();

        // Names of all class-like entities.
        let class_entity_names: HashSet<&str> = all_entities
            .iter()
            .filter(|e| matches!(e.entity_type.as_str(), "class" | "struct" | "interface" | "class_type"))
            .map(|e| e.name.as_str())
            .collect();

        let id_to_name: HashMap<&str, &str> = all_entities
            .iter()
            .map(|e| (e.id.as_str(), e.name.as_str()))
            .collect();

        // enclosing_class: member id -> name of its class-like parent.
        // class_members: class name -> (member name, member id); keyed by
        // name, so same-named classes share one member list.
        let mut enclosing_class: HashMap<&str, &str> = HashMap::new();
        let mut class_members: HashMap<&str, Vec<(&str, &str)>> = HashMap::new();

        for entity in &all_entities {
            if let Some(ref pid) = entity.parent_id {
                if let Some(&parent_name) = id_to_name.get(pid.as_str()) {
                    if class_entity_names.contains(parent_name) {
                        enclosing_class.insert(entity.id.as_str(), parent_name);
                        class_members
                            .entry(parent_name)
                            .or_default()
                            .push((entity.name.as_str(), entity.id.as_str()));
                    }
                }
            }
        }

        // The import table is rebuilt from every file, not just stale ones.
        let import_table = build_import_table(root, all_file_paths, &symbol_table, &entity_map);

        // Files handed to the scope resolver: stale files plus any file
        // containing an affected clean entity.
        // NOTE(review): the inner `any` scans all entities once per file,
        // i.e. O(files x entities); a precomputed set of affected file paths
        // would avoid that.
        let resolve_file_paths: Vec<String> = all_file_paths
            .iter()
            .filter(|f| {
                stale_set.contains(f.as_str()) || all_entities.iter().any(|e| {
                    e.file_path == **f && affected_clean_ids.contains(&e.id)
                })
            })
            .cloned()
            .collect();

        let has_scope_lang = resolve_file_paths.iter().any(|f| {
            f.ends_with(".py") || f.ends_with(".ts") || f.ends_with(".tsx")
                || f.ends_with(".js") || f.ends_with(".jsx")
                || f.ends_with(".rs") || f.ends_with(".go")
        });
        let (scope_edges, scope_resolved_entities) = if has_scope_lang {
            let result = scope_resolve::resolve_with_scopes(root, &resolve_file_paths, &all_entities, &entity_map);
            // Ids that are the source of at least one scope-resolved edge.
            let resolved_entity_ids: HashSet<String> = result.edges.iter()
                .map(|(from, _, _)| from.clone())
                .collect();
            (result.edges, resolved_entity_ids)
        } else {
            (vec![], HashSet::new())
        };

        // Textual fallback, restricted to entities that need re-resolution.
        let resolved_refs: Vec<(String, String, RefType)> = all_entities
            .par_iter()
            .filter(|e| needs_resolution.contains(e.id.as_str()))
            .flat_map(|entity| {
                // Entities covered by the scope resolver are skipped entirely.
                if scope_resolved_entities.contains(&entity.id) {
                    return vec![];
                }

                let mut entity_edges = Vec::new();
                // Words already matched via a dot chain; not re-matched below.
                let mut consumed_words: HashSet<String> = HashSet::new();

                let stripped = strip_comments_and_strings(&entity.content);
                let dot_chains = extract_dot_chains(&stripped);

                for (receiver, member) in &dot_chains {
                    if *receiver == "self" || *receiver == "this" {
                        // `self.x` / `this.x`: sibling member of the
                        // enclosing class, excluding the entity itself.
                        if let Some(class_name) = enclosing_class.get(entity.id.as_str()) {
                            if let Some(members) = class_members.get(class_name) {
                                for (n, tid) in members {
                                    if *n == *member && *tid != entity.id.as_str() {
                                        entity_edges.push((
                                            entity.id.clone(),
                                            tid.to_string(),
                                            RefType::Calls,
                                        ));
                                        consumed_words.insert(member.to_string());
                                        break;
                                    }
                                }
                            }
                        }
                    } else if class_entity_names.contains(*receiver) {
                        // `ClassName.x`: first member named `x` of that class.
                        if let Some(members) = class_members.get(*receiver) {
                            for (n, tid) in members {
                                if *n == *member {
                                    entity_edges.push((
                                        entity.id.clone(),
                                        tid.to_string(),
                                        RefType::Calls,
                                    ));
                                    consumed_words.insert(member.to_string());
                                    consumed_words.insert(receiver.to_string());
                                    break;
                                }
                            }
                        }
                    }
                }

                // Bare-word pass over the raw (unstripped) content.
                let refs = extract_references_from_content(&entity.content, &entity.name);
                for ref_name in refs {
                    if consumed_words.contains(ref_name) {
                        continue;
                    }
                    // The name belongs to a direct child of this entity.
                    if class_child_names.contains(&(entity.id.as_str(), ref_name)) {
                        continue;
                    }

                    // An imported name never falls through to the same-file
                    // lookup, even when the edge itself is suppressed.
                    let import_key = (entity.file_path.clone(), ref_name.to_string());
                    if let Some(import_target_id) = import_table.get(&import_key) {
                        if import_target_id != &entity.id
                            && !parent_child_pairs.contains(&(entity.id.as_str(), import_target_id.as_str()))
                            && !parent_child_pairs.contains(&(import_target_id.as_str(), entity.id.as_str()))
                        {
                            let ref_type = infer_ref_type(&entity.content, &ref_name);
                            entity_edges.push((
                                entity.id.clone(),
                                import_target_id.clone(),
                                ref_type,
                            ));
                        }
                        continue;
                    }

                    // Same-file lookup only; no cross-file fallback.
                    if let Some(target_ids) = symbol_table.get(ref_name) {
                        let target = target_ids
                            .iter()
                            .find(|id| {
                                *id != &entity.id
                                    && entity_map
                                        .get(*id)
                                        .map_or(false, |e| e.file_path == entity.file_path)
                            });

                        if let Some(target_id) = target {
                            if parent_child_pairs.contains(&(entity.id.as_str(), target_id.as_str()))
                                || parent_child_pairs.contains(&(target_id.as_str(), entity.id.as_str()))
                            {
                                continue;
                            }
                            let ref_type = infer_ref_type(&entity.content, &ref_name);
                            entity_edges.push((
                                entity.id.clone(),
                                target_id.clone(),
                                ref_type,
                            ));
                        }
                    }
                }
                entity_edges
            })
            .collect();

        // Scope edges first, so they win over textual duplicates.
        let mut all_resolved: Vec<(String, String, RefType)> = scope_edges;
        all_resolved.extend(resolved_refs);
        let mut seen_edges: HashSet<(String, String)> = HashSet::new();
        all_resolved.retain(|e| seen_edges.insert((e.0.clone(), e.1.clone())));

        let mut edges: Vec<EntityRef> = Vec::with_capacity(kept_edges.len() + all_resolved.len());
        let mut dependents: HashMap<String, Vec<String>> = HashMap::new();
        let mut dependencies: HashMap<String, Vec<String>> = HashMap::new();

        // Tracks every `(from, to)` pair already emitted so a freshly
        // resolved edge never duplicates a kept cached edge.
        let mut all_edge_pairs: HashSet<(String, String)> = HashSet::new();

        // Cached edges go in first and therefore keep their ref type.
        for edge in kept_edges {
            all_edge_pairs.insert((edge.from_entity.clone(), edge.to_entity.clone()));
            dependents
                .entry(edge.to_entity.clone())
                .or_default()
                .push(edge.from_entity.clone());
            dependencies
                .entry(edge.from_entity.clone())
                .or_default()
                .push(edge.to_entity.clone());
            edges.push(edge);
        }

        for (from_entity, to_entity, ref_type) in all_resolved {
            // `insert` returns false when the pair is already present.
            if !all_edge_pairs.insert((from_entity.clone(), to_entity.clone())) {
                continue;
            }
            dependents
                .entry(to_entity.clone())
                .or_default()
                .push(from_entity.clone());
            dependencies
                .entry(from_entity.clone())
                .or_default()
                .push(to_entity.clone());
            edges.push(EntityRef {
                from_entity,
                to_entity,
                ref_type,
            });
        }

        let graph = EntityGraph {
            entities: entity_map,
            edges,
            dependents,
            dependencies,
        };

        (graph, all_entities)
    }
696
697 pub fn get_dependents(&self, entity_id: &str) -> Vec<&EntityInfo> {
699 self.dependents
700 .get(entity_id)
701 .map(|ids| {
702 ids.iter()
703 .filter_map(|id| self.entities.get(id))
704 .collect()
705 })
706 .unwrap_or_default()
707 }
708
709 pub fn get_dependencies(&self, entity_id: &str) -> Vec<&EntityInfo> {
711 self.dependencies
712 .get(entity_id)
713 .map(|ids| {
714 ids.iter()
715 .filter_map(|id| self.entities.get(id))
716 .collect()
717 })
718 .unwrap_or_default()
719 }
720
721 pub fn impact_analysis(&self, entity_id: &str) -> Vec<&EntityInfo> {
724 self.impact_analysis_capped(entity_id, 10_000)
725 }
726
727 pub fn impact_analysis_capped(&self, entity_id: &str, max_visited: usize) -> Vec<&EntityInfo> {
730 let mut visited: HashSet<&str> = HashSet::new();
731 let mut queue: std::collections::VecDeque<&str> = std::collections::VecDeque::new();
732 let mut result = Vec::new();
733
734 let start_key = match self.entities.get_key_value(entity_id) {
735 Some((k, _)) => k.as_str(),
736 None => return result,
737 };
738
739 queue.push_back(start_key);
740 visited.insert(start_key);
741
742 while let Some(current) = queue.pop_front() {
743 if result.len() >= max_visited {
744 break;
745 }
746 if let Some(deps) = self.dependents.get(current) {
747 for dep in deps {
748 if visited.insert(dep.as_str()) {
749 if let Some(info) = self.entities.get(dep.as_str()) {
750 result.push(info);
751 }
752 queue.push_back(dep.as_str());
753 if result.len() >= max_visited {
754 break;
755 }
756 }
757 }
758 }
759 }
760
761 result
762 }
763
764 pub fn impact_count(&self, entity_id: &str, max_count: usize) -> usize {
767 let mut visited: HashSet<&str> = HashSet::new();
768 let mut queue: std::collections::VecDeque<&str> = std::collections::VecDeque::new();
769 let mut count = 0;
770
771 let start_key = match self.entities.get_key_value(entity_id) {
773 Some((k, _)) => k.as_str(),
774 None => return 0,
775 };
776
777 queue.push_back(start_key);
778 visited.insert(start_key);
779
780 while let Some(current) = queue.pop_front() {
781 if count >= max_count {
782 break;
783 }
784 if let Some(deps) = self.dependents.get(current) {
785 for dep in deps {
786 if visited.insert(dep.as_str()) {
787 count += 1;
788 queue.push_back(dep.as_str());
789 if count >= max_count {
790 break;
791 }
792 }
793 }
794 }
795 }
796
797 count
798 }
799
800 pub fn filter_test_entities(&self, entities: &[crate::model::entity::SemanticEntity]) -> HashSet<String> {
803 let mut test_ids = HashSet::new();
804 for entity in entities {
805 if is_test_entity(entity) {
806 test_ids.insert(entity.id.clone());
807 }
808 }
809 test_ids
810 }
811
812 pub fn test_impact(
815 &self,
816 entity_id: &str,
817 all_entities: &[crate::model::entity::SemanticEntity],
818 ) -> Vec<&EntityInfo> {
819 let test_ids = self.filter_test_entities(all_entities);
820 let impact = self.impact_analysis(entity_id);
821 impact
822 .into_iter()
823 .filter(|info| test_ids.contains(&info.id))
824 .collect()
825 }
826
827 pub fn update_from_changes(
838 &mut self,
839 changed_files: &[FileChange],
840 root: &Path,
841 registry: &ParserRegistry,
842 ) {
843 let mut affected_files: HashSet<String> = HashSet::new();
844 let mut new_entities: Vec<SemanticEntity> = Vec::new();
845
846 for change in changed_files {
847 affected_files.insert(change.file_path.clone());
848 if let Some(ref old_path) = change.old_file_path {
849 affected_files.insert(old_path.clone());
850 }
851
852 match change.status {
853 FileStatus::Deleted => {
854 self.remove_entities_for_file(&change.file_path);
855 }
856 FileStatus::Renamed => {
857 if let Some(ref old_path) = change.old_file_path {
859 self.remove_entities_for_file(old_path);
860 }
861 if let Some(entities) = self.extract_file_entities(
863 &change.file_path,
864 change.after_content.as_deref(),
865 root,
866 registry,
867 ) {
868 new_entities.extend(entities);
869 }
870 }
871 FileStatus::Added | FileStatus::Modified => {
872 self.remove_entities_for_file(&change.file_path);
874 if let Some(entities) = self.extract_file_entities(
876 &change.file_path,
877 change.after_content.as_deref(),
878 root,
879 registry,
880 ) {
881 new_entities.extend(entities);
882 }
883 }
884 }
885 }
886
887 for entity in &new_entities {
889 self.entities.insert(
890 entity.id.clone(),
891 EntityInfo {
892 id: entity.id.clone(),
893 name: entity.name.clone(),
894 entity_type: entity.entity_type.clone(),
895 file_path: entity.file_path.clone(),
896 parent_id: entity.parent_id.clone(),
897 start_line: entity.start_line,
898 end_line: entity.end_line,
899 },
900 );
901 }
902
903 let symbol_table = self.build_symbol_table();
905
906 for entity in &new_entities {
908 self.resolve_entity_references(entity, &symbol_table);
909 }
910
911 let changed_entity_names: HashSet<String> = new_entities
914 .iter()
915 .map(|e| e.name.clone())
916 .collect();
917
918 let entities_to_recheck: Vec<String> = self
920 .entities
921 .values()
922 .filter(|e| !affected_files.contains(&e.file_path))
923 .filter(|e| {
924 self.dependencies
925 .get(&e.id)
926 .map_or(false, |deps| {
927 deps.iter().any(|dep_id| {
928 self.entities
929 .get(dep_id)
930 .map_or(false, |dep| changed_entity_names.contains(&dep.name))
931 })
932 })
933 })
934 .map(|e| e.id.clone())
935 .collect();
936
937 let _ = entities_to_recheck; }
944
945 fn extract_file_entities(
947 &self,
948 file_path: &str,
949 content: Option<&str>,
950 root: &Path,
951 registry: &ParserRegistry,
952 ) -> Option<Vec<SemanticEntity>> {
953 let content = if let Some(c) = content {
954 c.to_string()
955 } else {
956 let full_path = root.join(file_path);
957 std::fs::read_to_string(&full_path).ok()?
958 };
959
960 let plugin = registry.get_plugin_with_content(file_path, &content)?;
961
962 Some(plugin.extract_entities(&content, file_path))
963 }
964
965 fn remove_entities_for_file(&mut self, file_path: &str) {
967 let ids_to_remove: Vec<String> = self
969 .entities
970 .values()
971 .filter(|e| e.file_path == file_path)
972 .map(|e| e.id.clone())
973 .collect();
974
975 let id_set: HashSet<&str> = ids_to_remove.iter().map(|s| s.as_str()).collect();
976
977 for id in &ids_to_remove {
979 self.entities.remove(id);
980 }
981
982 self.edges
984 .retain(|e| !id_set.contains(e.from_entity.as_str()) && !id_set.contains(e.to_entity.as_str()));
985
986 for id in &ids_to_remove {
988 if let Some(deps) = self.dependencies.remove(id) {
990 for dep in &deps {
992 if let Some(dependents) = self.dependents.get_mut(dep) {
993 dependents.retain(|d| d != id);
994 }
995 }
996 }
997 if let Some(deps) = self.dependents.remove(id) {
999 for dep in &deps {
1001 if let Some(dependencies) = self.dependencies.get_mut(dep) {
1002 dependencies.retain(|d| d != id);
1003 }
1004 }
1005 }
1006 }
1007 }
1008
1009 fn build_symbol_table(&self) -> HashMap<String, Vec<String>> {
1011 let mut symbol_table: HashMap<String, Vec<String>> = HashMap::new();
1012 for entity in self.entities.values() {
1013 symbol_table
1014 .entry(entity.name.clone())
1015 .or_default()
1016 .push(entity.id.clone());
1017 }
1018 symbol_table
1019 }
1020
1021 fn resolve_entity_references(
1023 &mut self,
1024 entity: &SemanticEntity,
1025 symbol_table: &HashMap<String, Vec<String>>,
1026 ) {
1027 let refs = extract_references_from_content(&entity.content, &entity.name);
1028
1029 for ref_name in refs {
1030 if let Some(target_ids) = symbol_table.get(ref_name) {
1031 let target = target_ids
1032 .iter()
1033 .find(|id| {
1034 *id != &entity.id
1035 && self
1036 .entities
1037 .get(*id)
1038 .map_or(false, |e| e.file_path == entity.file_path)
1039 })
1040 .or_else(|| target_ids.iter().find(|id| *id != &entity.id));
1041
1042 if let Some(target_id) = target {
1043 let ref_type = infer_ref_type(&entity.content, &ref_name);
1044 self.edges.push(EntityRef {
1045 from_entity: entity.id.clone(),
1046 to_entity: target_id.clone(),
1047 ref_type,
1048 });
1049 self.dependents
1050 .entry(target_id.clone())
1051 .or_default()
1052 .push(entity.id.clone());
1053 self.dependencies
1054 .entry(entity.id.clone())
1055 .or_default()
1056 .push(target_id.clone());
1057 }
1058 }
1059 }
1060 }
1061}
1062
1063fn is_test_entity(entity: &crate::model::entity::SemanticEntity) -> bool {
1065 let name = &entity.name;
1066 let path = &entity.file_path;
1067 let content = &entity.content;
1068
1069 if name.starts_with("test_") || name.starts_with("Test") || name.ends_with("_test") || name.ends_with("Test") {
1071 return true;
1072 }
1073 if name.starts_with("it_") || name.starts_with("describe_") || name.starts_with("spec_") {
1074 return true;
1075 }
1076
1077 let path_lower = path.to_lowercase();
1079 let in_test_file = path_lower.contains("/test/")
1080 || path_lower.contains("/tests/")
1081 || path_lower.contains("/spec/")
1082 || path_lower.contains("_test.")
1083 || path_lower.contains(".test.")
1084 || path_lower.contains("_spec.")
1085 || path_lower.contains(".spec.");
1086
1087 let has_test_marker = content.contains("#[test]")
1089 || content.contains("#[cfg(test)]")
1090 || content.contains("@Test")
1091 || content.contains("@pytest")
1092 || content.contains("@test")
1093 || content.contains("describe(")
1094 || content.contains("it(")
1095 || content.contains("test(");
1096
1097 in_test_file && has_test_marker
1098}
1099
1100fn build_import_table(
1105 root: &Path,
1106 file_paths: &[String],
1107 symbol_table: &HashMap<String, Vec<String>>,
1108 entity_map: &HashMap<String, EntityInfo>,
1109) -> HashMap<(String, String), String> {
1110 let mut import_table: HashMap<(String, String), String> = HashMap::new();
1111
1112 for file_path in file_paths {
1113 let full_path = root.join(file_path);
1114 let content = match std::fs::read_to_string(&full_path) {
1115 Ok(c) => c,
1116 Err(_) => continue,
1117 };
1118
1119 let mut logical_lines: Vec<String> = Vec::new();
1122 let mut current_line = String::new();
1123 let mut in_parens = false;
1124
1125 for line in content.lines() {
1126 let trimmed = line.trim();
1127 if in_parens {
1128 let clean = trimmed.trim_end_matches(|c: char| c == ')' || c == ',');
1130 let clean = clean.split('#').next().unwrap_or(clean).trim();
1131 if !clean.is_empty() && clean != "(" {
1132 current_line.push_str(", ");
1133 current_line.push_str(clean);
1134 }
1135 if trimmed.contains(')') {
1136 in_parens = false;
1137 logical_lines.push(std::mem::take(&mut current_line));
1138 }
1139 } else if trimmed.starts_with("from ") && trimmed.contains(" import ") {
1140 if trimmed.contains('(') && !trimmed.contains(')') {
1141 in_parens = true;
1143 let before_paren = trimmed.split('(').next().unwrap_or(trimmed);
1145 current_line = before_paren.trim().to_string();
1146 if let Some(after) = trimmed.split('(').nth(1) {
1148 let after = after.trim().trim_end_matches(')').trim();
1149 if !after.is_empty() {
1150 current_line.push(' ');
1151 current_line.push_str(after);
1152 }
1153 }
1154 } else {
1155 logical_lines.push(trimmed.to_string());
1156 }
1157 }
1158 }
1159
1160 for logical_line in &logical_lines {
1161 if let Some(rest) = logical_line.strip_prefix("from ") {
1162 let import_match = rest.find(" import ")
1164 .map(|pos| (pos, 8))
1165 .or_else(|| rest.find(" import,").map(|pos| (pos, 8)));
1166 if let Some((import_pos, skip)) = import_match {
1167 let module_path = &rest[..import_pos];
1168 let names_str = &rest[import_pos + skip..];
1169
1170 let source_module = module_path
1171 .trim_start_matches('.')
1172 .rsplit('.')
1173 .next()
1174 .unwrap_or(module_path.trim_start_matches('.'));
1175
1176 for name_part in names_str.split(',') {
1177 let name_part = name_part.trim();
1178 let imported_name = name_part.split_whitespace().next().unwrap_or(name_part);
1179 let imported_name = imported_name.trim_matches(|c: char| c == '(' || c == ')' || c == ',');
1181 if imported_name.is_empty() {
1182 continue;
1183 }
1184
1185 if let Some(target_ids) = symbol_table.get(imported_name) {
1186 let target = target_ids.iter().find(|id| {
1187 entity_map.get(*id).map_or(false, |e| {
1188 let stem = e.file_path.rsplit('/').next().unwrap_or(&e.file_path);
1189 let stem = stem.strip_suffix(".py")
1190 .or_else(|| stem.strip_suffix(".ts"))
1191 .or_else(|| stem.strip_suffix(".js"))
1192 .or_else(|| stem.strip_suffix(".rs"))
1193 .unwrap_or(stem);
1194 stem == source_module
1195 })
1196 });
1197 if let Some(target_id) = target {
1198 import_table.insert(
1199 (file_path.clone(), imported_name.to_string()),
1200 target_id.clone(),
1201 );
1202 }
1203 }
1204 }
1205 }
1206 }
1207 }
1208
1209 let is_js_ts = file_path.ends_with(".js") || file_path.ends_with(".ts")
1212 || file_path.ends_with(".jsx") || file_path.ends_with(".tsx");
1213
1214 if is_js_ts {
1215 static JS_NAMED_RE: LazyLock<Regex> = LazyLock::new(|| {
1216 Regex::new(r#"import\s*\{([^}]+)\}\s*from\s*['"]([^'"]+)['"]"#).unwrap()
1217 });
1218 static JS_DEFAULT_RE: LazyLock<Regex> = LazyLock::new(|| {
1219 Regex::new(r#"import\s+(?:type\s+)?([A-Za-z_]\w*)\s+from\s*['"]([^'"]+)['"]"#).unwrap()
1220 });
1221
1222 for cap in JS_NAMED_RE.captures_iter(&content) {
1223 let names_str = cap.get(1).unwrap().as_str();
1224 let module_path = cap.get(2).unwrap().as_str();
1225 let source_module = module_path.rsplit('/').next().unwrap_or(module_path);
1226 let source_module = strip_js_ext(source_module);
1227
1228 for name_part in names_str.split(',') {
1229 let name_part = name_part.trim();
1230 if name_part.is_empty() { continue; }
1231
1232 let (original_name, local_name) = if let Some(pos) = name_part.find(" as ") {
1234 let orig = name_part[..pos].trim();
1235 let local = name_part[pos + 4..].trim();
1236 let orig = orig.strip_prefix("type ").unwrap_or(orig);
1237 (orig, local)
1238 } else {
1239 let name = name_part.strip_prefix("type ").unwrap_or(name_part);
1240 (name, name)
1241 };
1242
1243 if original_name.is_empty() || local_name.is_empty() { continue; }
1244
1245 if let Some(target_ids) = symbol_table.get(original_name) {
1246 let target = target_ids.iter().find(|id| {
1247 entity_map.get(*id).map_or(false, |e| {
1248 let stem = e.file_path.rsplit('/').next().unwrap_or(&e.file_path);
1249 let stem = strip_file_ext(stem);
1250 stem == source_module
1251 })
1252 });
1253 if let Some(target_id) = target {
1254 import_table.insert(
1255 (file_path.clone(), local_name.to_string()),
1256 target_id.clone(),
1257 );
1258 }
1259 }
1260 }
1261 }
1262
1263 for cap in JS_DEFAULT_RE.captures_iter(&content) {
1264 let local_name = cap.get(1).unwrap().as_str();
1265 let module_path = cap.get(2).unwrap().as_str();
1266 let source_module = module_path.rsplit('/').next().unwrap_or(module_path);
1267 let source_module = strip_js_ext(source_module);
1268
1269 if let Some(target_ids) = symbol_table.get(local_name) {
1270 let target = target_ids.iter().find(|id| {
1271 entity_map.get(*id).map_or(false, |e| {
1272 let stem = e.file_path.rsplit('/').next().unwrap_or(&e.file_path);
1273 let stem = strip_file_ext(stem);
1274 stem == source_module
1275 })
1276 });
1277 if let Some(target_id) = target {
1278 import_table.insert(
1279 (file_path.clone(), local_name.to_string()),
1280 target_id.clone(),
1281 );
1282 }
1283 }
1284 }
1285 }
1286
1287 let is_rust = file_path.ends_with(".rs");
1290 if is_rust {
1291 static RUST_USE_SIMPLE_RE: LazyLock<Regex> = LazyLock::new(|| {
1292 Regex::new(r"(?m)^\s*use\s+(?:(?:crate|super|self)::)?([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\s*;").unwrap()
1296 });
1297 static RUST_USE_GROUP_RE: LazyLock<Regex> = LazyLock::new(|| {
1298 Regex::new(r"(?m)^\s*use\s+(?:(?:crate|super|self)::)?([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)::\{([^}]+)\}\s*;").unwrap()
1301 });
1302
1303 for cap in RUST_USE_SIMPLE_RE.captures_iter(&content) {
1306 let full_path_str = cap.get(1).unwrap().as_str();
1307 let parts: Vec<&str> = full_path_str.split("::").collect();
1308 if parts.is_empty() { continue; }
1309
1310 let imported_name = parts[parts.len() - 1];
1312 let source_module = if parts.len() >= 2 {
1314 parts[parts.len() - 2]
1315 } else {
1316 parts[0]
1317 };
1318
1319 resolve_rust_import(
1320 file_path, imported_name, source_module,
1321 symbol_table, entity_map, &mut import_table,
1322 );
1323 }
1324
1325 for cap in RUST_USE_GROUP_RE.captures_iter(&content) {
1326 let module_path = cap.get(1).unwrap().as_str();
1327 let names_str = cap.get(2).unwrap().as_str();
1328
1329 let source_module = module_path.rsplit("::").next().unwrap_or(module_path);
1331
1332 for name_part in names_str.split(',') {
1333 let name_part = name_part.trim();
1334 let (original, local) = if let Some(pos) = name_part.find(" as ") {
1336 (&name_part[..pos], name_part[pos + 4..].trim())
1337 } else {
1338 (name_part, name_part)
1339 };
1340 let original = original.trim();
1341 let local = local.trim();
1342 if original.is_empty() || local.is_empty() { continue; }
1343
1344 resolve_rust_import(
1345 file_path, original, source_module,
1346 symbol_table, entity_map, &mut import_table,
1347 );
1348 if local != original {
1350 if let Some(target) = import_table.get(&(file_path.clone(), original.to_string())).cloned() {
1351 import_table.insert(
1352 (file_path.clone(), local.to_string()),
1353 target,
1354 );
1355 }
1356 }
1357 }
1358 }
1359 }
1360
1361 let is_go = file_path.ends_with(".go");
1364 if is_go {
1365 static GO_IMPORT_RE: LazyLock<Regex> = LazyLock::new(|| {
1366 Regex::new(r#"(?m)"([^"]+)""#).unwrap()
1367 });
1368
1369 let import_section = extract_go_import_section(&content);
1371 for cap in GO_IMPORT_RE.captures_iter(&import_section) {
1372 let import_path = cap.get(1).unwrap().as_str();
1373 let pkg_name = import_path.rsplit('/').next().unwrap_or(import_path);
1374
1375 for (name, target_ids) in symbol_table.iter() {
1377 for target_id in target_ids {
1378 if let Some(entity) = entity_map.get(target_id) {
1379 let stem = entity.file_path.rsplit('/').next().unwrap_or(&entity.file_path);
1380 let stem = strip_file_ext(stem);
1381 if stem == pkg_name || entity.file_path.contains(&format!("{}/", pkg_name)) {
1383 import_table.insert(
1384 (file_path.clone(), name.clone()),
1385 target_id.clone(),
1386 );
1387 }
1388 }
1389 }
1390 }
1391 }
1392 }
1393 }
1394
1395 import_table
1396}
1397
1398fn resolve_rust_import(
1401 file_path: &str,
1402 imported_name: &str,
1403 source_module: &str,
1404 symbol_table: &HashMap<String, Vec<String>>,
1405 entity_map: &HashMap<String, EntityInfo>,
1406 import_table: &mut HashMap<(String, String), String>,
1407) {
1408 if let Some(target_ids) = symbol_table.get(imported_name) {
1409 let target = target_ids.iter().find(|id| {
1410 entity_map.get(*id).map_or(false, |e| {
1411 let stem = e.file_path.rsplit('/').next().unwrap_or(&e.file_path);
1412 let stem = strip_file_ext(stem);
1413 stem == source_module
1414 })
1415 });
1416 if let Some(target_id) = target {
1417 import_table.insert(
1418 (file_path.to_string(), imported_name.to_string()),
1419 target_id.clone(),
1420 );
1421 }
1422 }
1423}
1424
/// Collects the import declarations of a Go source file into one string.
///
/// Each returned line is newline-terminated and is either
/// * a whole single-line declaration (`import "fmt"`, `import _ "embed"`,
///   `import js "encoding/json"`), or
/// * one trimmed line from inside a parenthesised `import ( ... )` group
///   (blank and comment lines inside the group are kept as they are).
///
/// Everything outside import declarations is dropped, so a caller can scan
/// the result for quoted paths without picking up ordinary string literals.
fn extract_go_import_section(content: &str) -> String {
    // True when `s` begins with an interpreted or raw string literal.
    fn is_quoted(s: &str) -> bool {
        s.starts_with('"') || s.starts_with('`')
    }

    // True when `spec` has the shape `<alias> <quoted path>`, where the alias
    // is `.`, `_` or an identifier.
    fn is_aliased_spec(spec: &str) -> bool {
        let Some((alias, path)) = spec.split_once(char::is_whitespace) else {
            return false;
        };
        let mut alias_chars = alias.chars();
        let alias_ok = alias == "."
            || (alias_chars.next().is_some_and(|c| c.is_alphabetic() || c == '_')
                && alias_chars.all(|c| c.is_alphanumeric() || c == '_'));
        alias_ok && is_quoted(path.trim_start())
    }

    let mut section = String::new();
    let mut in_import_block = false;

    for line in content.lines() {
        let trimmed = line.trim();

        if in_import_block {
            // `starts_with` rather than `==` so that `) // comment` still
            // closes the group instead of swallowing the rest of the file.
            if trimmed.starts_with(')') {
                in_import_block = false;
            } else {
                section.push_str(trimmed);
                section.push('\n');
            }
            continue;
        }

        let Some(after_keyword) = trimmed.strip_prefix("import") else {
            continue;
        };
        let spec = after_keyword.trim_start();
        // Whitespace after the keyword is what separates `import x "p"` from
        // an identifier that merely starts with "import".
        let has_gap = spec.len() != after_keyword.len();

        if spec.starts_with('(') {
            in_import_block = true;
        } else if is_quoted(spec) || (has_gap && is_aliased_spec(spec)) {
            section.push_str(trimmed);
            section.push('\n');
        }
    }

    section
}
1451
/// Removes one trailing JavaScript/TypeScript extension (`.js`, `.ts`,
/// `.jsx`, `.tsx`) from `s`; any other input is returned unchanged.
fn strip_js_ext(s: &str) -> &str {
    const JS_EXTENSIONS: [&str; 4] = [".js", ".ts", ".jsx", ".tsx"];

    JS_EXTENSIONS
        .iter()
        .find_map(|ext| s.strip_suffix(*ext))
        .unwrap_or(s)
}
1460
/// Removes one trailing source-file extension from a file name.
///
/// Recognised extensions: `.py`, `.ts`, `.js`, `.tsx`, `.jsx`, `.rs` and
/// `.go`. Any other input is returned unchanged.
///
/// `.go` is included because the Go import resolution compares the stripped
/// file name with the imported package name; without it that comparison could
/// never succeed for a Go file.
fn strip_file_ext(s: &str) -> &str {
    const SOURCE_EXTENSIONS: [&str; 7] = [".py", ".ts", ".js", ".tsx", ".jsx", ".rs", ".go"];

    SOURCE_EXTENSIONS
        .iter()
        .find_map(|ext| s.strip_suffix(*ext))
        .unwrap_or(s)
}
1471
/// Blanks out comments and string literals, keeping every other byte in place.
///
/// The result has the same byte length as `content`; each byte that belongs
/// to one of the following is replaced by a space:
/// * triple-quoted strings (`"""..."""`, `'''...'''`),
/// * single- or double-quoted strings (a backslash skips the next byte),
/// * line comments starting with `#` or `//` (the newline itself is kept),
/// * block comments `/* ... */`.
///
/// The scan is language-agnostic and purely lexical. When a triple-quoted
/// string or block comment is never closed, scanning resumes shortly before
/// the end of the input, so the last byte or two may be copied through.
fn strip_comments_and_strings(content: &str) -> String {
    // Index just past the first `delim` at or after `from`. When `delim` is
    // absent, returns the position at which the windowed search gave up.
    fn skip_past_delimiter(src: &[u8], from: usize, delim: &[u8]) -> usize {
        let width = delim.len();
        match src[from..].windows(width).position(|window| window == delim) {
            Some(offset) => from + offset + width,
            None => from.max(src.len() - (width - 1)),
        }
    }

    // Index just past the closing `quote`, honouring backslash escapes. May
    // return a value beyond `src.len()` when the input ends in a backslash.
    fn skip_quoted(src: &[u8], from: usize, quote: u8) -> usize {
        let mut cursor = from;
        while cursor < src.len() {
            match src[cursor] {
                b'\\' => cursor += 2,
                b if b == quote => return cursor + 1,
                _ => cursor += 1,
            }
        }
        cursor
    }

    // Index of the next newline at or after `from`, or the end of input.
    fn skip_line(src: &[u8], from: usize) -> usize {
        src[from..]
            .iter()
            .position(|&b| b == b'\n')
            .map_or(src.len(), |offset| from + offset)
    }

    let src = content.as_bytes();
    let mut kept = vec![b' '; src.len()];
    let mut pos = 0;

    while pos < src.len() {
        let byte = src[pos];
        let next = src.get(pos + 1).copied();

        pos = match (byte, next) {
            (b'"' | b'\'', _) => {
                let triple = [byte; 3];
                if src[pos..].starts_with(&triple) {
                    skip_past_delimiter(src, pos + 3, &triple)
                } else {
                    skip_quoted(src, pos + 1, byte)
                }
            }
            (b'#', _) | (b'/', Some(b'/')) => skip_line(src, pos),
            (b'/', Some(b'*')) => skip_past_delimiter(src, pos + 2, b"*/"),
            _ => {
                // Ordinary code byte: copy it through unchanged.
                kept[pos] = byte;
                pos + 1
            }
        };
    }

    String::from_utf8_lossy(&kept).into_owned()
}
1550
1551fn extract_dot_chains<'a>(content: &'a str) -> Vec<(&'a str, &'a str)> {
1554 static DOT_CHAIN_RE: LazyLock<Regex> = LazyLock::new(|| {
1555 Regex::new(r"\b([A-Za-z_]\w*)\.([A-Za-z_]\w*)").unwrap()
1556 });
1557
1558 let mut chains = Vec::new();
1559 let mut seen: HashSet<(&str, &str)> = HashSet::new();
1560 for cap in DOT_CHAIN_RE.captures_iter(content) {
1561 let receiver = cap.get(1).unwrap().as_str();
1562 let member = cap.get(2).unwrap().as_str();
1563 if seen.insert((receiver, member)) {
1564 chains.push((receiver, member));
1565 }
1566 }
1567 chains
1568}
1569
1570fn extract_references_from_content<'a>(content: &'a str, own_name: &str) -> Vec<&'a str> {
1574 let stripped = strip_comments_and_strings(content);
1577 let stripped_words: HashSet<&str> = stripped
1578 .split(|c: char| !c.is_alphanumeric() && c != '_')
1579 .filter(|w| !w.is_empty())
1580 .collect();
1581
1582 let mut refs = Vec::new();
1583 let mut seen: HashSet<&str> = HashSet::new();
1584
1585 for word in content.split(|c: char| !c.is_alphanumeric() && c != '_') {
1586 if word.is_empty() || word == own_name {
1587 continue;
1588 }
1589 if is_keyword(word) || word.len() < 2 {
1590 continue;
1591 }
1592 if word.starts_with(|c: char| c.is_lowercase()) && word.len() < 3 {
1594 continue;
1595 }
1596 if !word.starts_with(|c: char| c.is_alphabetic() || c == '_') {
1597 continue;
1598 }
1599 if is_common_local_name(word) {
1601 continue;
1602 }
1603 if !stripped_words.contains(word) {
1605 continue;
1606 }
1607 if seen.insert(word) {
1608 refs.push(word);
1609 }
1610 }
1611
1612 refs
1613}
1614
/// Generic variable-style names that `is_common_local_name` recognises.
/// `extract_references_from_content` skips any word found here.
static COMMON_LOCAL_NAMES: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
    HashSet::from([
        "result", "results", "data", "config", "value", "values",
        "item", "items", "input", "output", "args", "opts",
        "name", "path", "file", "line", "count", "index",
        "temp", "prev", "next", "curr", "current", "node",
        "left", "right", "root", "head", "tail", "body",
        "text", "content", "source", "target", "entry",
        "error", "errors", "message", "response", "request",
        "context", "state", "props", "event", "handler",
        "callback", "options", "params", "query", "list",
        "base", "info", "meta", "kind", "mode", "flag",
        "size", "length", "width", "height", "start", "stop",
        "begin", "done", "found", "status", "code", "test",
    ])
});

/// Returns `true` when `word` is one of the names in `COMMON_LOCAL_NAMES`.
/// The comparison is exact and case-sensitive.
fn is_common_local_name(word: &str) -> bool {
    COMMON_LOCAL_NAMES.contains(word)
}
1637
1638fn infer_ref_type(content: &str, ref_name: &str) -> RefType {
1640 let bytes = content.as_bytes();
1643 let name_bytes = ref_name.as_bytes();
1644 let mut search_start = 0;
1645 while let Some(rel_pos) = content[search_start..].find(ref_name) {
1646 let pos = search_start + rel_pos;
1647 let after = pos + name_bytes.len();
1648 if after < bytes.len() && bytes[after] == b'(' {
1650 let is_boundary = pos == 0 || {
1652 let prev = bytes[pos - 1];
1653 !prev.is_ascii_alphanumeric() && prev != b'_'
1654 };
1655 if is_boundary {
1656 return RefType::Calls;
1657 }
1658 }
1659 search_start = pos + 1;
1661 while search_start < content.len() && !content.is_char_boundary(search_start) {
1662 search_start += 1;
1663 }
1664 }
1665
1666 for line in content.lines() {
1668 let trimmed = line.trim();
1669 if (trimmed.starts_with("import ") || trimmed.starts_with("use ")
1670 || trimmed.starts_with("from ") || trimmed.starts_with("require("))
1671 && trimmed.contains(ref_name)
1672 {
1673 return RefType::Imports;
1674 }
1675 }
1676
1677 RefType::TypeRef
1679}
1680
/// Words that `is_keyword` treats as language keywords, builtins or
/// ubiquitous standard names. The grouping comments below are only a reading
/// aid; membership is a flat, case-sensitive set.
static KEYWORDS: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
    HashSet::from([
        // JavaScript / TypeScript / Java-style keywords
        "if", "else", "for", "while", "do", "switch", "case", "break",
        "continue", "return", "try", "catch", "finally", "throw",
        "new", "delete", "typeof", "instanceof", "in", "of",
        "true", "false", "null", "undefined", "void", "this",
        "super", "class", "extends", "implements", "interface",
        "enum", "const", "let", "var", "function", "async",
        "await", "yield", "import", "export", "default", "from",
        "as", "static", "public", "private", "protected",
        "abstract", "final", "override",
        // Rust
        "fn", "pub", "mod", "use", "struct", "impl", "trait",
        "where", "type", "self", "Self", "mut", "ref", "match",
        "loop", "move", "unsafe", "extern", "crate", "dyn",
        // Python
        "def", "elif", "except", "raise", "with",
        "pass", "lambda", "nonlocal", "global", "assert",
        "True", "False", "and", "or", "not", "is",
        // Go
        "func", "package", "range", "select", "chan", "go",
        "defer", "map", "make", "append", "len", "cap",
        // C / C++
        "auto", "register", "volatile", "sizeof", "typedef",
        "template", "typename", "namespace", "virtual", "inline",
        "constexpr", "nullptr", "noexcept", "explicit", "friend",
        "operator", "using", "cout", "endl", "cerr", "cin",
        "printf", "scanf", "malloc", "free", "NULL", "include",
        "ifdef", "ifndef", "endif", "define", "pragma",
        // Ruby
        "end", "then", "elsif", "unless", "until",
        "begin", "rescue", "ensure", "when", "require",
        "attr_accessor", "attr_reader", "attr_writer",
        "puts", "nil", "module", "defined",
        // C#
        "internal", "sealed", "readonly",
        "partial", "delegate", "event", "params", "out",
        "object", "decimal", "sbyte", "ushort", "uint",
        "ulong", "nint", "nuint", "dynamic",
        "get", "set", "value", "init", "record",
        // Primitive and standard-library type names
        "string", "number", "boolean", "int", "float", "double",
        "bool", "char", "byte", "i8", "i16", "i32", "i64",
        "u8", "u16", "u32", "u64", "f32", "f64", "usize",
        "isize", "str", "String", "Vec", "Option", "Result",
        "Box", "Arc", "Rc", "HashMap", "HashSet", "Some",
        "Ok", "Err",
    ])
});

/// Returns `true` when `word` is listed in `KEYWORDS` (exact, case-sensitive).
fn is_keyword(word: &str) -> bool {
    KEYWORDS.contains(word)
}
1735
#[cfg(test)]
mod tests {
    use super::*;
    use crate::git::types::{FileChange, FileStatus};
    use std::io::Write;
    use tempfile::TempDir;

    /// Creates an empty temporary directory together with the registry
    /// returned by `create_default_registry`.
    fn create_test_repo() -> (TempDir, ParserRegistry) {
        let dir = TempDir::new().unwrap();
        let registry = crate::parser::plugins::create_default_registry();
        (dir, registry)
    }

    /// Writes `content` to `dir/name`, creating parent directories first.
    fn write_file(dir: &Path, name: &str, content: &str) {
        let path = dir.join(name);
        if let Some(parent) = path.parent() {
            std::fs::create_dir_all(parent).unwrap();
        }
        let mut f = std::fs::File::create(path).unwrap();
        f.write_all(content.as_bytes()).unwrap();
    }

    /// Adding a file through `update_from_changes` registers its entity and
    /// links it to an entity that was already in the graph.
    #[test]
    fn test_incremental_add_file() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "a.ts", "export function foo() { return bar(); }\n");
        write_file(root, "b.ts", "export function bar() { return 1; }\n");

        let mut graph = EntityGraph::build(root, &["a.ts".into(), "b.ts".into()], &registry);
        assert_eq!(graph.entities.len(), 2);

        // The new file exists on disk; no inline content is supplied.
        write_file(root, "c.ts", "export function baz() { return foo(); }\n");
        graph.update_from_changes(
            &[FileChange {
                file_path: "c.ts".into(),
                status: FileStatus::Added,
                old_file_path: None,
                before_content: None,
                after_content: None,
            }],
            root,
            &registry,
        );

        assert_eq!(graph.entities.len(), 3);
        assert!(graph.entities.contains_key("c.ts::function::baz"));
        let baz_deps = graph.get_dependencies("c.ts::function::baz");
        assert!(
            baz_deps.iter().any(|d| d.name == "foo"),
            "baz should depend on foo. Deps: {:?}",
            baz_deps.iter().map(|d| &d.name).collect::<Vec<_>>()
        );
    }

    /// Deleting a file removes its entities and the edges pointing at them.
    #[test]
    fn test_incremental_delete_file() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "a.ts", "export function foo() { return bar(); }\n");
        write_file(root, "b.ts", "export function bar() { return 1; }\n");

        let mut graph = EntityGraph::build(root, &["a.ts".into(), "b.ts".into()], &registry);
        assert_eq!(graph.entities.len(), 2);

        graph.update_from_changes(
            &[FileChange {
                file_path: "b.ts".into(),
                status: FileStatus::Deleted,
                old_file_path: None,
                before_content: None,
                after_content: None,
            }],
            root,
            &registry,
        );

        assert_eq!(graph.entities.len(), 1);
        assert!(!graph.entities.contains_key("b.ts::function::bar"));
        let foo_deps = graph.get_dependencies("a.ts::function::foo");
        assert!(
            foo_deps.is_empty(),
            "foo's deps should be empty after bar deleted. Deps: {:?}",
            foo_deps.iter().map(|d| &d.name).collect::<Vec<_>>()
        );
    }

    /// Modifying a file replaces its outgoing edges with the new ones.
    #[test]
    fn test_incremental_modify_file() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "a.ts", "export function foo() { return bar(); }\n");
        write_file(root, "b.ts", "export function bar() { return 1; }\nexport function baz() { return 2; }\n");

        let mut graph = EntityGraph::build(root, &["a.ts".into(), "b.ts".into()], &registry);
        assert_eq!(graph.entities.len(), 3);

        // `foo` now calls `baz` instead of `bar`.
        write_file(root, "a.ts", "export function foo() { return baz(); }\n");
        graph.update_from_changes(
            &[FileChange {
                file_path: "a.ts".into(),
                status: FileStatus::Modified,
                old_file_path: None,
                before_content: None,
                after_content: None,
            }],
            root,
            &registry,
        );

        assert_eq!(graph.entities.len(), 3);
        let foo_deps = graph.get_dependencies("a.ts::function::foo");
        let dep_names: Vec<&str> = foo_deps.iter().map(|d| d.name.as_str()).collect();
        assert!(dep_names.contains(&"baz"), "foo should depend on baz after modification. Deps: {:?}", dep_names);
        assert!(!dep_names.contains(&"bar"), "foo should no longer depend on bar. Deps: {:?}", dep_names);
    }

    /// A change that carries `after_content` is processed even though the
    /// file was never written to disk.
    #[test]
    fn test_incremental_with_content() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "a.ts", "export function foo() { return 1; }\n");
        let mut graph = EntityGraph::build(root, &["a.ts".into()], &registry);
        assert_eq!(graph.entities.len(), 1);

        graph.update_from_changes(
            &[FileChange {
                file_path: "b.ts".into(),
                status: FileStatus::Added,
                old_file_path: None,
                before_content: None,
                after_content: Some("export function bar() { return foo(); }\n".into()),
            }],
            root,
            &registry,
        );

        assert_eq!(graph.entities.len(), 2);
        let bar_deps = graph.get_dependencies("b.ts::function::bar");
        assert!(bar_deps.iter().any(|d| d.name == "foo"));
    }

    /// Called functions are reported; the entity's own name is not.
    #[test]
    fn test_extract_references() {
        let content = "function processData(input) {\n const result = validateInput(input);\n return transform(result);\n}";
        let refs = extract_references_from_content(content, "processData");
        assert!(refs.contains(&"validateInput"));
        assert!(refs.contains(&"transform"));
        assert!(!refs.contains(&"processData"));
    }

    /// Keywords never show up as references.
    #[test]
    fn test_extract_references_skips_keywords() {
        let content = "function foo() { if (true) { return false; } }";
        let refs = extract_references_from_content(content, "foo");
        assert!(!refs.contains(&"if"));
        assert!(!refs.contains(&"true"));
        assert!(!refs.contains(&"return"));
        assert!(!refs.contains(&"false"));
    }

    /// A name directly followed by `(` is classified as a call.
    #[test]
    fn test_infer_ref_type_call() {
        assert_eq!(
            infer_ref_type("validateInput(data)", "validateInput"),
            RefType::Calls,
        );
    }

    /// A name that is neither called nor imported falls back to `TypeRef`.
    #[test]
    fn test_infer_ref_type_type() {
        assert_eq!(
            infer_ref_type("let x: MyType = something", "MyType"),
            RefType::TypeRef,
        );
    }

    /// Multi-byte UTF-8 around the searched name must not cause a panic or a
    /// wrong classification.
    #[test]
    fn test_infer_ref_type_multibyte_utf8() {
        assert_eq!(
            infer_ref_type("let café = foo(x)", "foo"),
            RefType::Calls,
        );
        assert_eq!(
            infer_ref_type("class HandicapfrPublicationFieldsEnum:\n É = 1\n bar()", "bar"),
            RefType::Calls,
        );
        assert_eq!(
            infer_ref_type("// 日本語コメント\nlet x = 1", "missing"),
            RefType::TypeRef,
        );
    }

    /// Python `self.method()` resolves to the sibling method.
    #[test]
    fn test_dot_chain_self_resolution() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "service.py", "\
class MyService:
    def process(self):
        return self.validate()

    def validate(self):
        return True
");

        let graph = EntityGraph::build(root, &["service.py".into()], &registry);

        let process_id = graph.entities.keys()
            .find(|id| id.contains("process"))
            .expect("process entity should exist");
        let deps = graph.get_dependencies(process_id);
        assert!(
            deps.iter().any(|d| d.name == "validate"),
            "process should depend on validate via self.validate(). Deps: {:?}",
            deps.iter().map(|d| &d.name).collect::<Vec<_>>()
        );
    }

    /// TypeScript `this.method()` resolves to the sibling method.
    #[test]
    fn test_dot_chain_this_resolution() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "service.ts", "\
class UserService {
    process() {
        return this.validate();
    }
    validate() {
        return true;
    }
}
");

        let graph = EntityGraph::build(root, &["service.ts".into()], &registry);

        let process_id = graph.entities.keys()
            .find(|id| id.contains("process"))
            .expect("process entity should exist");
        let deps = graph.get_dependencies(process_id);
        assert!(
            deps.iter().any(|d| d.name == "validate"),
            "process should depend on validate via this.validate(). Deps: {:?}",
            deps.iter().map(|d| &d.name).collect::<Vec<_>>()
        );
    }

    /// `ClassName.staticMethod()` resolves to the static method.
    #[test]
    fn test_dot_chain_class_static() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "utils.ts", "\
class MathUtils {
    static compute() { return 1; }
}
function caller() { return MathUtils.compute(); }
");

        let graph = EntityGraph::build(root, &["utils.ts".into()], &registry);

        let caller_id = graph.entities.keys()
            .find(|id| id.contains("caller"))
            .expect("caller entity should exist");
        let deps = graph.get_dependencies(caller_id);
        assert!(
            deps.iter().any(|d| d.name == "compute"),
            "caller should depend on compute via MathUtils.compute(). Deps: {:?}",
            deps.iter().map(|d| &d.name).collect::<Vec<_>>()
        );
    }

    /// A named JS/TS import links the caller to the imported function.
    #[test]
    fn test_js_ts_import_resolution() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "helper.ts", "\
export function helper() { return 1; }
");
        write_file(root, "main.ts", "\
import { helper } from './helper';
export function main() { return helper(); }
");

        let graph = EntityGraph::build(
            root,
            &["helper.ts".into(), "main.ts".into()],
            &registry,
        );

        let main_id = graph.entities.keys()
            .find(|id| id.contains("main"))
            .expect("main entity should exist");
        let deps = graph.get_dependencies(main_id);
        assert!(
            deps.iter().any(|d| d.name == "helper"),
            "main should depend on helper via JS import. Deps: {:?}",
            deps.iter().map(|d| &d.name).collect::<Vec<_>>()
        );
    }

    /// `self.process()` must not create an edge to a same-named method of an
    /// unrelated class in another file.
    #[test]
    fn test_dot_chain_no_false_edges() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "a.py", "\
class ClassA:
    def run(self):
        return self.process()

    def process(self):
        return 1
");
        write_file(root, "b.py", "\
class ClassB:
    def process(self):
        return 2
");

        let graph = EntityGraph::build(
            root,
            &["a.py".into(), "b.py".into()],
            &registry,
        );

        let run_id = graph.entities.keys()
            .find(|id| id.contains("run"))
            .expect("run entity should exist");
        let deps = graph.get_dependencies(run_id);
        for dep in &deps {
            if dep.name == "process" {
                assert!(
                    dep.file_path == "a.py",
                    "run's process dep should be in a.py, not {}",
                    dep.file_path
                );
            }
        }
    }

    /// Plain calls without a dot chain are still resolved by the word-based
    /// reference extraction.
    #[test]
    fn test_dot_chain_fallback() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "app.ts", "\
export function helper() { return 1; }
export function caller() {
    const val = helper();
    return val;
}
");

        let graph = EntityGraph::build(root, &["app.ts".into()], &registry);

        let caller_id = graph.entities.keys()
            .find(|id| id.contains("caller"))
            .expect("caller entity should exist");
        let deps = graph.get_dependencies(caller_id);
        assert!(
            deps.iter().any(|d| d.name == "helper"),
            "caller should still resolve helper via bag-of-words. Deps: {:?}",
            deps.iter().map(|d| &d.name).collect::<Vec<_>>()
        );
    }
}