1use std::collections::{HashMap, HashSet};
11use std::path::Path;
12use std::sync::LazyLock;
13
14use rayon::prelude::*;
15use regex::Regex;
16use serde::{Deserialize, Serialize};
17
18use crate::git::types::{FileChange, FileStatus};
19use crate::model::entity::SemanticEntity;
20use crate::parser::registry::ParserRegistry;
21use crate::parser::scope_resolve;
22
23#[derive(Debug, Clone, Serialize, Deserialize)]
25#[serde(rename_all = "camelCase")]
26pub struct EntityRef {
27 pub from_entity: String,
28 pub to_entity: String,
29 pub ref_type: RefType,
30}
31
32#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
34#[serde(rename_all = "lowercase")]
35pub enum RefType {
36 Calls,
38 TypeRef,
40 Imports,
42}
43
44#[derive(Debug)]
46pub struct EntityGraph {
47 pub entities: HashMap<String, EntityInfo>,
49 pub edges: Vec<EntityRef>,
51 pub dependents: HashMap<String, Vec<String>>,
53 pub dependencies: HashMap<String, Vec<String>>,
55}
56
57#[derive(Debug, Clone, Serialize, Deserialize)]
59#[serde(rename_all = "camelCase")]
60pub struct EntityInfo {
61 pub id: String,
62 pub name: String,
63 pub entity_type: String,
64 pub file_path: String,
65 #[serde(skip_serializing_if = "Option::is_none")]
66 pub parent_id: Option<String>,
67 pub start_line: usize,
68 pub end_line: usize,
69}
70
71impl EntityGraph {
72 pub fn from_parts(entities: HashMap<String, EntityInfo>, edges: Vec<EntityRef>) -> Self {
74 let mut dependents: HashMap<String, Vec<String>> = HashMap::new();
75 let mut dependencies: HashMap<String, Vec<String>> = HashMap::new();
76 for edge in &edges {
77 dependents
78 .entry(edge.to_entity.clone())
79 .or_default()
80 .push(edge.from_entity.clone());
81 dependencies
82 .entry(edge.from_entity.clone())
83 .or_default()
84 .push(edge.to_entity.clone());
85 }
86 EntityGraph {
87 entities,
88 edges,
89 dependents,
90 dependencies,
91 }
92 }
93
94 pub fn build(
100 root: &Path,
101 file_paths: &[String],
102 registry: &ParserRegistry,
103 ) -> (Self, Vec<SemanticEntity>) {
104 let per_file: Vec<(Vec<SemanticEntity>, Option<(String, String, tree_sitter::Tree)>)> = file_paths
107 .par_iter()
108 .filter_map(|file_path| {
109 let full_path = root.join(file_path);
110 let content = std::fs::read_to_string(&full_path).ok()?;
111 let (entities, tree) = registry.extract_entities_with_tree(file_path, &content)?;
112 let parsed = tree.map(|t| (file_path.clone(), content, t));
113 Some((entities, parsed))
114 })
115 .collect();
116
117 let mut all_entities: Vec<SemanticEntity> = Vec::new();
118 let mut parsed_files: Vec<(String, String, tree_sitter::Tree)> = Vec::new();
119 for (entities, parsed) in per_file {
120 all_entities.extend(entities);
121 if let Some(p) = parsed {
122 parsed_files.push(p);
123 }
124 }
125
126 let mut symbol_table: HashMap<String, Vec<String>> = HashMap::with_capacity(all_entities.len());
128 let mut entity_map: HashMap<String, EntityInfo> = HashMap::with_capacity(all_entities.len());
129
130 for entity in &all_entities {
131 symbol_table
132 .entry(entity.name.clone())
133 .or_default()
134 .push(entity.id.clone());
135
136 entity_map.insert(
137 entity.id.clone(),
138 EntityInfo {
139 id: entity.id.clone(),
140 name: entity.name.clone(),
141 entity_type: entity.entity_type.clone(),
142 file_path: entity.file_path.clone(),
143 parent_id: entity.parent_id.clone(),
144 start_line: entity.start_line,
145 end_line: entity.end_line,
146 },
147 );
148 }
149
150 let parent_child_pairs: HashSet<(&str, &str)> = all_entities
152 .iter()
153 .filter_map(|e| {
154 e.parent_id.as_ref().map(|pid| (pid.as_str(), e.id.as_str()))
155 })
156 .collect();
157
158 let class_child_names: HashSet<(&str, &str)> = all_entities
160 .iter()
161 .filter_map(|e| {
162 e.parent_id.as_ref().map(|pid| (pid.as_str(), e.name.as_str()))
163 })
164 .collect();
165
166 let class_entity_names: HashSet<&str> = all_entities
169 .iter()
170 .filter(|e| matches!(e.entity_type.as_str(), "class" | "struct" | "interface" | "class_type"))
171 .map(|e| e.name.as_str())
172 .collect();
173
174 let id_to_name: HashMap<&str, &str> = all_entities
176 .iter()
177 .map(|e| (e.id.as_str(), e.name.as_str()))
178 .collect();
179
180 let mut enclosing_class: HashMap<&str, &str> = HashMap::new();
183 let mut class_members: HashMap<&str, Vec<(&str, &str)>> = HashMap::new();
184
185 for entity in &all_entities {
186 if let Some(ref pid) = entity.parent_id {
187 if let Some(&parent_name) = id_to_name.get(pid.as_str()) {
188 if class_entity_names.contains(parent_name) {
189 enclosing_class.insert(entity.id.as_str(), parent_name);
190 class_members
191 .entry(parent_name)
192 .or_default()
193 .push((entity.name.as_str(), entity.id.as_str()));
194 }
195 }
196 }
197 }
198
199 let import_table = build_import_table(root, file_paths, &symbol_table, &entity_map);
202
203 let has_scope_lang = file_paths.iter().any(|f| {
205 let ext = f.rfind('.').map(|i| &f[i..]).unwrap_or("");
206 crate::parser::plugins::code::languages::get_language_config(ext)
207 .and_then(|c| c.scope_resolve)
208 .is_some()
209 });
210 let (scope_edges, scope_resolved_entities) = if has_scope_lang {
211 let result = scope_resolve::resolve_with_scopes(root, file_paths, &all_entities, &entity_map, Some(parsed_files));
212 let resolved_entity_ids: HashSet<String> = result.edges.iter()
213 .map(|(from, _, _)| from.clone())
214 .collect();
215 (result.edges, resolved_entity_ids)
216 } else {
217 (vec![], HashSet::new())
218 };
219
220 let resolved_refs: Vec<(String, String, RefType)> = all_entities
225 .par_iter()
226 .flat_map(|entity| {
227 if scope_resolved_entities.contains(&entity.id) {
229 return vec![];
230 }
231
232 let mut entity_edges = Vec::new();
233 let mut consumed_words: HashSet<String> = HashSet::new();
234
235 let stripped = strip_comments_and_strings(&entity.content);
237 let dot_chains = extract_dot_chains(&stripped);
238
239 for (receiver, member) in &dot_chains {
240 if *receiver == "self" || *receiver == "this" {
241 if let Some(class_name) = enclosing_class.get(entity.id.as_str()) {
243 if let Some(members) = class_members.get(class_name) {
244 for (n, tid) in members {
245 if *n == *member && *tid != entity.id.as_str() {
246 entity_edges.push((
247 entity.id.clone(),
248 tid.to_string(),
249 RefType::Calls,
250 ));
251 consumed_words.insert(member.to_string());
252 break;
253 }
254 }
255 }
256 }
257 } else if class_entity_names.contains(*receiver) {
258 if let Some(members) = class_members.get(*receiver) {
260 for (n, tid) in members {
261 if *n == *member {
262 entity_edges.push((
263 entity.id.clone(),
264 tid.to_string(),
265 RefType::Calls,
266 ));
267 consumed_words.insert(member.to_string());
268 consumed_words.insert(receiver.to_string());
269 break;
270 }
271 }
272 }
273 }
274 }
276
277 let refs = extract_references_from_content(&entity.content, &entity.name);
279 for ref_name in refs {
280 if consumed_words.contains(ref_name) {
281 continue;
282 }
283
284 if class_child_names.contains(&(entity.id.as_str(), ref_name)) {
286 continue;
287 }
288
289 let import_key = (entity.file_path.clone(), ref_name.to_string());
292 if let Some(import_target_id) = import_table.get(&import_key) {
293 if import_target_id != &entity.id
294 && !parent_child_pairs.contains(&(entity.id.as_str(), import_target_id.as_str()))
295 && !parent_child_pairs.contains(&(import_target_id.as_str(), entity.id.as_str()))
296 {
297 let ref_type = infer_ref_type(&entity.content, &ref_name);
298 entity_edges.push((
299 entity.id.clone(),
300 import_target_id.clone(),
301 ref_type,
302 ));
303 }
304 continue;
305 }
306
307 if let Some(target_ids) = symbol_table.get(ref_name) {
308 let target = target_ids
311 .iter()
312 .find(|id| {
313 *id != &entity.id
314 && entity_map
315 .get(*id)
316 .map_or(false, |e| e.file_path == entity.file_path)
317 });
318
319 if let Some(target_id) = target {
320 if parent_child_pairs.contains(&(entity.id.as_str(), target_id.as_str()))
322 || parent_child_pairs.contains(&(target_id.as_str(), entity.id.as_str()))
323 {
324 continue;
325 }
326 let ref_type = infer_ref_type(&entity.content, &ref_name);
327 entity_edges.push((
328 entity.id.clone(),
329 target_id.clone(),
330 ref_type,
331 ));
332 }
333 }
334 }
335 entity_edges
336 })
337 .collect();
338
339 let mut all_resolved: Vec<(String, String, RefType)> = scope_edges;
341 all_resolved.extend(resolved_refs);
342 let mut seen_edges: HashSet<(String, String)> = HashSet::new();
343 all_resolved.retain(|e| seen_edges.insert((e.0.clone(), e.1.clone())));
344
345 let mut edges: Vec<EntityRef> = Vec::with_capacity(all_resolved.len());
347 let mut dependents: HashMap<String, Vec<String>> = HashMap::new();
348 let mut dependencies: HashMap<String, Vec<String>> = HashMap::new();
349
350 for (from_entity, to_entity, ref_type) in all_resolved {
351 dependents
352 .entry(to_entity.clone())
353 .or_default()
354 .push(from_entity.clone());
355 dependencies
356 .entry(from_entity.clone())
357 .or_default()
358 .push(to_entity.clone());
359 edges.push(EntityRef {
360 from_entity,
361 to_entity,
362 ref_type,
363 });
364 }
365
366 let graph = EntityGraph {
367 entities: entity_map,
368 edges,
369 dependents,
370 dependencies,
371 };
372
373 (graph, all_entities)
374 }
375
376 pub fn build_incremental(
382 root: &Path,
383 stale_files: &[String],
384 all_file_paths: &[String],
385 cached_entities: Vec<SemanticEntity>,
386 cached_edges: Vec<EntityRef>,
387 stale_file_cached_entities: Vec<SemanticEntity>,
388 registry: &ParserRegistry,
389 ) -> (Self, Vec<SemanticEntity>) {
390 let stale_set: HashSet<&str> = stale_files.iter().map(|s| s.as_str()).collect();
392
393 let per_file: Vec<(Vec<SemanticEntity>, Option<(String, String, tree_sitter::Tree)>)> = stale_files
395 .par_iter()
396 .filter_map(|file_path| {
397 let full_path = root.join(file_path);
398 let content = std::fs::read_to_string(&full_path).ok()?;
399 let (entities, tree) = registry.extract_entities_with_tree(file_path, &content)?;
400 let parsed = tree.map(|t| (file_path.clone(), content, t));
401 Some((entities, parsed))
402 })
403 .collect();
404
405 let mut new_entities: Vec<SemanticEntity> = Vec::new();
406 let mut parsed_files: Vec<(String, String, tree_sitter::Tree)> = Vec::new();
407 for (entities, parsed) in per_file {
408 new_entities.extend(entities);
409 if let Some(p) = parsed {
410 parsed_files.push(p);
411 }
412 }
413
414 let cached_hashes: HashMap<&str, &str> = stale_file_cached_entities
417 .iter()
418 .map(|e| (e.id.as_str(), e.content_hash.as_str()))
419 .collect();
420
421 let mut truly_changed_ids: HashSet<String> = HashSet::new();
423 let mut content_clean_ids: HashSet<String> = HashSet::new();
424 for entity in &new_entities {
425 match cached_hashes.get(entity.id.as_str()) {
426 Some(old_hash) if *old_hash == entity.content_hash.as_str() => {
427 content_clean_ids.insert(entity.id.clone());
428 }
429 _ => {
430 truly_changed_ids.insert(entity.id.clone());
432 }
433 }
434 }
435
436 let new_entity_ids: HashSet<&str> = new_entities.iter().map(|e| e.id.as_str()).collect();
438 let deleted_ids: HashSet<&str> = stale_file_cached_entities
439 .iter()
440 .filter(|e| !new_entity_ids.contains(e.id.as_str()))
441 .map(|e| e.id.as_str())
442 .collect();
443
444 let all_entities: Vec<SemanticEntity> = cached_entities
446 .into_iter()
447 .chain(new_entities.into_iter())
448 .collect();
449
450 let mut affected_clean_ids: HashSet<String> = HashSet::new();
452 for edge in &cached_edges {
453 let to_truly_changed = truly_changed_ids.contains(&edge.to_entity)
454 || deleted_ids.contains(edge.to_entity.as_str());
455 if to_truly_changed && !stale_set.contains(
456 all_entities.iter()
457 .find(|e| e.id == edge.from_entity)
458 .map(|e| e.file_path.as_str())
459 .unwrap_or("")
460 ) {
461 affected_clean_ids.insert(edge.from_entity.clone());
462 }
463 }
464
465 let stale_entity_ids: HashSet<&str> = all_entities
467 .iter()
468 .filter(|e| stale_set.contains(e.file_path.as_str()))
469 .map(|e| e.id.as_str())
470 .collect();
471
472 let kept_edges: Vec<EntityRef> = cached_edges
476 .into_iter()
477 .filter(|e| {
478 let from_stale = stale_entity_ids.contains(e.from_entity.as_str());
479 let to_stale = stale_entity_ids.contains(e.to_entity.as_str());
480
481 if !from_stale && !to_stale && !affected_clean_ids.contains(&e.from_entity) {
482 return true;
484 }
485 if content_clean_ids.contains(&e.from_entity)
486 && !truly_changed_ids.contains(&e.to_entity)
487 && !deleted_ids.contains(e.to_entity.as_str())
488 && !affected_clean_ids.contains(&e.from_entity)
489 {
490 return true;
492 }
493 false
494 })
495 .collect();
496
497 let needs_resolution: HashSet<&str> = all_entities
500 .iter()
501 .filter(|e| {
502 truly_changed_ids.contains(&e.id)
503 || affected_clean_ids.contains(&e.id)
504 })
505 .map(|e| e.id.as_str())
506 .collect();
507
508 let mut symbol_table: HashMap<String, Vec<String>> = HashMap::with_capacity(all_entities.len());
513 let mut entity_map: HashMap<String, EntityInfo> = HashMap::with_capacity(all_entities.len());
514
515 for entity in &all_entities {
516 symbol_table
517 .entry(entity.name.clone())
518 .or_default()
519 .push(entity.id.clone());
520 entity_map.insert(
521 entity.id.clone(),
522 EntityInfo {
523 id: entity.id.clone(),
524 name: entity.name.clone(),
525 entity_type: entity.entity_type.clone(),
526 file_path: entity.file_path.clone(),
527 parent_id: entity.parent_id.clone(),
528 start_line: entity.start_line,
529 end_line: entity.end_line,
530 },
531 );
532 }
533
534 let parent_child_pairs: HashSet<(&str, &str)> = all_entities
536 .iter()
537 .filter_map(|e| {
538 e.parent_id.as_ref().map(|pid| (pid.as_str(), e.id.as_str()))
539 })
540 .collect();
541
542 let class_child_names: HashSet<(&str, &str)> = all_entities
543 .iter()
544 .filter_map(|e| {
545 e.parent_id.as_ref().map(|pid| (pid.as_str(), e.name.as_str()))
546 })
547 .collect();
548
549 let class_entity_names: HashSet<&str> = all_entities
550 .iter()
551 .filter(|e| matches!(e.entity_type.as_str(), "class" | "struct" | "interface" | "class_type"))
552 .map(|e| e.name.as_str())
553 .collect();
554
555 let id_to_name: HashMap<&str, &str> = all_entities
556 .iter()
557 .map(|e| (e.id.as_str(), e.name.as_str()))
558 .collect();
559
560 let mut enclosing_class: HashMap<&str, &str> = HashMap::new();
561 let mut class_members: HashMap<&str, Vec<(&str, &str)>> = HashMap::new();
562
563 for entity in &all_entities {
564 if let Some(ref pid) = entity.parent_id {
565 if let Some(&parent_name) = id_to_name.get(pid.as_str()) {
566 if class_entity_names.contains(parent_name) {
567 enclosing_class.insert(entity.id.as_str(), parent_name);
568 class_members
569 .entry(parent_name)
570 .or_default()
571 .push((entity.name.as_str(), entity.id.as_str()));
572 }
573 }
574 }
575 }
576
577 let import_table = build_import_table(root, all_file_paths, &symbol_table, &entity_map);
579
580 let resolve_file_paths: Vec<String> = all_file_paths
582 .iter()
583 .filter(|f| {
584 stale_set.contains(f.as_str()) || all_entities.iter().any(|e| {
586 e.file_path == **f && affected_clean_ids.contains(&e.id)
587 })
588 })
589 .cloned()
590 .collect();
591
592 let has_scope_lang = resolve_file_paths.iter().any(|f| {
593 let ext = f.rfind('.').map(|i| &f[i..]).unwrap_or("");
594 crate::parser::plugins::code::languages::get_language_config(ext)
595 .and_then(|c| c.scope_resolve)
596 .is_some()
597 });
598 let (scope_edges, scope_resolved_entities) = if has_scope_lang {
599 let resolve_set: HashSet<&str> = resolve_file_paths.iter().map(|s| s.as_str()).collect();
601 let relevant_parsed: Vec<(String, String, tree_sitter::Tree)> = parsed_files
602 .into_iter()
603 .filter(|(fp, _, _)| resolve_set.contains(fp.as_str()))
604 .collect();
605 let pre = if relevant_parsed.is_empty() { None } else { Some(relevant_parsed) };
606 let result = scope_resolve::resolve_with_scopes(root, &resolve_file_paths, &all_entities, &entity_map, pre);
607 let resolved_entity_ids: HashSet<String> = result.edges.iter()
608 .map(|(from, _, _)| from.clone())
609 .collect();
610 (result.edges, resolved_entity_ids)
611 } else {
612 (vec![], HashSet::new())
613 };
614
615 let resolved_refs: Vec<(String, String, RefType)> = all_entities
617 .par_iter()
618 .filter(|e| needs_resolution.contains(e.id.as_str()))
619 .flat_map(|entity| {
620 if scope_resolved_entities.contains(&entity.id) {
621 return vec![];
622 }
623
624 let mut entity_edges = Vec::new();
625 let mut consumed_words: HashSet<String> = HashSet::new();
626
627 let stripped = strip_comments_and_strings(&entity.content);
629 let dot_chains = extract_dot_chains(&stripped);
630
631 for (receiver, member) in &dot_chains {
632 if *receiver == "self" || *receiver == "this" {
633 if let Some(class_name) = enclosing_class.get(entity.id.as_str()) {
634 if let Some(members) = class_members.get(class_name) {
635 for (n, tid) in members {
636 if *n == *member && *tid != entity.id.as_str() {
637 entity_edges.push((
638 entity.id.clone(),
639 tid.to_string(),
640 RefType::Calls,
641 ));
642 consumed_words.insert(member.to_string());
643 break;
644 }
645 }
646 }
647 }
648 } else if class_entity_names.contains(*receiver) {
649 if let Some(members) = class_members.get(*receiver) {
650 for (n, tid) in members {
651 if *n == *member {
652 entity_edges.push((
653 entity.id.clone(),
654 tid.to_string(),
655 RefType::Calls,
656 ));
657 consumed_words.insert(member.to_string());
658 consumed_words.insert(receiver.to_string());
659 break;
660 }
661 }
662 }
663 }
664 }
665
666 let refs = extract_references_from_content(&entity.content, &entity.name);
668 for ref_name in refs {
669 if consumed_words.contains(ref_name) {
670 continue;
671 }
672 if class_child_names.contains(&(entity.id.as_str(), ref_name)) {
673 continue;
674 }
675
676 let import_key = (entity.file_path.clone(), ref_name.to_string());
677 if let Some(import_target_id) = import_table.get(&import_key) {
678 if import_target_id != &entity.id
679 && !parent_child_pairs.contains(&(entity.id.as_str(), import_target_id.as_str()))
680 && !parent_child_pairs.contains(&(import_target_id.as_str(), entity.id.as_str()))
681 {
682 let ref_type = infer_ref_type(&entity.content, &ref_name);
683 entity_edges.push((
684 entity.id.clone(),
685 import_target_id.clone(),
686 ref_type,
687 ));
688 }
689 continue;
690 }
691
692 if let Some(target_ids) = symbol_table.get(ref_name) {
693 let target = target_ids
694 .iter()
695 .find(|id| {
696 *id != &entity.id
697 && entity_map
698 .get(*id)
699 .map_or(false, |e| e.file_path == entity.file_path)
700 });
701
702 if let Some(target_id) = target {
703 if parent_child_pairs.contains(&(entity.id.as_str(), target_id.as_str()))
704 || parent_child_pairs.contains(&(target_id.as_str(), entity.id.as_str()))
705 {
706 continue;
707 }
708 let ref_type = infer_ref_type(&entity.content, &ref_name);
709 entity_edges.push((
710 entity.id.clone(),
711 target_id.clone(),
712 ref_type,
713 ));
714 }
715 }
716 }
717 entity_edges
718 })
719 .collect();
720
721 let mut all_resolved: Vec<(String, String, RefType)> = scope_edges;
723 all_resolved.extend(resolved_refs);
724 let mut seen_edges: HashSet<(String, String)> = HashSet::new();
725 all_resolved.retain(|e| seen_edges.insert((e.0.clone(), e.1.clone())));
726
727 let mut edges: Vec<EntityRef> = Vec::with_capacity(kept_edges.len() + all_resolved.len());
729 let mut dependents: HashMap<String, Vec<String>> = HashMap::new();
730 let mut dependencies: HashMap<String, Vec<String>> = HashMap::new();
731
732 let mut all_edge_pairs: HashSet<(String, String)> = HashSet::new();
734
735 for edge in kept_edges {
737 all_edge_pairs.insert((edge.from_entity.clone(), edge.to_entity.clone()));
738 dependents
739 .entry(edge.to_entity.clone())
740 .or_default()
741 .push(edge.from_entity.clone());
742 dependencies
743 .entry(edge.from_entity.clone())
744 .or_default()
745 .push(edge.to_entity.clone());
746 edges.push(edge);
747 }
748
749 for (from_entity, to_entity, ref_type) in all_resolved {
751 if !all_edge_pairs.insert((from_entity.clone(), to_entity.clone())) {
752 continue;
753 }
754 dependents
755 .entry(to_entity.clone())
756 .or_default()
757 .push(from_entity.clone());
758 dependencies
759 .entry(from_entity.clone())
760 .or_default()
761 .push(to_entity.clone());
762 edges.push(EntityRef {
763 from_entity,
764 to_entity,
765 ref_type,
766 });
767 }
768
769 let graph = EntityGraph {
770 entities: entity_map,
771 edges,
772 dependents,
773 dependencies,
774 };
775
776 (graph, all_entities)
777 }
778
779 pub fn get_dependents(&self, entity_id: &str) -> Vec<&EntityInfo> {
781 self.dependents
782 .get(entity_id)
783 .map(|ids| {
784 ids.iter()
785 .filter_map(|id| self.entities.get(id))
786 .collect()
787 })
788 .unwrap_or_default()
789 }
790
791 pub fn get_dependencies(&self, entity_id: &str) -> Vec<&EntityInfo> {
793 self.dependencies
794 .get(entity_id)
795 .map(|ids| {
796 ids.iter()
797 .filter_map(|id| self.entities.get(id))
798 .collect()
799 })
800 .unwrap_or_default()
801 }
802
803 pub fn impact_analysis(&self, entity_id: &str) -> Vec<&EntityInfo> {
806 self.impact_analysis_capped(entity_id, 10_000)
807 }
808
809 pub fn impact_analysis_bounded(&self, entity_id: &str, max_depth: usize) -> Vec<(&EntityInfo, usize)> {
812 let mut visited: HashSet<&str> = HashSet::new();
813 let mut queue: std::collections::VecDeque<(&str, usize)> = std::collections::VecDeque::new();
814 let mut result = Vec::new();
815
816 let start_key = match self.entities.get_key_value(entity_id) {
817 Some((k, _)) => k.as_str(),
818 None => return result,
819 };
820
821 queue.push_back((start_key, 0));
822 visited.insert(start_key);
823
824 while let Some((current, depth)) = queue.pop_front() {
825 if let Some(deps) = self.dependents.get(current) {
826 let next_depth = depth + 1;
827 if max_depth > 0 && next_depth > max_depth {
828 continue;
829 }
830 for dep in deps {
831 if visited.insert(dep.as_str()) {
832 if let Some(info) = self.entities.get(dep.as_str()) {
833 result.push((info, next_depth));
834 }
835 queue.push_back((dep.as_str(), next_depth));
836 }
837 }
838 }
839 }
840
841 result
842 }
843
844 pub fn impact_analysis_capped(&self, entity_id: &str, max_visited: usize) -> Vec<&EntityInfo> {
847 let mut visited: HashSet<&str> = HashSet::new();
848 let mut queue: std::collections::VecDeque<&str> = std::collections::VecDeque::new();
849 let mut result = Vec::new();
850
851 let start_key = match self.entities.get_key_value(entity_id) {
852 Some((k, _)) => k.as_str(),
853 None => return result,
854 };
855
856 queue.push_back(start_key);
857 visited.insert(start_key);
858
859 while let Some(current) = queue.pop_front() {
860 if result.len() >= max_visited {
861 break;
862 }
863 if let Some(deps) = self.dependents.get(current) {
864 for dep in deps {
865 if visited.insert(dep.as_str()) {
866 if let Some(info) = self.entities.get(dep.as_str()) {
867 result.push(info);
868 }
869 queue.push_back(dep.as_str());
870 if result.len() >= max_visited {
871 break;
872 }
873 }
874 }
875 }
876 }
877
878 result
879 }
880
881 pub fn impact_count(&self, entity_id: &str, max_count: usize) -> usize {
884 let mut visited: HashSet<&str> = HashSet::new();
885 let mut queue: std::collections::VecDeque<&str> = std::collections::VecDeque::new();
886 let mut count = 0;
887
888 let start_key = match self.entities.get_key_value(entity_id) {
890 Some((k, _)) => k.as_str(),
891 None => return 0,
892 };
893
894 queue.push_back(start_key);
895 visited.insert(start_key);
896
897 while let Some(current) = queue.pop_front() {
898 if count >= max_count {
899 break;
900 }
901 if let Some(deps) = self.dependents.get(current) {
902 for dep in deps {
903 if visited.insert(dep.as_str()) {
904 count += 1;
905 queue.push_back(dep.as_str());
906 if count >= max_count {
907 break;
908 }
909 }
910 }
911 }
912 }
913
914 count
915 }
916
917 pub fn filter_test_entities(&self, entities: &[crate::model::entity::SemanticEntity]) -> HashSet<String> {
920 let mut test_ids = HashSet::new();
921 for entity in entities {
922 if is_test_entity(entity) {
923 test_ids.insert(entity.id.clone());
924 }
925 }
926 test_ids
927 }
928
929 pub fn test_impact(
932 &self,
933 entity_id: &str,
934 all_entities: &[crate::model::entity::SemanticEntity],
935 ) -> Vec<&EntityInfo> {
936 let test_ids = self.filter_test_entities(all_entities);
937 let impact = self.impact_analysis(entity_id);
938 impact
939 .into_iter()
940 .filter(|info| test_ids.contains(&info.id))
941 .collect()
942 }
943
944 pub fn update_from_changes(
955 &mut self,
956 changed_files: &[FileChange],
957 root: &Path,
958 registry: &ParserRegistry,
959 ) {
960 let mut affected_files: HashSet<String> = HashSet::new();
961 let mut new_entities: Vec<SemanticEntity> = Vec::new();
962
963 for change in changed_files {
964 affected_files.insert(change.file_path.clone());
965 if let Some(ref old_path) = change.old_file_path {
966 affected_files.insert(old_path.clone());
967 }
968
969 match change.status {
970 FileStatus::Deleted => {
971 self.remove_entities_for_file(&change.file_path);
972 }
973 FileStatus::Renamed => {
974 if let Some(ref old_path) = change.old_file_path {
976 self.remove_entities_for_file(old_path);
977 }
978 if let Some(entities) = self.extract_file_entities(
980 &change.file_path,
981 change.after_content.as_deref(),
982 root,
983 registry,
984 ) {
985 new_entities.extend(entities);
986 }
987 }
988 FileStatus::Added | FileStatus::Modified => {
989 self.remove_entities_for_file(&change.file_path);
991 if let Some(entities) = self.extract_file_entities(
993 &change.file_path,
994 change.after_content.as_deref(),
995 root,
996 registry,
997 ) {
998 new_entities.extend(entities);
999 }
1000 }
1001 }
1002 }
1003
1004 for entity in &new_entities {
1006 self.entities.insert(
1007 entity.id.clone(),
1008 EntityInfo {
1009 id: entity.id.clone(),
1010 name: entity.name.clone(),
1011 entity_type: entity.entity_type.clone(),
1012 file_path: entity.file_path.clone(),
1013 parent_id: entity.parent_id.clone(),
1014 start_line: entity.start_line,
1015 end_line: entity.end_line,
1016 },
1017 );
1018 }
1019
1020 let symbol_table = self.build_symbol_table();
1022
1023 for entity in &new_entities {
1025 self.resolve_entity_references(entity, &symbol_table);
1026 }
1027
1028 let changed_entity_names: HashSet<String> = new_entities
1031 .iter()
1032 .map(|e| e.name.clone())
1033 .collect();
1034
1035 let entities_to_recheck: Vec<String> = self
1037 .entities
1038 .values()
1039 .filter(|e| !affected_files.contains(&e.file_path))
1040 .filter(|e| {
1041 self.dependencies
1042 .get(&e.id)
1043 .map_or(false, |deps| {
1044 deps.iter().any(|dep_id| {
1045 self.entities
1046 .get(dep_id)
1047 .map_or(false, |dep| changed_entity_names.contains(&dep.name))
1048 })
1049 })
1050 })
1051 .map(|e| e.id.clone())
1052 .collect();
1053
1054 let _ = entities_to_recheck; }
1061
1062 fn extract_file_entities(
1064 &self,
1065 file_path: &str,
1066 content: Option<&str>,
1067 root: &Path,
1068 registry: &ParserRegistry,
1069 ) -> Option<Vec<SemanticEntity>> {
1070 let content = if let Some(c) = content {
1071 c.to_string()
1072 } else {
1073 let full_path = root.join(file_path);
1074 std::fs::read_to_string(&full_path).ok()?
1075 };
1076
1077 Some(registry.extract_entities(file_path, &content))
1078 }
1079
1080 fn remove_entities_for_file(&mut self, file_path: &str) {
1082 let ids_to_remove: Vec<String> = self
1084 .entities
1085 .values()
1086 .filter(|e| e.file_path == file_path)
1087 .map(|e| e.id.clone())
1088 .collect();
1089
1090 let id_set: HashSet<&str> = ids_to_remove.iter().map(|s| s.as_str()).collect();
1091
1092 for id in &ids_to_remove {
1094 self.entities.remove(id);
1095 }
1096
1097 self.edges
1099 .retain(|e| !id_set.contains(e.from_entity.as_str()) && !id_set.contains(e.to_entity.as_str()));
1100
1101 for id in &ids_to_remove {
1103 if let Some(deps) = self.dependencies.remove(id) {
1105 for dep in &deps {
1107 if let Some(dependents) = self.dependents.get_mut(dep) {
1108 dependents.retain(|d| d != id);
1109 }
1110 }
1111 }
1112 if let Some(deps) = self.dependents.remove(id) {
1114 for dep in &deps {
1116 if let Some(dependencies) = self.dependencies.get_mut(dep) {
1117 dependencies.retain(|d| d != id);
1118 }
1119 }
1120 }
1121 }
1122 }
1123
1124 fn build_symbol_table(&self) -> HashMap<String, Vec<String>> {
1126 let mut symbol_table: HashMap<String, Vec<String>> = HashMap::new();
1127 for entity in self.entities.values() {
1128 symbol_table
1129 .entry(entity.name.clone())
1130 .or_default()
1131 .push(entity.id.clone());
1132 }
1133 symbol_table
1134 }
1135
1136 fn resolve_entity_references(
1138 &mut self,
1139 entity: &SemanticEntity,
1140 symbol_table: &HashMap<String, Vec<String>>,
1141 ) {
1142 let refs = extract_references_from_content(&entity.content, &entity.name);
1143
1144 for ref_name in refs {
1145 if let Some(target_ids) = symbol_table.get(ref_name) {
1146 let target = target_ids
1147 .iter()
1148 .find(|id| {
1149 *id != &entity.id
1150 && self
1151 .entities
1152 .get(*id)
1153 .map_or(false, |e| e.file_path == entity.file_path)
1154 })
1155 .or_else(|| target_ids.iter().find(|id| *id != &entity.id));
1156
1157 if let Some(target_id) = target {
1158 let ref_type = infer_ref_type(&entity.content, &ref_name);
1159 self.edges.push(EntityRef {
1160 from_entity: entity.id.clone(),
1161 to_entity: target_id.clone(),
1162 ref_type,
1163 });
1164 self.dependents
1165 .entry(target_id.clone())
1166 .or_default()
1167 .push(entity.id.clone());
1168 self.dependencies
1169 .entry(entity.id.clone())
1170 .or_default()
1171 .push(target_id.clone());
1172 }
1173 }
1174 }
1175 }
1176}
1177
1178fn is_test_entity(entity: &crate::model::entity::SemanticEntity) -> bool {
1180 let name = &entity.name;
1181 let path = &entity.file_path;
1182 let content = &entity.content;
1183
1184 if name.starts_with("test_") || name.starts_with("Test") || name.ends_with("_test") || name.ends_with("Test") {
1186 return true;
1187 }
1188 if name.starts_with("it_") || name.starts_with("describe_") || name.starts_with("spec_") {
1189 return true;
1190 }
1191
1192 let path_lower = path.to_lowercase();
1194 let in_test_file = path_lower.contains("/test/")
1195 || path_lower.contains("/tests/")
1196 || path_lower.contains("/spec/")
1197 || path_lower.contains("_test.")
1198 || path_lower.contains(".test.")
1199 || path_lower.contains("_spec.")
1200 || path_lower.contains(".spec.");
1201
1202 let has_test_marker = content.contains("#[test]")
1204 || content.contains("#[cfg(test)]")
1205 || content.contains("@Test")
1206 || content.contains("@pytest")
1207 || content.contains("@test")
1208 || content.contains("describe(")
1209 || content.contains("it(")
1210 || content.contains("test(");
1211
1212 in_test_file && has_test_marker
1213}
1214
1215fn build_import_table(
1220 root: &Path,
1221 file_paths: &[String],
1222 symbol_table: &HashMap<String, Vec<String>>,
1223 entity_map: &HashMap<String, EntityInfo>,
1224) -> HashMap<(String, String), String> {
1225 let mut import_table: HashMap<(String, String), String> = HashMap::new();
1226
1227 for file_path in file_paths {
1228 let full_path = root.join(file_path);
1229 let content = match std::fs::read_to_string(&full_path) {
1230 Ok(c) => c,
1231 Err(_) => continue,
1232 };
1233
1234 let mut logical_lines: Vec<String> = Vec::new();
1237 let mut current_line = String::new();
1238 let mut in_parens = false;
1239
1240 for line in content.lines() {
1241 let trimmed = line.trim();
1242 if in_parens {
1243 let clean = trimmed.trim_end_matches(|c: char| c == ')' || c == ',');
1245 let clean = clean.split('#').next().unwrap_or(clean).trim();
1246 if !clean.is_empty() && clean != "(" {
1247 current_line.push_str(", ");
1248 current_line.push_str(clean);
1249 }
1250 if trimmed.contains(')') {
1251 in_parens = false;
1252 logical_lines.push(std::mem::take(&mut current_line));
1253 }
1254 } else if trimmed.starts_with("from ") && trimmed.contains(" import ") {
1255 if trimmed.contains('(') && !trimmed.contains(')') {
1256 in_parens = true;
1258 let before_paren = trimmed.split('(').next().unwrap_or(trimmed);
1260 current_line = before_paren.trim().to_string();
1261 if let Some(after) = trimmed.split('(').nth(1) {
1263 let after = after.trim().trim_end_matches(')').trim();
1264 if !after.is_empty() {
1265 current_line.push(' ');
1266 current_line.push_str(after);
1267 }
1268 }
1269 } else {
1270 logical_lines.push(trimmed.to_string());
1271 }
1272 }
1273 }
1274
1275 for logical_line in &logical_lines {
1276 if let Some(rest) = logical_line.strip_prefix("from ") {
1277 let import_match = rest.find(" import ")
1279 .map(|pos| (pos, 8))
1280 .or_else(|| rest.find(" import,").map(|pos| (pos, 8)));
1281 if let Some((import_pos, skip)) = import_match {
1282 let module_path = &rest[..import_pos];
1283 let names_str = &rest[import_pos + skip..];
1284
1285 let source_module = module_path
1286 .trim_start_matches('.')
1287 .rsplit('.')
1288 .next()
1289 .unwrap_or(module_path.trim_start_matches('.'));
1290
1291 for name_part in names_str.split(',') {
1292 let name_part = name_part.trim();
1293 let imported_name = name_part.split_whitespace().next().unwrap_or(name_part);
1294 let imported_name = imported_name.trim_matches(|c: char| c == '(' || c == ')' || c == ',');
1296 if imported_name.is_empty() {
1297 continue;
1298 }
1299
1300 if let Some(target_ids) = symbol_table.get(imported_name) {
1301 let target = target_ids.iter().find(|id| {
1302 entity_map.get(*id).map_or(false, |e| {
1303 let stem = e.file_path.rsplit('/').next().unwrap_or(&e.file_path);
1304 let stem = stem.strip_suffix(".py")
1305 .or_else(|| stem.strip_suffix(".ts"))
1306 .or_else(|| stem.strip_suffix(".js"))
1307 .or_else(|| stem.strip_suffix(".rs"))
1308 .unwrap_or(stem);
1309 stem == source_module
1310 })
1311 });
1312 if let Some(target_id) = target {
1313 import_table.insert(
1314 (file_path.clone(), imported_name.to_string()),
1315 target_id.clone(),
1316 );
1317 }
1318 }
1319 }
1320 }
1321 }
1322 }
1323
1324 let is_js_ts = file_path.ends_with(".js") || file_path.ends_with(".ts")
1327 || file_path.ends_with(".jsx") || file_path.ends_with(".tsx");
1328
1329 if is_js_ts {
1330 static JS_NAMED_RE: LazyLock<Regex> = LazyLock::new(|| {
1331 Regex::new(r#"import\s*\{([^}]+)\}\s*from\s*['"]([^'"]+)['"]"#).unwrap()
1332 });
1333 static JS_DEFAULT_RE: LazyLock<Regex> = LazyLock::new(|| {
1334 Regex::new(r#"import\s+(?:type\s+)?([A-Za-z_]\w*)\s+from\s*['"]([^'"]+)['"]"#).unwrap()
1335 });
1336
1337 for cap in JS_NAMED_RE.captures_iter(&content) {
1338 let names_str = cap.get(1).unwrap().as_str();
1339 let module_path = cap.get(2).unwrap().as_str();
1340 let source_module = module_path.rsplit('/').next().unwrap_or(module_path);
1341 let source_module = strip_js_ext(source_module);
1342
1343 for name_part in names_str.split(',') {
1344 let name_part = name_part.trim();
1345 if name_part.is_empty() { continue; }
1346
1347 let (original_name, local_name) = if let Some(pos) = name_part.find(" as ") {
1349 let orig = name_part[..pos].trim();
1350 let local = name_part[pos + 4..].trim();
1351 let orig = orig.strip_prefix("type ").unwrap_or(orig);
1352 (orig, local)
1353 } else {
1354 let name = name_part.strip_prefix("type ").unwrap_or(name_part);
1355 (name, name)
1356 };
1357
1358 if original_name.is_empty() || local_name.is_empty() { continue; }
1359
1360 if let Some(target_ids) = symbol_table.get(original_name) {
1361 let target = target_ids.iter().find(|id| {
1362 entity_map.get(*id).map_or(false, |e| {
1363 let stem = e.file_path.rsplit('/').next().unwrap_or(&e.file_path);
1364 let stem = strip_file_ext(stem);
1365 stem == source_module
1366 })
1367 });
1368 if let Some(target_id) = target {
1369 import_table.insert(
1370 (file_path.clone(), local_name.to_string()),
1371 target_id.clone(),
1372 );
1373 }
1374 }
1375 }
1376 }
1377
1378 for cap in JS_DEFAULT_RE.captures_iter(&content) {
1379 let local_name = cap.get(1).unwrap().as_str();
1380 let module_path = cap.get(2).unwrap().as_str();
1381 let source_module = module_path.rsplit('/').next().unwrap_or(module_path);
1382 let source_module = strip_js_ext(source_module);
1383
1384 if let Some(target_ids) = symbol_table.get(local_name) {
1385 let target = target_ids.iter().find(|id| {
1386 entity_map.get(*id).map_or(false, |e| {
1387 let stem = e.file_path.rsplit('/').next().unwrap_or(&e.file_path);
1388 let stem = strip_file_ext(stem);
1389 stem == source_module
1390 })
1391 });
1392 if let Some(target_id) = target {
1393 import_table.insert(
1394 (file_path.clone(), local_name.to_string()),
1395 target_id.clone(),
1396 );
1397 }
1398 }
1399 }
1400 }
1401
1402 let is_rust = file_path.ends_with(".rs");
1405 if is_rust {
1406 static RUST_USE_SIMPLE_RE: LazyLock<Regex> = LazyLock::new(|| {
1407 Regex::new(r"(?m)^\s*use\s+(?:(?:crate|super|self)::)?([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\s*;").unwrap()
1411 });
1412 static RUST_USE_GROUP_RE: LazyLock<Regex> = LazyLock::new(|| {
1413 Regex::new(r"(?m)^\s*use\s+(?:(?:crate|super|self)::)?([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)::\{([^}]+)\}\s*;").unwrap()
1416 });
1417
1418 for cap in RUST_USE_SIMPLE_RE.captures_iter(&content) {
1421 let full_path_str = cap.get(1).unwrap().as_str();
1422 let parts: Vec<&str> = full_path_str.split("::").collect();
1423 if parts.is_empty() { continue; }
1424
1425 let imported_name = parts[parts.len() - 1];
1427 let source_module = if parts.len() >= 2 {
1429 parts[parts.len() - 2]
1430 } else {
1431 parts[0]
1432 };
1433
1434 resolve_rust_import(
1435 file_path, imported_name, source_module,
1436 symbol_table, entity_map, &mut import_table,
1437 );
1438 }
1439
1440 for cap in RUST_USE_GROUP_RE.captures_iter(&content) {
1441 let module_path = cap.get(1).unwrap().as_str();
1442 let names_str = cap.get(2).unwrap().as_str();
1443
1444 let source_module = module_path.rsplit("::").next().unwrap_or(module_path);
1446
1447 for name_part in names_str.split(',') {
1448 let name_part = name_part.trim();
1449 let (original, local) = if let Some(pos) = name_part.find(" as ") {
1451 (&name_part[..pos], name_part[pos + 4..].trim())
1452 } else {
1453 (name_part, name_part)
1454 };
1455 let original = original.trim();
1456 let local = local.trim();
1457 if original.is_empty() || local.is_empty() { continue; }
1458
1459 resolve_rust_import(
1460 file_path, original, source_module,
1461 symbol_table, entity_map, &mut import_table,
1462 );
1463 if local != original {
1465 if let Some(target) = import_table.get(&(file_path.clone(), original.to_string())).cloned() {
1466 import_table.insert(
1467 (file_path.clone(), local.to_string()),
1468 target,
1469 );
1470 }
1471 }
1472 }
1473 }
1474 }
1475
1476 let is_go = file_path.ends_with(".go");
1479 if is_go {
1480 static GO_IMPORT_RE: LazyLock<Regex> = LazyLock::new(|| {
1481 Regex::new(r#"(?m)"([^"]+)""#).unwrap()
1482 });
1483
1484 let import_section = extract_go_import_section(&content);
1486 for cap in GO_IMPORT_RE.captures_iter(&import_section) {
1487 let import_path = cap.get(1).unwrap().as_str();
1488 let pkg_name = import_path.rsplit('/').next().unwrap_or(import_path);
1489
1490 for (name, target_ids) in symbol_table.iter() {
1492 for target_id in target_ids {
1493 if let Some(entity) = entity_map.get(target_id) {
1494 let stem = entity.file_path.rsplit('/').next().unwrap_or(&entity.file_path);
1495 let stem = strip_file_ext(stem);
1496 if stem == pkg_name || entity.file_path.contains(&format!("{}/", pkg_name)) {
1498 import_table.insert(
1499 (file_path.clone(), name.clone()),
1500 target_id.clone(),
1501 );
1502 }
1503 }
1504 }
1505 }
1506 }
1507 }
1508 }
1509
1510 import_table
1511}
1512
1513fn resolve_rust_import(
1516 file_path: &str,
1517 imported_name: &str,
1518 source_module: &str,
1519 symbol_table: &HashMap<String, Vec<String>>,
1520 entity_map: &HashMap<String, EntityInfo>,
1521 import_table: &mut HashMap<(String, String), String>,
1522) {
1523 if let Some(target_ids) = symbol_table.get(imported_name) {
1524 let target = target_ids.iter().find(|id| {
1525 entity_map.get(*id).map_or(false, |e| {
1526 let stem = e.file_path.rsplit('/').next().unwrap_or(&e.file_path);
1527 let stem = strip_file_ext(stem);
1528 stem == source_module
1529 })
1530 });
1531 if let Some(target_id) = target {
1532 import_table.insert(
1533 (file_path.to_string(), imported_name.to_string()),
1534 target_id.clone(),
1535 );
1536 }
1537 }
1538}
1539
1540fn extract_go_import_section(content: &str) -> String {
1542 let mut result = String::new();
1543 let mut in_import_block = false;
1544 for line in content.lines() {
1545 let trimmed = line.trim();
1546 if trimmed.starts_with("import (") {
1547 in_import_block = true;
1548 continue;
1549 }
1550 if trimmed.starts_with("import \"") || trimmed.starts_with("import `") {
1551 result.push_str(trimmed);
1552 result.push('\n');
1553 continue;
1554 }
1555 if in_import_block {
1556 if trimmed == ")" {
1557 in_import_block = false;
1558 } else {
1559 result.push_str(trimmed);
1560 result.push('\n');
1561 }
1562 }
1563 }
1564 result
1565}
1566
1567fn strip_js_ext(s: &str) -> &str {
1569 s.strip_suffix(".js")
1570 .or_else(|| s.strip_suffix(".ts"))
1571 .or_else(|| s.strip_suffix(".jsx"))
1572 .or_else(|| s.strip_suffix(".tsx"))
1573 .unwrap_or(s)
1574}
1575
1576fn strip_file_ext(s: &str) -> &str {
1578 s.strip_suffix(".py")
1579 .or_else(|| s.strip_suffix(".ts"))
1580 .or_else(|| s.strip_suffix(".js"))
1581 .or_else(|| s.strip_suffix(".tsx"))
1582 .or_else(|| s.strip_suffix(".jsx"))
1583 .or_else(|| s.strip_suffix(".rs"))
1584 .unwrap_or(s)
1585}
1586
1587fn strip_comments_and_strings(content: &str) -> String {
1590 let bytes = content.as_bytes();
1591 let len = bytes.len();
1592 let mut result = vec![b' '; len];
1593 let mut i = 0;
1594
1595 while i < len {
1596 if i + 2 < len && bytes[i] == b'"' && bytes[i + 1] == b'"' && bytes[i + 2] == b'"' {
1598 i += 3;
1599 while i + 2 < len {
1600 if bytes[i] == b'"' && bytes[i + 1] == b'"' && bytes[i + 2] == b'"' {
1601 i += 3;
1602 break;
1603 }
1604 i += 1;
1605 }
1606 continue;
1607 }
1608 if i + 2 < len && bytes[i] == b'\'' && bytes[i + 1] == b'\'' && bytes[i + 2] == b'\'' {
1609 i += 3;
1610 while i + 2 < len {
1611 if bytes[i] == b'\'' && bytes[i + 1] == b'\'' && bytes[i + 2] == b'\'' {
1612 i += 3;
1613 break;
1614 }
1615 i += 1;
1616 }
1617 continue;
1618 }
1619 if bytes[i] == b'"' {
1621 i += 1;
1622 while i < len {
1623 if bytes[i] == b'\\' { i += 2; continue; }
1624 if bytes[i] == b'"' { i += 1; break; }
1625 i += 1;
1626 }
1627 continue;
1628 }
1629 if bytes[i] == b'\'' {
1631 i += 1;
1632 while i < len {
1633 if bytes[i] == b'\\' { i += 2; continue; }
1634 if bytes[i] == b'\'' { i += 1; break; }
1635 i += 1;
1636 }
1637 continue;
1638 }
1639 if bytes[i] == b'#' {
1641 while i < len && bytes[i] != b'\n' { i += 1; }
1642 continue;
1643 }
1644 if i + 1 < len && bytes[i] == b'/' && bytes[i + 1] == b'/' {
1646 while i < len && bytes[i] != b'\n' { i += 1; }
1647 continue;
1648 }
1649 if i + 1 < len && bytes[i] == b'/' && bytes[i + 1] == b'*' {
1651 i += 2;
1652 while i + 1 < len {
1653 if bytes[i] == b'*' && bytes[i + 1] == b'/' { i += 2; break; }
1654 i += 1;
1655 }
1656 continue;
1657 }
1658 result[i] = bytes[i];
1660 i += 1;
1661 }
1662
1663 String::from_utf8_lossy(&result).into_owned()
1664}
1665
1666fn extract_dot_chains<'a>(content: &'a str) -> Vec<(&'a str, &'a str)> {
1669 static DOT_CHAIN_RE: LazyLock<Regex> = LazyLock::new(|| {
1670 Regex::new(r"\b([A-Za-z_]\w*)\.([A-Za-z_]\w*)").unwrap()
1671 });
1672
1673 let mut chains = Vec::new();
1674 let mut seen: HashSet<(&str, &str)> = HashSet::new();
1675 for cap in DOT_CHAIN_RE.captures_iter(content) {
1676 let receiver = cap.get(1).unwrap().as_str();
1677 let member = cap.get(2).unwrap().as_str();
1678 if seen.insert((receiver, member)) {
1679 chains.push((receiver, member));
1680 }
1681 }
1682 chains
1683}
1684
1685fn extract_references_from_content<'a>(content: &'a str, own_name: &str) -> Vec<&'a str> {
1689 let stripped = strip_comments_and_strings(content);
1692 let stripped_words: HashSet<&str> = stripped
1693 .split(|c: char| !c.is_alphanumeric() && c != '_')
1694 .filter(|w| !w.is_empty())
1695 .collect();
1696
1697 let mut refs = Vec::new();
1698 let mut seen: HashSet<&str> = HashSet::new();
1699
1700 for word in content.split(|c: char| !c.is_alphanumeric() && c != '_') {
1701 if word.is_empty() || word == own_name {
1702 continue;
1703 }
1704 if is_keyword(word) || word.len() < 2 {
1705 continue;
1706 }
1707 if word.starts_with(|c: char| c.is_lowercase()) && word.len() < 3 {
1709 continue;
1710 }
1711 if !word.starts_with(|c: char| c.is_alphabetic() || c == '_') {
1712 continue;
1713 }
1714 if is_common_local_name(word) {
1716 continue;
1717 }
1718 if !stripped_words.contains(word) {
1720 continue;
1721 }
1722 if seen.insert(word) {
1723 refs.push(word);
1724 }
1725 }
1726
1727 refs
1728}
1729
1730static COMMON_LOCAL_NAMES: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
1731 [
1732 "result", "results", "data", "config", "value", "values",
1733 "item", "items", "input", "output", "args", "opts",
1734 "name", "path", "file", "line", "count", "index",
1735 "temp", "prev", "next", "curr", "current", "node",
1736 "left", "right", "root", "head", "tail", "body",
1737 "text", "content", "source", "target", "entry",
1738 "error", "errors", "message", "response", "request",
1739 "context", "state", "props", "event", "handler",
1740 "callback", "options", "params", "query", "list",
1741 "base", "info", "meta", "kind", "mode", "flag",
1742 "size", "length", "width", "height", "start", "stop",
1743 "begin", "done", "found", "status", "code",
1744 ].into_iter().collect()
1745});
1746
1747fn is_common_local_name(word: &str) -> bool {
1750 COMMON_LOCAL_NAMES.contains(word)
1751}
1752
1753fn infer_ref_type(content: &str, ref_name: &str) -> RefType {
1755 let bytes = content.as_bytes();
1758 let name_bytes = ref_name.as_bytes();
1759 let mut search_start = 0;
1760 while let Some(rel_pos) = content[search_start..].find(ref_name) {
1761 let pos = search_start + rel_pos;
1762 let after = pos + name_bytes.len();
1763 if after < bytes.len() && bytes[after] == b'(' {
1765 let is_boundary = pos == 0 || {
1767 let prev = bytes[pos - 1];
1768 !prev.is_ascii_alphanumeric() && prev != b'_'
1769 };
1770 if is_boundary {
1771 return RefType::Calls;
1772 }
1773 }
1774 search_start = pos + 1;
1776 while search_start < content.len() && !content.is_char_boundary(search_start) {
1777 search_start += 1;
1778 }
1779 }
1780
1781 for line in content.lines() {
1783 let trimmed = line.trim();
1784 if (trimmed.starts_with("import ") || trimmed.starts_with("use ")
1785 || trimmed.starts_with("from ") || trimmed.starts_with("require("))
1786 && trimmed.contains(ref_name)
1787 {
1788 return RefType::Imports;
1789 }
1790 }
1791
1792 RefType::TypeRef
1794}
1795
1796static KEYWORDS: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
1797 [
1798 "if", "else", "for", "while", "do", "switch", "case", "break",
1800 "continue", "return", "try", "catch", "finally", "throw",
1801 "new", "delete", "typeof", "instanceof", "in", "of",
1802 "true", "false", "null", "undefined", "void", "this",
1803 "super", "class", "extends", "implements", "interface",
1804 "enum", "const", "let", "var", "function", "async",
1805 "await", "yield", "import", "export", "default", "from",
1806 "as", "static", "public", "private", "protected",
1807 "abstract", "final", "override",
1808 "fn", "pub", "mod", "use", "struct", "impl", "trait",
1810 "where", "type", "self", "Self", "mut", "ref", "match",
1811 "loop", "move", "unsafe", "extern", "crate", "dyn",
1812 "def", "elif", "except", "raise", "with",
1814 "pass", "lambda", "nonlocal", "global", "assert",
1815 "True", "False", "and", "or", "not", "is",
1816 "func", "package", "range", "select", "chan", "go",
1818 "defer", "map", "make", "append", "len", "cap",
1819 "auto", "register", "volatile", "sizeof", "typedef",
1821 "template", "typename", "namespace", "virtual", "inline",
1822 "constexpr", "nullptr", "noexcept", "explicit", "friend",
1823 "operator", "using", "cout", "endl", "cerr", "cin",
1824 "printf", "scanf", "malloc", "free", "NULL", "include",
1825 "ifdef", "ifndef", "endif", "define", "pragma",
1826 "end", "then", "elsif", "unless", "until",
1828 "begin", "rescue", "ensure", "when", "require",
1829 "attr_accessor", "attr_reader", "attr_writer",
1830 "puts", "nil", "module", "defined",
1831 "internal", "sealed", "readonly",
1833 "partial", "delegate", "event", "params", "out",
1834 "object", "decimal", "sbyte", "ushort", "uint",
1835 "ulong", "nint", "nuint", "dynamic",
1836 "get", "set", "value", "init", "record",
1837 "string", "number", "boolean", "int", "float", "double",
1839 "bool", "char", "byte", "i8", "i16", "i32", "i64",
1840 "u8", "u16", "u32", "u64", "f32", "f64", "usize",
1841 "isize", "str", "String", "Vec", "Option", "Result",
1842 "Box", "Arc", "Rc", "HashMap", "HashSet", "Some",
1843 "Ok", "Err",
1844 ].into_iter().collect()
1845});
1846
1847fn is_keyword(word: &str) -> bool {
1848 KEYWORDS.contains(word)
1849}
1850
1851#[cfg(test)]
1852mod tests {
1853 use super::*;
1854 use crate::git::types::{FileChange, FileStatus};
1855 use std::io::Write;
1856 use tempfile::TempDir;
1857
1858 fn create_test_repo() -> (TempDir, ParserRegistry) {
1859 let dir = TempDir::new().unwrap();
1860 let registry = crate::parser::plugins::create_default_registry();
1861 (dir, registry)
1862 }
1863
1864 fn write_file(dir: &Path, name: &str, content: &str) {
1865 let path = dir.join(name);
1866 if let Some(parent) = path.parent() {
1867 std::fs::create_dir_all(parent).unwrap();
1868 }
1869 let mut f = std::fs::File::create(path).unwrap();
1870 f.write_all(content.as_bytes()).unwrap();
1871 }
1872
1873 #[test]
1874 fn test_incremental_add_file() {
1875 let (dir, registry) = create_test_repo();
1876 let root = dir.path();
1877
1878 write_file(root, "a.ts", "export function foo() { return bar(); }\n");
1880 write_file(root, "b.ts", "export function bar() { return 1; }\n");
1881
1882 let (mut graph, _) = EntityGraph::build(root, &["a.ts".into(), "b.ts".into()], ®istry);
1883 assert_eq!(graph.entities.len(), 2);
1884
1885 write_file(root, "c.ts", "export function baz() { return foo(); }\n");
1887 graph.update_from_changes(
1888 &[FileChange {
1889 file_path: "c.ts".into(),
1890 status: FileStatus::Added,
1891 old_file_path: None,
1892 before_content: None,
1893 after_content: None, }],
1895 root,
1896 ®istry,
1897 );
1898
1899 assert_eq!(graph.entities.len(), 3);
1900 assert!(graph.entities.contains_key("c.ts::function::baz"));
1901 let baz_deps = graph.get_dependencies("c.ts::function::baz");
1903 assert!(
1904 baz_deps.iter().any(|d| d.name == "foo"),
1905 "baz should depend on foo. Deps: {:?}",
1906 baz_deps.iter().map(|d| &d.name).collect::<Vec<_>>()
1907 );
1908 }
1909
1910 #[test]
1911 fn test_incremental_delete_file() {
1912 let (dir, registry) = create_test_repo();
1913 let root = dir.path();
1914
1915 write_file(root, "a.ts", "export function foo() { return bar(); }\n");
1916 write_file(root, "b.ts", "export function bar() { return 1; }\n");
1917
1918 let (mut graph, _) = EntityGraph::build(root, &["a.ts".into(), "b.ts".into()], ®istry);
1919 assert_eq!(graph.entities.len(), 2);
1920
1921 graph.update_from_changes(
1923 &[FileChange {
1924 file_path: "b.ts".into(),
1925 status: FileStatus::Deleted,
1926 old_file_path: None,
1927 before_content: None,
1928 after_content: None,
1929 }],
1930 root,
1931 ®istry,
1932 );
1933
1934 assert_eq!(graph.entities.len(), 1);
1935 assert!(!graph.entities.contains_key("b.ts::function::bar"));
1936 let foo_deps = graph.get_dependencies("a.ts::function::foo");
1938 assert!(
1939 foo_deps.is_empty(),
1940 "foo's deps should be empty after bar deleted. Deps: {:?}",
1941 foo_deps.iter().map(|d| &d.name).collect::<Vec<_>>()
1942 );
1943 }
1944
1945 #[test]
1946 fn test_incremental_modify_file() {
1947 let (dir, registry) = create_test_repo();
1948 let root = dir.path();
1949
1950 write_file(root, "a.ts", "export function foo() { return bar(); }\n");
1951 write_file(root, "b.ts", "export function bar() { return 1; }\nexport function baz() { return 2; }\n");
1952
1953 let (mut graph, _) = EntityGraph::build(root, &["a.ts".into(), "b.ts".into()], ®istry);
1954 assert_eq!(graph.entities.len(), 3);
1955
1956 write_file(root, "a.ts", "export function foo() { return baz(); }\n");
1958 graph.update_from_changes(
1959 &[FileChange {
1960 file_path: "a.ts".into(),
1961 status: FileStatus::Modified,
1962 old_file_path: None,
1963 before_content: None,
1964 after_content: None,
1965 }],
1966 root,
1967 ®istry,
1968 );
1969
1970 assert_eq!(graph.entities.len(), 3);
1971 let foo_deps = graph.get_dependencies("a.ts::function::foo");
1973 let dep_names: Vec<&str> = foo_deps.iter().map(|d| d.name.as_str()).collect();
1974 assert!(dep_names.contains(&"baz"), "foo should depend on baz after modification. Deps: {:?}", dep_names);
1975 assert!(!dep_names.contains(&"bar"), "foo should no longer depend on bar. Deps: {:?}", dep_names);
1976 }
1977
1978 #[test]
1979 fn test_incremental_with_content() {
1980 let (dir, registry) = create_test_repo();
1981 let root = dir.path();
1982
1983 write_file(root, "a.ts", "export function foo() { return 1; }\n");
1984 let (mut graph, _) = EntityGraph::build(root, &["a.ts".into()], ®istry);
1985 assert_eq!(graph.entities.len(), 1);
1986
1987 graph.update_from_changes(
1989 &[FileChange {
1990 file_path: "b.ts".into(),
1991 status: FileStatus::Added,
1992 old_file_path: None,
1993 before_content: None,
1994 after_content: Some("export function bar() { return foo(); }\n".into()),
1995 }],
1996 root,
1997 ®istry,
1998 );
1999
2000 assert_eq!(graph.entities.len(), 2);
2001 let bar_deps = graph.get_dependencies("b.ts::function::bar");
2002 assert!(bar_deps.iter().any(|d| d.name == "foo"));
2003 }
2004
2005 #[test]
2006 fn test_extract_references() {
2007 let content = "function processData(input) {\n const result = validateInput(input);\n return transform(result);\n}";
2008 let refs = extract_references_from_content(content, "processData");
2009 assert!(refs.contains(&"validateInput"));
2010 assert!(refs.contains(&"transform"));
2011 assert!(!refs.contains(&"processData")); }
2013
2014 #[test]
2015 fn test_extract_references_skips_keywords() {
2016 let content = "function foo() { if (true) { return false; } }";
2017 let refs = extract_references_from_content(content, "foo");
2018 assert!(!refs.contains(&"if"));
2019 assert!(!refs.contains(&"true"));
2020 assert!(!refs.contains(&"return"));
2021 assert!(!refs.contains(&"false"));
2022 }
2023
2024 #[test]
2025 fn test_infer_ref_type_call() {
2026 assert_eq!(
2027 infer_ref_type("validateInput(data)", "validateInput"),
2028 RefType::Calls,
2029 );
2030 }
2031
2032 #[test]
2033 fn test_infer_ref_type_type() {
2034 assert_eq!(
2035 infer_ref_type("let x: MyType = something", "MyType"),
2036 RefType::TypeRef,
2037 );
2038 }
2039
2040 #[test]
2041 fn test_infer_ref_type_multibyte_utf8() {
2042 assert_eq!(
2044 infer_ref_type("let café = foo(x)", "foo"),
2045 RefType::Calls,
2046 );
2047 assert_eq!(
2048 infer_ref_type("class HandicapfrPublicationFieldsEnum:\n É = 1\n bar()", "bar"),
2049 RefType::Calls,
2050 );
2051 assert_eq!(
2053 infer_ref_type("// 日本語コメント\nlet x = 1", "missing"),
2054 RefType::TypeRef,
2055 );
2056 }
2057
2058 #[test]
2059 fn test_dot_chain_self_resolution() {
2060 let (dir, registry) = create_test_repo();
2061 let root = dir.path();
2062
2063 write_file(root, "service.py", "\
2064class MyService:
2065 def process(self):
2066 return self.validate()
2067
2068 def validate(self):
2069 return True
2070");
2071
2072 let (graph, _) = EntityGraph::build(root, &["service.py".into()], ®istry);
2073
2074 let process_id = graph.entities.keys()
2076 .find(|id| id.contains("process"))
2077 .expect("process entity should exist");
2078 let deps = graph.get_dependencies(process_id);
2079 assert!(
2080 deps.iter().any(|d| d.name == "validate"),
2081 "process should depend on validate via self.validate(). Deps: {:?}",
2082 deps.iter().map(|d| &d.name).collect::<Vec<_>>()
2083 );
2084 }
2085
2086 #[test]
2087 fn test_dot_chain_this_resolution() {
2088 let (dir, registry) = create_test_repo();
2089 let root = dir.path();
2090
2091 write_file(root, "service.ts", "\
2092class UserService {
2093 process() {
2094 return this.validate();
2095 }
2096 validate() {
2097 return true;
2098 }
2099}
2100");
2101
2102 let (graph, _) = EntityGraph::build(root, &["service.ts".into()], ®istry);
2103
2104 let process_id = graph.entities.keys()
2105 .find(|id| id.contains("process"))
2106 .expect("process entity should exist");
2107 let deps = graph.get_dependencies(process_id);
2108 assert!(
2109 deps.iter().any(|d| d.name == "validate"),
2110 "process should depend on validate via this.validate(). Deps: {:?}",
2111 deps.iter().map(|d| &d.name).collect::<Vec<_>>()
2112 );
2113 }
2114
2115 #[test]
2116 fn test_dot_chain_class_static() {
2117 let (dir, registry) = create_test_repo();
2118 let root = dir.path();
2119
2120 write_file(root, "utils.ts", "\
2121class MathUtils {
2122 static compute() { return 1; }
2123}
2124function caller() { return MathUtils.compute(); }
2125");
2126
2127 let (graph, _) = EntityGraph::build(root, &["utils.ts".into()], ®istry);
2128
2129 let caller_id = graph.entities.keys()
2130 .find(|id| id.contains("caller"))
2131 .expect("caller entity should exist");
2132 let deps = graph.get_dependencies(caller_id);
2133 assert!(
2134 deps.iter().any(|d| d.name == "compute"),
2135 "caller should depend on compute via MathUtils.compute(). Deps: {:?}",
2136 deps.iter().map(|d| &d.name).collect::<Vec<_>>()
2137 );
2138 }
2139
2140 #[test]
2141 fn test_js_ts_import_resolution() {
2142 let (dir, registry) = create_test_repo();
2143 let root = dir.path();
2144
2145 write_file(root, "helper.ts", "\
2146export function helper() { return 1; }
2147");
2148 write_file(root, "main.ts", "\
2149import { helper } from './helper';
2150export function main() { return helper(); }
2151");
2152
2153 let (graph, _) = EntityGraph::build(
2154 root,
2155 &["helper.ts".into(), "main.ts".into()],
2156 ®istry,
2157 );
2158
2159 let main_id = graph.entities.keys()
2160 .find(|id| id.contains("main"))
2161 .expect("main entity should exist");
2162 let deps = graph.get_dependencies(main_id);
2163 assert!(
2164 deps.iter().any(|d| d.name == "helper"),
2165 "main should depend on helper via JS import. Deps: {:?}",
2166 deps.iter().map(|d| &d.name).collect::<Vec<_>>()
2167 );
2168 }
2169
2170 #[test]
2171 fn test_dot_chain_no_false_edges() {
2172 let (dir, registry) = create_test_repo();
2173 let root = dir.path();
2174
2175 write_file(root, "a.py", "\
2178class ClassA:
2179 def run(self):
2180 return self.process()
2181
2182 def process(self):
2183 return 1
2184");
2185 write_file(root, "b.py", "\
2186class ClassB:
2187 def process(self):
2188 return 2
2189");
2190
2191 let (graph, _) = EntityGraph::build(
2192 root,
2193 &["a.py".into(), "b.py".into()],
2194 ®istry,
2195 );
2196
2197 let run_id = graph.entities.keys()
2198 .find(|id| id.contains("run"))
2199 .expect("run entity should exist");
2200 let deps = graph.get_dependencies(run_id);
2201 for dep in &deps {
2203 if dep.name == "process" {
2204 assert!(
2205 dep.file_path == "a.py",
2206 "run's process dep should be in a.py, not {}",
2207 dep.file_path
2208 );
2209 }
2210 }
2211 }
2212
2213 #[test]
2214 fn test_dot_chain_fallback() {
2215 let (dir, registry) = create_test_repo();
2216 let root = dir.path();
2217
2218 write_file(root, "app.ts", "\
2222export function helper() { return 1; }
2223export function caller() {
2224 const val = helper();
2225 return val;
2226}
2227");
2228
2229 let (graph, _) = EntityGraph::build(root, &["app.ts".into()], ®istry);
2230
2231 let caller_id = graph.entities.keys()
2232 .find(|id| id.contains("caller"))
2233 .expect("caller entity should exist");
2234 let deps = graph.get_dependencies(caller_id);
2235 assert!(
2236 deps.iter().any(|d| d.name == "helper"),
2237 "caller should still resolve helper via bag-of-words. Deps: {:?}",
2238 deps.iter().map(|d| &d.name).collect::<Vec<_>>()
2239 );
2240 }
2241
2242}