1use std::collections::{HashMap, HashSet};
11use std::path::Path;
12use std::sync::LazyLock;
13
14use rayon::prelude::*;
15use regex::Regex;
16use serde::{Deserialize, Serialize};
17
18use crate::git::types::{FileChange, FileStatus};
19use crate::model::entity::SemanticEntity;
20use crate::parser::registry::ParserRegistry;
21use crate::parser::scope_resolve;
22
/// A directed edge of the entity graph: `from_entity` refers to `to_entity`.
///
/// Field names are serialized in camelCase (`fromEntity`, `toEntity`, `refType`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct EntityRef {
    /// ID of the entity whose content contains the reference.
    pub from_entity: String,
    /// ID of the entity being referenced.
    pub to_entity: String,
    /// Kind of reference this edge represents.
    pub ref_type: RefType,
}
31
/// Kind of relationship carried by an [`EntityRef`].
///
/// Variants are serialized as their lowercase names (`calls`, `typeref`, `imports`).
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum RefType {
    /// The source calls the target (also used for resolved `receiver.member` accesses).
    Calls,
    /// The source refers to the target as a type.
    TypeRef,
    /// The source imports the target.
    Imports,
}
43
/// Dependency graph over semantic entities, with adjacency indexes in both directions.
#[derive(Debug)]
pub struct EntityGraph {
    /// Entity metadata keyed by entity ID.
    pub entities: HashMap<String, EntityInfo>,
    /// Every edge of the graph, as (from, to, kind) records.
    pub edges: Vec<EntityRef>,
    /// Reverse adjacency: target entity ID -> IDs of entities that reference it.
    pub dependents: HashMap<String, Vec<String>>,
    /// Forward adjacency: source entity ID -> IDs of entities it references.
    pub dependencies: HashMap<String, Vec<String>>,
}
56
/// Lightweight per-entity metadata stored in the graph (no source content).
///
/// Field names are serialized in camelCase.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct EntityInfo {
    /// Unique entity ID; also the key under which this record is stored in the graph.
    pub id: String,
    /// Entity name, as used for symbol lookup.
    pub name: String,
    /// Entity kind as a string (e.g. `"class"`, `"struct"`, `"interface"`).
    pub entity_type: String,
    /// Path of the file that defines the entity.
    pub file_path: String,
    /// ID of the enclosing entity, if any; left out of serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub parent_id: Option<String>,
    /// First line of the entity, copied verbatim from the extracted entity.
    pub start_line: usize,
    /// Last line of the entity, copied verbatim from the extracted entity.
    pub end_line: usize,
}
70
71impl EntityGraph {
72 pub fn from_parts(entities: HashMap<String, EntityInfo>, edges: Vec<EntityRef>) -> Self {
74 let mut dependents: HashMap<String, Vec<String>> = HashMap::new();
75 let mut dependencies: HashMap<String, Vec<String>> = HashMap::new();
76 for edge in &edges {
77 dependents
78 .entry(edge.to_entity.clone())
79 .or_default()
80 .push(edge.from_entity.clone());
81 dependencies
82 .entry(edge.from_entity.clone())
83 .or_default()
84 .push(edge.to_entity.clone());
85 }
86 EntityGraph {
87 entities,
88 edges,
89 dependents,
90 dependencies,
91 }
92 }
93
    /// Builds the full entity graph for `file_paths` (relative to `root`).
    ///
    /// Returns the graph together with every extracted entity. Files that
    /// cannot be read, or for which `registry` has no plugin, are skipped.
    ///
    /// Edges come from two sources, in this priority order:
    /// 1. `scope_resolve::resolve_with_scopes`, when at least one file's
    ///    language config enables scope resolution;
    /// 2. a name-based heuristic pass, run only for entities that received no
    ///    scope-resolved edge.
    ///
    /// Edges are de-duplicated on `(from, to)`; the first one seen wins, so a
    /// scope-resolved edge takes precedence over a heuristic one.
    pub fn build(
        root: &Path,
        file_paths: &[String],
        registry: &ParserRegistry,
    ) -> (Self, Vec<SemanticEntity>) {
        // Phase 1: read and parse every file in parallel. The `?` operators make
        // `filter_map` drop unreadable files and files without a plugin.
        let per_file: Vec<(Vec<SemanticEntity>, Option<(String, String, tree_sitter::Tree)>)> = file_paths
            .par_iter()
            .filter_map(|file_path| {
                let full_path = root.join(file_path);
                let content = std::fs::read_to_string(&full_path).ok()?;
                let plugin = registry.get_plugin_with_content(file_path, &content)?;
                let (entities, tree) = plugin.extract_entities_with_tree(&content, file_path);
                // Keep (path, content, tree) only when the plugin produced a tree.
                let parsed = tree.map(|t| (file_path.clone(), content, t));
                Some((entities, parsed))
            })
            .collect();

        // Flatten the per-file results into one entity list and one list of parsed files.
        let mut all_entities: Vec<SemanticEntity> = Vec::new();
        let mut parsed_files: Vec<(String, String, tree_sitter::Tree)> = Vec::new();
        for (entities, parsed) in per_file {
            all_entities.extend(entities);
            if let Some(p) = parsed {
                parsed_files.push(p);
            }
        }

        // Phase 2: lookup tables.
        // symbol_table: entity name -> IDs of all entities with that name.
        // entity_map:   entity ID   -> metadata.
        let mut symbol_table: HashMap<String, Vec<String>> = HashMap::with_capacity(all_entities.len());
        let mut entity_map: HashMap<String, EntityInfo> = HashMap::with_capacity(all_entities.len());

        for entity in &all_entities {
            symbol_table
                .entry(entity.name.clone())
                .or_default()
                .push(entity.id.clone());

            entity_map.insert(
                entity.id.clone(),
                EntityInfo {
                    id: entity.id.clone(),
                    name: entity.name.clone(),
                    entity_type: entity.entity_type.clone(),
                    file_path: entity.file_path.clone(),
                    parent_id: entity.parent_id.clone(),
                    start_line: entity.start_line,
                    end_line: entity.end_line,
                },
            );
        }

        // (parent ID, child ID) pairs; used to suppress edges between an entity
        // and its direct parent or child.
        let parent_child_pairs: HashSet<(&str, &str)> = all_entities
            .iter()
            .filter_map(|e| {
                e.parent_id.as_ref().map(|pid| (pid.as_str(), e.id.as_str()))
            })
            .collect();

        // (parent ID, child NAME) pairs; used to skip references from a parent
        // to a name that belongs to one of its own children.
        let class_child_names: HashSet<(&str, &str)> = all_entities
            .iter()
            .filter_map(|e| {
                e.parent_id.as_ref().map(|pid| (pid.as_str(), e.name.as_str()))
            })
            .collect();

        // Names of all class-like entities.
        let class_entity_names: HashSet<&str> = all_entities
            .iter()
            .filter(|e| matches!(e.entity_type.as_str(), "class" | "struct" | "interface" | "class_type"))
            .map(|e| e.name.as_str())
            .collect();

        let id_to_name: HashMap<&str, &str> = all_entities
            .iter()
            .map(|e| (e.id.as_str(), e.name.as_str()))
            .collect();

        // enclosing_class: member ID  -> name of its parent, when that name is class-like.
        // class_members:   class NAME -> (member name, member ID) list.
        // Both are keyed by class name, so same-named classes share one member list.
        let mut enclosing_class: HashMap<&str, &str> = HashMap::new();
        let mut class_members: HashMap<&str, Vec<(&str, &str)>> = HashMap::new();

        for entity in &all_entities {
            if let Some(ref pid) = entity.parent_id {
                if let Some(&parent_name) = id_to_name.get(pid.as_str()) {
                    if class_entity_names.contains(parent_name) {
                        enclosing_class.insert(entity.id.as_str(), parent_name);
                        class_members
                            .entry(parent_name)
                            .or_default()
                            .push((entity.name.as_str(), entity.id.as_str()));
                    }
                }
            }
        }

        // (file path, imported name) -> ID of the entity the import points at.
        let import_table = build_import_table(root, file_paths, &symbol_table, &entity_map);

        // Phase 3: scope-aware resolution, only if some file's extension (the text
        // from the last '.' onward) maps to a language config with `scope_resolve` set.
        let has_scope_lang = file_paths.iter().any(|f| {
            let ext = f.rfind('.').map(|i| &f[i..]).unwrap_or("");
            crate::parser::plugins::code::languages::get_language_config(ext)
                .and_then(|c| c.scope_resolve)
                .is_some()
        });
        let (scope_edges, scope_resolved_entities) = if has_scope_lang {
            let result = scope_resolve::resolve_with_scopes(root, file_paths, &all_entities, &entity_map, Some(parsed_files));
            // Entities that are the source of at least one scope-resolved edge;
            // the heuristic pass below skips them.
            let resolved_entity_ids: HashSet<String> = result.edges.iter()
                .map(|(from, _, _)| from.clone())
                .collect();
            (result.edges, resolved_entity_ids)
        } else {
            (vec![], HashSet::new())
        };

        // Phase 4: heuristic, name-based resolution, run in parallel per entity.
        let resolved_refs: Vec<(String, String, RefType)> = all_entities
            .par_iter()
            .flat_map(|entity| {
                if scope_resolved_entities.contains(&entity.id) {
                    return vec![];
                }

                let mut entity_edges = Vec::new();
                // Names already accounted for by a dot-chain match; they are not
                // resolved again as bare references.
                let mut consumed_words: HashSet<String> = HashSet::new();

                // NOTE(review): helpers are defined outside this view; presumably
                // `extract_dot_chains` yields (receiver, member) pairs from the
                // comment/string-stripped content — confirm.
                let stripped = strip_comments_and_strings(&entity.content);
                let dot_chains = extract_dot_chains(&stripped);

                for (receiver, member) in &dot_chains {
                    if *receiver == "self" || *receiver == "this" {
                        // `self.member` / `this.member`: look for a sibling member
                        // of the enclosing class (never the entity itself).
                        if let Some(class_name) = enclosing_class.get(entity.id.as_str()) {
                            if let Some(members) = class_members.get(class_name) {
                                for (n, tid) in members {
                                    if *n == *member && *tid != entity.id.as_str() {
                                        entity_edges.push((
                                            entity.id.clone(),
                                            tid.to_string(),
                                            RefType::Calls,
                                        ));
                                        consumed_words.insert(member.to_string());
                                        break;
                                    }
                                }
                            }
                        }
                    } else if class_entity_names.contains(*receiver) {
                        // `ClassName.member`: link to the first member with that name
                        // and consume both words.
                        if let Some(members) = class_members.get(*receiver) {
                            for (n, tid) in members {
                                if *n == *member {
                                    entity_edges.push((
                                        entity.id.clone(),
                                        tid.to_string(),
                                        RefType::Calls,
                                    ));
                                    consumed_words.insert(member.to_string());
                                    consumed_words.insert(receiver.to_string());
                                    break;
                                }
                            }
                        }
                    }
                }

                // Bare identifier references found in the entity's content.
                let refs = extract_references_from_content(&entity.content, &entity.name);
                for ref_name in refs {
                    if consumed_words.contains(ref_name) {
                        continue;
                    }

                    // The name belongs to one of this entity's own children.
                    if class_child_names.contains(&(entity.id.as_str(), ref_name)) {
                        continue;
                    }

                    // An import in this file takes priority. Whether or not an edge
                    // is emitted, the name is considered handled (`continue`).
                    let import_key = (entity.file_path.clone(), ref_name.to_string());
                    if let Some(import_target_id) = import_table.get(&import_key) {
                        if import_target_id != &entity.id
                            && !parent_child_pairs.contains(&(entity.id.as_str(), import_target_id.as_str()))
                            && !parent_child_pairs.contains(&(import_target_id.as_str(), entity.id.as_str()))
                        {
                            let ref_type = infer_ref_type(&entity.content, &ref_name);
                            entity_edges.push((
                                entity.id.clone(),
                                import_target_id.clone(),
                                ref_type,
                            ));
                        }
                        continue;
                    }

                    // Otherwise only a same-file entity with this name is accepted.
                    if let Some(target_ids) = symbol_table.get(ref_name) {
                        let target = target_ids
                            .iter()
                            .find(|id| {
                                *id != &entity.id
                                    && entity_map
                                        .get(*id)
                                        .map_or(false, |e| e.file_path == entity.file_path)
                            });

                        if let Some(target_id) = target {
                            // No edges between a parent and its direct child.
                            if parent_child_pairs.contains(&(entity.id.as_str(), target_id.as_str()))
                                || parent_child_pairs.contains(&(target_id.as_str(), entity.id.as_str()))
                            {
                                continue;
                            }
                            let ref_type = infer_ref_type(&entity.content, &ref_name);
                            entity_edges.push((
                                entity.id.clone(),
                                target_id.clone(),
                                ref_type,
                            ));
                        }
                    }
                }
                entity_edges
            })
            .collect();

        // Phase 5: merge, scope edges first, and keep only the first edge for
        // each (from, to) pair regardless of its ref type.
        let mut all_resolved: Vec<(String, String, RefType)> = scope_edges;
        all_resolved.extend(resolved_refs);
        let mut seen_edges: HashSet<(String, String)> = HashSet::new();
        all_resolved.retain(|e| seen_edges.insert((e.0.clone(), e.1.clone())));

        // Phase 6: materialize the edge list and both adjacency indexes.
        let mut edges: Vec<EntityRef> = Vec::with_capacity(all_resolved.len());
        let mut dependents: HashMap<String, Vec<String>> = HashMap::new();
        let mut dependencies: HashMap<String, Vec<String>> = HashMap::new();

        for (from_entity, to_entity, ref_type) in all_resolved {
            dependents
                .entry(to_entity.clone())
                .or_default()
                .push(from_entity.clone());
            dependencies
                .entry(from_entity.clone())
                .or_default()
                .push(to_entity.clone());
            edges.push(EntityRef {
                from_entity,
                to_entity,
                ref_type,
            });
        }

        let graph = EntityGraph {
            entities: entity_map,
            edges,
            dependents,
            dependencies,
        };

        (graph, all_entities)
    }
376
377 pub fn build_incremental(
383 root: &Path,
384 stale_files: &[String],
385 all_file_paths: &[String],
386 cached_entities: Vec<SemanticEntity>,
387 cached_edges: Vec<EntityRef>,
388 stale_file_cached_entities: Vec<SemanticEntity>,
389 registry: &ParserRegistry,
390 ) -> (Self, Vec<SemanticEntity>) {
391 let stale_set: HashSet<&str> = stale_files.iter().map(|s| s.as_str()).collect();
393
394 let per_file: Vec<(Vec<SemanticEntity>, Option<(String, String, tree_sitter::Tree)>)> = stale_files
396 .par_iter()
397 .filter_map(|file_path| {
398 let full_path = root.join(file_path);
399 let content = std::fs::read_to_string(&full_path).ok()?;
400 let plugin = registry.get_plugin_with_content(file_path, &content)?;
401 let (entities, tree) = plugin.extract_entities_with_tree(&content, file_path);
402 let parsed = tree.map(|t| (file_path.clone(), content, t));
403 Some((entities, parsed))
404 })
405 .collect();
406
407 let mut new_entities: Vec<SemanticEntity> = Vec::new();
408 let mut parsed_files: Vec<(String, String, tree_sitter::Tree)> = Vec::new();
409 for (entities, parsed) in per_file {
410 new_entities.extend(entities);
411 if let Some(p) = parsed {
412 parsed_files.push(p);
413 }
414 }
415
416 let cached_hashes: HashMap<&str, &str> = stale_file_cached_entities
419 .iter()
420 .map(|e| (e.id.as_str(), e.content_hash.as_str()))
421 .collect();
422
423 let mut truly_changed_ids: HashSet<String> = HashSet::new();
425 let mut content_clean_ids: HashSet<String> = HashSet::new();
426 for entity in &new_entities {
427 match cached_hashes.get(entity.id.as_str()) {
428 Some(old_hash) if *old_hash == entity.content_hash.as_str() => {
429 content_clean_ids.insert(entity.id.clone());
430 }
431 _ => {
432 truly_changed_ids.insert(entity.id.clone());
434 }
435 }
436 }
437
438 let new_entity_ids: HashSet<&str> = new_entities.iter().map(|e| e.id.as_str()).collect();
440 let deleted_ids: HashSet<&str> = stale_file_cached_entities
441 .iter()
442 .filter(|e| !new_entity_ids.contains(e.id.as_str()))
443 .map(|e| e.id.as_str())
444 .collect();
445
446 let all_entities: Vec<SemanticEntity> = cached_entities
448 .into_iter()
449 .chain(new_entities.into_iter())
450 .collect();
451
452 let mut affected_clean_ids: HashSet<String> = HashSet::new();
454 for edge in &cached_edges {
455 let to_truly_changed = truly_changed_ids.contains(&edge.to_entity)
456 || deleted_ids.contains(edge.to_entity.as_str());
457 if to_truly_changed && !stale_set.contains(
458 all_entities.iter()
459 .find(|e| e.id == edge.from_entity)
460 .map(|e| e.file_path.as_str())
461 .unwrap_or("")
462 ) {
463 affected_clean_ids.insert(edge.from_entity.clone());
464 }
465 }
466
467 let stale_entity_ids: HashSet<&str> = all_entities
469 .iter()
470 .filter(|e| stale_set.contains(e.file_path.as_str()))
471 .map(|e| e.id.as_str())
472 .collect();
473
474 let kept_edges: Vec<EntityRef> = cached_edges
478 .into_iter()
479 .filter(|e| {
480 let from_stale = stale_entity_ids.contains(e.from_entity.as_str());
481 let to_stale = stale_entity_ids.contains(e.to_entity.as_str());
482
483 if !from_stale && !to_stale && !affected_clean_ids.contains(&e.from_entity) {
484 return true;
486 }
487 if content_clean_ids.contains(&e.from_entity)
488 && !truly_changed_ids.contains(&e.to_entity)
489 && !deleted_ids.contains(e.to_entity.as_str())
490 && !affected_clean_ids.contains(&e.from_entity)
491 {
492 return true;
494 }
495 false
496 })
497 .collect();
498
499 let needs_resolution: HashSet<&str> = all_entities
502 .iter()
503 .filter(|e| {
504 truly_changed_ids.contains(&e.id)
505 || affected_clean_ids.contains(&e.id)
506 })
507 .map(|e| e.id.as_str())
508 .collect();
509
510 let mut symbol_table: HashMap<String, Vec<String>> = HashMap::with_capacity(all_entities.len());
515 let mut entity_map: HashMap<String, EntityInfo> = HashMap::with_capacity(all_entities.len());
516
517 for entity in &all_entities {
518 symbol_table
519 .entry(entity.name.clone())
520 .or_default()
521 .push(entity.id.clone());
522 entity_map.insert(
523 entity.id.clone(),
524 EntityInfo {
525 id: entity.id.clone(),
526 name: entity.name.clone(),
527 entity_type: entity.entity_type.clone(),
528 file_path: entity.file_path.clone(),
529 parent_id: entity.parent_id.clone(),
530 start_line: entity.start_line,
531 end_line: entity.end_line,
532 },
533 );
534 }
535
536 let parent_child_pairs: HashSet<(&str, &str)> = all_entities
538 .iter()
539 .filter_map(|e| {
540 e.parent_id.as_ref().map(|pid| (pid.as_str(), e.id.as_str()))
541 })
542 .collect();
543
544 let class_child_names: HashSet<(&str, &str)> = all_entities
545 .iter()
546 .filter_map(|e| {
547 e.parent_id.as_ref().map(|pid| (pid.as_str(), e.name.as_str()))
548 })
549 .collect();
550
551 let class_entity_names: HashSet<&str> = all_entities
552 .iter()
553 .filter(|e| matches!(e.entity_type.as_str(), "class" | "struct" | "interface" | "class_type"))
554 .map(|e| e.name.as_str())
555 .collect();
556
557 let id_to_name: HashMap<&str, &str> = all_entities
558 .iter()
559 .map(|e| (e.id.as_str(), e.name.as_str()))
560 .collect();
561
562 let mut enclosing_class: HashMap<&str, &str> = HashMap::new();
563 let mut class_members: HashMap<&str, Vec<(&str, &str)>> = HashMap::new();
564
565 for entity in &all_entities {
566 if let Some(ref pid) = entity.parent_id {
567 if let Some(&parent_name) = id_to_name.get(pid.as_str()) {
568 if class_entity_names.contains(parent_name) {
569 enclosing_class.insert(entity.id.as_str(), parent_name);
570 class_members
571 .entry(parent_name)
572 .or_default()
573 .push((entity.name.as_str(), entity.id.as_str()));
574 }
575 }
576 }
577 }
578
579 let import_table = build_import_table(root, all_file_paths, &symbol_table, &entity_map);
581
582 let resolve_file_paths: Vec<String> = all_file_paths
584 .iter()
585 .filter(|f| {
586 stale_set.contains(f.as_str()) || all_entities.iter().any(|e| {
588 e.file_path == **f && affected_clean_ids.contains(&e.id)
589 })
590 })
591 .cloned()
592 .collect();
593
594 let has_scope_lang = resolve_file_paths.iter().any(|f| {
595 let ext = f.rfind('.').map(|i| &f[i..]).unwrap_or("");
596 crate::parser::plugins::code::languages::get_language_config(ext)
597 .and_then(|c| c.scope_resolve)
598 .is_some()
599 });
600 let (scope_edges, scope_resolved_entities) = if has_scope_lang {
601 let resolve_set: HashSet<&str> = resolve_file_paths.iter().map(|s| s.as_str()).collect();
603 let relevant_parsed: Vec<(String, String, tree_sitter::Tree)> = parsed_files
604 .into_iter()
605 .filter(|(fp, _, _)| resolve_set.contains(fp.as_str()))
606 .collect();
607 let pre = if relevant_parsed.is_empty() { None } else { Some(relevant_parsed) };
608 let result = scope_resolve::resolve_with_scopes(root, &resolve_file_paths, &all_entities, &entity_map, pre);
609 let resolved_entity_ids: HashSet<String> = result.edges.iter()
610 .map(|(from, _, _)| from.clone())
611 .collect();
612 (result.edges, resolved_entity_ids)
613 } else {
614 (vec![], HashSet::new())
615 };
616
617 let resolved_refs: Vec<(String, String, RefType)> = all_entities
619 .par_iter()
620 .filter(|e| needs_resolution.contains(e.id.as_str()))
621 .flat_map(|entity| {
622 if scope_resolved_entities.contains(&entity.id) {
623 return vec![];
624 }
625
626 let mut entity_edges = Vec::new();
627 let mut consumed_words: HashSet<String> = HashSet::new();
628
629 let stripped = strip_comments_and_strings(&entity.content);
631 let dot_chains = extract_dot_chains(&stripped);
632
633 for (receiver, member) in &dot_chains {
634 if *receiver == "self" || *receiver == "this" {
635 if let Some(class_name) = enclosing_class.get(entity.id.as_str()) {
636 if let Some(members) = class_members.get(class_name) {
637 for (n, tid) in members {
638 if *n == *member && *tid != entity.id.as_str() {
639 entity_edges.push((
640 entity.id.clone(),
641 tid.to_string(),
642 RefType::Calls,
643 ));
644 consumed_words.insert(member.to_string());
645 break;
646 }
647 }
648 }
649 }
650 } else if class_entity_names.contains(*receiver) {
651 if let Some(members) = class_members.get(*receiver) {
652 for (n, tid) in members {
653 if *n == *member {
654 entity_edges.push((
655 entity.id.clone(),
656 tid.to_string(),
657 RefType::Calls,
658 ));
659 consumed_words.insert(member.to_string());
660 consumed_words.insert(receiver.to_string());
661 break;
662 }
663 }
664 }
665 }
666 }
667
668 let refs = extract_references_from_content(&entity.content, &entity.name);
670 for ref_name in refs {
671 if consumed_words.contains(ref_name) {
672 continue;
673 }
674 if class_child_names.contains(&(entity.id.as_str(), ref_name)) {
675 continue;
676 }
677
678 let import_key = (entity.file_path.clone(), ref_name.to_string());
679 if let Some(import_target_id) = import_table.get(&import_key) {
680 if import_target_id != &entity.id
681 && !parent_child_pairs.contains(&(entity.id.as_str(), import_target_id.as_str()))
682 && !parent_child_pairs.contains(&(import_target_id.as_str(), entity.id.as_str()))
683 {
684 let ref_type = infer_ref_type(&entity.content, &ref_name);
685 entity_edges.push((
686 entity.id.clone(),
687 import_target_id.clone(),
688 ref_type,
689 ));
690 }
691 continue;
692 }
693
694 if let Some(target_ids) = symbol_table.get(ref_name) {
695 let target = target_ids
696 .iter()
697 .find(|id| {
698 *id != &entity.id
699 && entity_map
700 .get(*id)
701 .map_or(false, |e| e.file_path == entity.file_path)
702 });
703
704 if let Some(target_id) = target {
705 if parent_child_pairs.contains(&(entity.id.as_str(), target_id.as_str()))
706 || parent_child_pairs.contains(&(target_id.as_str(), entity.id.as_str()))
707 {
708 continue;
709 }
710 let ref_type = infer_ref_type(&entity.content, &ref_name);
711 entity_edges.push((
712 entity.id.clone(),
713 target_id.clone(),
714 ref_type,
715 ));
716 }
717 }
718 }
719 entity_edges
720 })
721 .collect();
722
723 let mut all_resolved: Vec<(String, String, RefType)> = scope_edges;
725 all_resolved.extend(resolved_refs);
726 let mut seen_edges: HashSet<(String, String)> = HashSet::new();
727 all_resolved.retain(|e| seen_edges.insert((e.0.clone(), e.1.clone())));
728
729 let mut edges: Vec<EntityRef> = Vec::with_capacity(kept_edges.len() + all_resolved.len());
731 let mut dependents: HashMap<String, Vec<String>> = HashMap::new();
732 let mut dependencies: HashMap<String, Vec<String>> = HashMap::new();
733
734 let mut all_edge_pairs: HashSet<(String, String)> = HashSet::new();
736
737 for edge in kept_edges {
739 all_edge_pairs.insert((edge.from_entity.clone(), edge.to_entity.clone()));
740 dependents
741 .entry(edge.to_entity.clone())
742 .or_default()
743 .push(edge.from_entity.clone());
744 dependencies
745 .entry(edge.from_entity.clone())
746 .or_default()
747 .push(edge.to_entity.clone());
748 edges.push(edge);
749 }
750
751 for (from_entity, to_entity, ref_type) in all_resolved {
753 if !all_edge_pairs.insert((from_entity.clone(), to_entity.clone())) {
754 continue;
755 }
756 dependents
757 .entry(to_entity.clone())
758 .or_default()
759 .push(from_entity.clone());
760 dependencies
761 .entry(from_entity.clone())
762 .or_default()
763 .push(to_entity.clone());
764 edges.push(EntityRef {
765 from_entity,
766 to_entity,
767 ref_type,
768 });
769 }
770
771 let graph = EntityGraph {
772 entities: entity_map,
773 edges,
774 dependents,
775 dependencies,
776 };
777
778 (graph, all_entities)
779 }
780
781 pub fn get_dependents(&self, entity_id: &str) -> Vec<&EntityInfo> {
783 self.dependents
784 .get(entity_id)
785 .map(|ids| {
786 ids.iter()
787 .filter_map(|id| self.entities.get(id))
788 .collect()
789 })
790 .unwrap_or_default()
791 }
792
793 pub fn get_dependencies(&self, entity_id: &str) -> Vec<&EntityInfo> {
795 self.dependencies
796 .get(entity_id)
797 .map(|ids| {
798 ids.iter()
799 .filter_map(|id| self.entities.get(id))
800 .collect()
801 })
802 .unwrap_or_default()
803 }
804
805 pub fn impact_analysis(&self, entity_id: &str) -> Vec<&EntityInfo> {
808 self.impact_analysis_capped(entity_id, 10_000)
809 }
810
811 pub fn impact_analysis_capped(&self, entity_id: &str, max_visited: usize) -> Vec<&EntityInfo> {
814 let mut visited: HashSet<&str> = HashSet::new();
815 let mut queue: std::collections::VecDeque<&str> = std::collections::VecDeque::new();
816 let mut result = Vec::new();
817
818 let start_key = match self.entities.get_key_value(entity_id) {
819 Some((k, _)) => k.as_str(),
820 None => return result,
821 };
822
823 queue.push_back(start_key);
824 visited.insert(start_key);
825
826 while let Some(current) = queue.pop_front() {
827 if result.len() >= max_visited {
828 break;
829 }
830 if let Some(deps) = self.dependents.get(current) {
831 for dep in deps {
832 if visited.insert(dep.as_str()) {
833 if let Some(info) = self.entities.get(dep.as_str()) {
834 result.push(info);
835 }
836 queue.push_back(dep.as_str());
837 if result.len() >= max_visited {
838 break;
839 }
840 }
841 }
842 }
843 }
844
845 result
846 }
847
848 pub fn impact_count(&self, entity_id: &str, max_count: usize) -> usize {
851 let mut visited: HashSet<&str> = HashSet::new();
852 let mut queue: std::collections::VecDeque<&str> = std::collections::VecDeque::new();
853 let mut count = 0;
854
855 let start_key = match self.entities.get_key_value(entity_id) {
857 Some((k, _)) => k.as_str(),
858 None => return 0,
859 };
860
861 queue.push_back(start_key);
862 visited.insert(start_key);
863
864 while let Some(current) = queue.pop_front() {
865 if count >= max_count {
866 break;
867 }
868 if let Some(deps) = self.dependents.get(current) {
869 for dep in deps {
870 if visited.insert(dep.as_str()) {
871 count += 1;
872 queue.push_back(dep.as_str());
873 if count >= max_count {
874 break;
875 }
876 }
877 }
878 }
879 }
880
881 count
882 }
883
884 pub fn filter_test_entities(&self, entities: &[crate::model::entity::SemanticEntity]) -> HashSet<String> {
887 let mut test_ids = HashSet::new();
888 for entity in entities {
889 if is_test_entity(entity) {
890 test_ids.insert(entity.id.clone());
891 }
892 }
893 test_ids
894 }
895
896 pub fn test_impact(
899 &self,
900 entity_id: &str,
901 all_entities: &[crate::model::entity::SemanticEntity],
902 ) -> Vec<&EntityInfo> {
903 let test_ids = self.filter_test_entities(all_entities);
904 let impact = self.impact_analysis(entity_id);
905 impact
906 .into_iter()
907 .filter(|info| test_ids.contains(&info.id))
908 .collect()
909 }
910
911 pub fn update_from_changes(
922 &mut self,
923 changed_files: &[FileChange],
924 root: &Path,
925 registry: &ParserRegistry,
926 ) {
927 let mut affected_files: HashSet<String> = HashSet::new();
928 let mut new_entities: Vec<SemanticEntity> = Vec::new();
929
930 for change in changed_files {
931 affected_files.insert(change.file_path.clone());
932 if let Some(ref old_path) = change.old_file_path {
933 affected_files.insert(old_path.clone());
934 }
935
936 match change.status {
937 FileStatus::Deleted => {
938 self.remove_entities_for_file(&change.file_path);
939 }
940 FileStatus::Renamed => {
941 if let Some(ref old_path) = change.old_file_path {
943 self.remove_entities_for_file(old_path);
944 }
945 if let Some(entities) = self.extract_file_entities(
947 &change.file_path,
948 change.after_content.as_deref(),
949 root,
950 registry,
951 ) {
952 new_entities.extend(entities);
953 }
954 }
955 FileStatus::Added | FileStatus::Modified => {
956 self.remove_entities_for_file(&change.file_path);
958 if let Some(entities) = self.extract_file_entities(
960 &change.file_path,
961 change.after_content.as_deref(),
962 root,
963 registry,
964 ) {
965 new_entities.extend(entities);
966 }
967 }
968 }
969 }
970
971 for entity in &new_entities {
973 self.entities.insert(
974 entity.id.clone(),
975 EntityInfo {
976 id: entity.id.clone(),
977 name: entity.name.clone(),
978 entity_type: entity.entity_type.clone(),
979 file_path: entity.file_path.clone(),
980 parent_id: entity.parent_id.clone(),
981 start_line: entity.start_line,
982 end_line: entity.end_line,
983 },
984 );
985 }
986
987 let symbol_table = self.build_symbol_table();
989
990 for entity in &new_entities {
992 self.resolve_entity_references(entity, &symbol_table);
993 }
994
995 let changed_entity_names: HashSet<String> = new_entities
998 .iter()
999 .map(|e| e.name.clone())
1000 .collect();
1001
1002 let entities_to_recheck: Vec<String> = self
1004 .entities
1005 .values()
1006 .filter(|e| !affected_files.contains(&e.file_path))
1007 .filter(|e| {
1008 self.dependencies
1009 .get(&e.id)
1010 .map_or(false, |deps| {
1011 deps.iter().any(|dep_id| {
1012 self.entities
1013 .get(dep_id)
1014 .map_or(false, |dep| changed_entity_names.contains(&dep.name))
1015 })
1016 })
1017 })
1018 .map(|e| e.id.clone())
1019 .collect();
1020
1021 let _ = entities_to_recheck; }
1028
1029 fn extract_file_entities(
1031 &self,
1032 file_path: &str,
1033 content: Option<&str>,
1034 root: &Path,
1035 registry: &ParserRegistry,
1036 ) -> Option<Vec<SemanticEntity>> {
1037 let content = if let Some(c) = content {
1038 c.to_string()
1039 } else {
1040 let full_path = root.join(file_path);
1041 std::fs::read_to_string(&full_path).ok()?
1042 };
1043
1044 let plugin = registry.get_plugin_with_content(file_path, &content)?;
1045
1046 Some(plugin.extract_entities(&content, file_path))
1047 }
1048
1049 fn remove_entities_for_file(&mut self, file_path: &str) {
1051 let ids_to_remove: Vec<String> = self
1053 .entities
1054 .values()
1055 .filter(|e| e.file_path == file_path)
1056 .map(|e| e.id.clone())
1057 .collect();
1058
1059 let id_set: HashSet<&str> = ids_to_remove.iter().map(|s| s.as_str()).collect();
1060
1061 for id in &ids_to_remove {
1063 self.entities.remove(id);
1064 }
1065
1066 self.edges
1068 .retain(|e| !id_set.contains(e.from_entity.as_str()) && !id_set.contains(e.to_entity.as_str()));
1069
1070 for id in &ids_to_remove {
1072 if let Some(deps) = self.dependencies.remove(id) {
1074 for dep in &deps {
1076 if let Some(dependents) = self.dependents.get_mut(dep) {
1077 dependents.retain(|d| d != id);
1078 }
1079 }
1080 }
1081 if let Some(deps) = self.dependents.remove(id) {
1083 for dep in &deps {
1085 if let Some(dependencies) = self.dependencies.get_mut(dep) {
1086 dependencies.retain(|d| d != id);
1087 }
1088 }
1089 }
1090 }
1091 }
1092
1093 fn build_symbol_table(&self) -> HashMap<String, Vec<String>> {
1095 let mut symbol_table: HashMap<String, Vec<String>> = HashMap::new();
1096 for entity in self.entities.values() {
1097 symbol_table
1098 .entry(entity.name.clone())
1099 .or_default()
1100 .push(entity.id.clone());
1101 }
1102 symbol_table
1103 }
1104
1105 fn resolve_entity_references(
1107 &mut self,
1108 entity: &SemanticEntity,
1109 symbol_table: &HashMap<String, Vec<String>>,
1110 ) {
1111 let refs = extract_references_from_content(&entity.content, &entity.name);
1112
1113 for ref_name in refs {
1114 if let Some(target_ids) = symbol_table.get(ref_name) {
1115 let target = target_ids
1116 .iter()
1117 .find(|id| {
1118 *id != &entity.id
1119 && self
1120 .entities
1121 .get(*id)
1122 .map_or(false, |e| e.file_path == entity.file_path)
1123 })
1124 .or_else(|| target_ids.iter().find(|id| *id != &entity.id));
1125
1126 if let Some(target_id) = target {
1127 let ref_type = infer_ref_type(&entity.content, &ref_name);
1128 self.edges.push(EntityRef {
1129 from_entity: entity.id.clone(),
1130 to_entity: target_id.clone(),
1131 ref_type,
1132 });
1133 self.dependents
1134 .entry(target_id.clone())
1135 .or_default()
1136 .push(entity.id.clone());
1137 self.dependencies
1138 .entry(entity.id.clone())
1139 .or_default()
1140 .push(target_id.clone());
1141 }
1142 }
1143 }
1144 }
1145}
1146
1147fn is_test_entity(entity: &crate::model::entity::SemanticEntity) -> bool {
1149 let name = &entity.name;
1150 let path = &entity.file_path;
1151 let content = &entity.content;
1152
1153 if name.starts_with("test_") || name.starts_with("Test") || name.ends_with("_test") || name.ends_with("Test") {
1155 return true;
1156 }
1157 if name.starts_with("it_") || name.starts_with("describe_") || name.starts_with("spec_") {
1158 return true;
1159 }
1160
1161 let path_lower = path.to_lowercase();
1163 let in_test_file = path_lower.contains("/test/")
1164 || path_lower.contains("/tests/")
1165 || path_lower.contains("/spec/")
1166 || path_lower.contains("_test.")
1167 || path_lower.contains(".test.")
1168 || path_lower.contains("_spec.")
1169 || path_lower.contains(".spec.");
1170
1171 let has_test_marker = content.contains("#[test]")
1173 || content.contains("#[cfg(test)]")
1174 || content.contains("@Test")
1175 || content.contains("@pytest")
1176 || content.contains("@test")
1177 || content.contains("describe(")
1178 || content.contains("it(")
1179 || content.contains("test(");
1180
1181 in_test_file && has_test_marker
1182}
1183
1184fn build_import_table(
1189 root: &Path,
1190 file_paths: &[String],
1191 symbol_table: &HashMap<String, Vec<String>>,
1192 entity_map: &HashMap<String, EntityInfo>,
1193) -> HashMap<(String, String), String> {
1194 let mut import_table: HashMap<(String, String), String> = HashMap::new();
1195
1196 for file_path in file_paths {
1197 let full_path = root.join(file_path);
1198 let content = match std::fs::read_to_string(&full_path) {
1199 Ok(c) => c,
1200 Err(_) => continue,
1201 };
1202
1203 let mut logical_lines: Vec<String> = Vec::new();
1206 let mut current_line = String::new();
1207 let mut in_parens = false;
1208
1209 for line in content.lines() {
1210 let trimmed = line.trim();
1211 if in_parens {
1212 let clean = trimmed.trim_end_matches(|c: char| c == ')' || c == ',');
1214 let clean = clean.split('#').next().unwrap_or(clean).trim();
1215 if !clean.is_empty() && clean != "(" {
1216 current_line.push_str(", ");
1217 current_line.push_str(clean);
1218 }
1219 if trimmed.contains(')') {
1220 in_parens = false;
1221 logical_lines.push(std::mem::take(&mut current_line));
1222 }
1223 } else if trimmed.starts_with("from ") && trimmed.contains(" import ") {
1224 if trimmed.contains('(') && !trimmed.contains(')') {
1225 in_parens = true;
1227 let before_paren = trimmed.split('(').next().unwrap_or(trimmed);
1229 current_line = before_paren.trim().to_string();
1230 if let Some(after) = trimmed.split('(').nth(1) {
1232 let after = after.trim().trim_end_matches(')').trim();
1233 if !after.is_empty() {
1234 current_line.push(' ');
1235 current_line.push_str(after);
1236 }
1237 }
1238 } else {
1239 logical_lines.push(trimmed.to_string());
1240 }
1241 }
1242 }
1243
1244 for logical_line in &logical_lines {
1245 if let Some(rest) = logical_line.strip_prefix("from ") {
1246 let import_match = rest.find(" import ")
1248 .map(|pos| (pos, 8))
1249 .or_else(|| rest.find(" import,").map(|pos| (pos, 8)));
1250 if let Some((import_pos, skip)) = import_match {
1251 let module_path = &rest[..import_pos];
1252 let names_str = &rest[import_pos + skip..];
1253
1254 let source_module = module_path
1255 .trim_start_matches('.')
1256 .rsplit('.')
1257 .next()
1258 .unwrap_or(module_path.trim_start_matches('.'));
1259
1260 for name_part in names_str.split(',') {
1261 let name_part = name_part.trim();
1262 let imported_name = name_part.split_whitespace().next().unwrap_or(name_part);
1263 let imported_name = imported_name.trim_matches(|c: char| c == '(' || c == ')' || c == ',');
1265 if imported_name.is_empty() {
1266 continue;
1267 }
1268
1269 if let Some(target_ids) = symbol_table.get(imported_name) {
1270 let target = target_ids.iter().find(|id| {
1271 entity_map.get(*id).map_or(false, |e| {
1272 let stem = e.file_path.rsplit('/').next().unwrap_or(&e.file_path);
1273 let stem = stem.strip_suffix(".py")
1274 .or_else(|| stem.strip_suffix(".ts"))
1275 .or_else(|| stem.strip_suffix(".js"))
1276 .or_else(|| stem.strip_suffix(".rs"))
1277 .unwrap_or(stem);
1278 stem == source_module
1279 })
1280 });
1281 if let Some(target_id) = target {
1282 import_table.insert(
1283 (file_path.clone(), imported_name.to_string()),
1284 target_id.clone(),
1285 );
1286 }
1287 }
1288 }
1289 }
1290 }
1291 }
1292
1293 let is_js_ts = file_path.ends_with(".js") || file_path.ends_with(".ts")
1296 || file_path.ends_with(".jsx") || file_path.ends_with(".tsx");
1297
1298 if is_js_ts {
1299 static JS_NAMED_RE: LazyLock<Regex> = LazyLock::new(|| {
1300 Regex::new(r#"import\s*\{([^}]+)\}\s*from\s*['"]([^'"]+)['"]"#).unwrap()
1301 });
1302 static JS_DEFAULT_RE: LazyLock<Regex> = LazyLock::new(|| {
1303 Regex::new(r#"import\s+(?:type\s+)?([A-Za-z_]\w*)\s+from\s*['"]([^'"]+)['"]"#).unwrap()
1304 });
1305
1306 for cap in JS_NAMED_RE.captures_iter(&content) {
1307 let names_str = cap.get(1).unwrap().as_str();
1308 let module_path = cap.get(2).unwrap().as_str();
1309 let source_module = module_path.rsplit('/').next().unwrap_or(module_path);
1310 let source_module = strip_js_ext(source_module);
1311
1312 for name_part in names_str.split(',') {
1313 let name_part = name_part.trim();
1314 if name_part.is_empty() { continue; }
1315
1316 let (original_name, local_name) = if let Some(pos) = name_part.find(" as ") {
1318 let orig = name_part[..pos].trim();
1319 let local = name_part[pos + 4..].trim();
1320 let orig = orig.strip_prefix("type ").unwrap_or(orig);
1321 (orig, local)
1322 } else {
1323 let name = name_part.strip_prefix("type ").unwrap_or(name_part);
1324 (name, name)
1325 };
1326
1327 if original_name.is_empty() || local_name.is_empty() { continue; }
1328
1329 if let Some(target_ids) = symbol_table.get(original_name) {
1330 let target = target_ids.iter().find(|id| {
1331 entity_map.get(*id).map_or(false, |e| {
1332 let stem = e.file_path.rsplit('/').next().unwrap_or(&e.file_path);
1333 let stem = strip_file_ext(stem);
1334 stem == source_module
1335 })
1336 });
1337 if let Some(target_id) = target {
1338 import_table.insert(
1339 (file_path.clone(), local_name.to_string()),
1340 target_id.clone(),
1341 );
1342 }
1343 }
1344 }
1345 }
1346
1347 for cap in JS_DEFAULT_RE.captures_iter(&content) {
1348 let local_name = cap.get(1).unwrap().as_str();
1349 let module_path = cap.get(2).unwrap().as_str();
1350 let source_module = module_path.rsplit('/').next().unwrap_or(module_path);
1351 let source_module = strip_js_ext(source_module);
1352
1353 if let Some(target_ids) = symbol_table.get(local_name) {
1354 let target = target_ids.iter().find(|id| {
1355 entity_map.get(*id).map_or(false, |e| {
1356 let stem = e.file_path.rsplit('/').next().unwrap_or(&e.file_path);
1357 let stem = strip_file_ext(stem);
1358 stem == source_module
1359 })
1360 });
1361 if let Some(target_id) = target {
1362 import_table.insert(
1363 (file_path.clone(), local_name.to_string()),
1364 target_id.clone(),
1365 );
1366 }
1367 }
1368 }
1369 }
1370
1371 let is_rust = file_path.ends_with(".rs");
1374 if is_rust {
1375 static RUST_USE_SIMPLE_RE: LazyLock<Regex> = LazyLock::new(|| {
1376 Regex::new(r"(?m)^\s*use\s+(?:(?:crate|super|self)::)?([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\s*;").unwrap()
1380 });
1381 static RUST_USE_GROUP_RE: LazyLock<Regex> = LazyLock::new(|| {
1382 Regex::new(r"(?m)^\s*use\s+(?:(?:crate|super|self)::)?([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)::\{([^}]+)\}\s*;").unwrap()
1385 });
1386
1387 for cap in RUST_USE_SIMPLE_RE.captures_iter(&content) {
1390 let full_path_str = cap.get(1).unwrap().as_str();
1391 let parts: Vec<&str> = full_path_str.split("::").collect();
1392 if parts.is_empty() { continue; }
1393
1394 let imported_name = parts[parts.len() - 1];
1396 let source_module = if parts.len() >= 2 {
1398 parts[parts.len() - 2]
1399 } else {
1400 parts[0]
1401 };
1402
1403 resolve_rust_import(
1404 file_path, imported_name, source_module,
1405 symbol_table, entity_map, &mut import_table,
1406 );
1407 }
1408
1409 for cap in RUST_USE_GROUP_RE.captures_iter(&content) {
1410 let module_path = cap.get(1).unwrap().as_str();
1411 let names_str = cap.get(2).unwrap().as_str();
1412
1413 let source_module = module_path.rsplit("::").next().unwrap_or(module_path);
1415
1416 for name_part in names_str.split(',') {
1417 let name_part = name_part.trim();
1418 let (original, local) = if let Some(pos) = name_part.find(" as ") {
1420 (&name_part[..pos], name_part[pos + 4..].trim())
1421 } else {
1422 (name_part, name_part)
1423 };
1424 let original = original.trim();
1425 let local = local.trim();
1426 if original.is_empty() || local.is_empty() { continue; }
1427
1428 resolve_rust_import(
1429 file_path, original, source_module,
1430 symbol_table, entity_map, &mut import_table,
1431 );
1432 if local != original {
1434 if let Some(target) = import_table.get(&(file_path.clone(), original.to_string())).cloned() {
1435 import_table.insert(
1436 (file_path.clone(), local.to_string()),
1437 target,
1438 );
1439 }
1440 }
1441 }
1442 }
1443 }
1444
1445 let is_go = file_path.ends_with(".go");
1448 if is_go {
1449 static GO_IMPORT_RE: LazyLock<Regex> = LazyLock::new(|| {
1450 Regex::new(r#"(?m)"([^"]+)""#).unwrap()
1451 });
1452
1453 let import_section = extract_go_import_section(&content);
1455 for cap in GO_IMPORT_RE.captures_iter(&import_section) {
1456 let import_path = cap.get(1).unwrap().as_str();
1457 let pkg_name = import_path.rsplit('/').next().unwrap_or(import_path);
1458
1459 for (name, target_ids) in symbol_table.iter() {
1461 for target_id in target_ids {
1462 if let Some(entity) = entity_map.get(target_id) {
1463 let stem = entity.file_path.rsplit('/').next().unwrap_or(&entity.file_path);
1464 let stem = strip_file_ext(stem);
1465 if stem == pkg_name || entity.file_path.contains(&format!("{}/", pkg_name)) {
1467 import_table.insert(
1468 (file_path.clone(), name.clone()),
1469 target_id.clone(),
1470 );
1471 }
1472 }
1473 }
1474 }
1475 }
1476 }
1477 }
1478
1479 import_table
1480}
1481
1482fn resolve_rust_import(
1485 file_path: &str,
1486 imported_name: &str,
1487 source_module: &str,
1488 symbol_table: &HashMap<String, Vec<String>>,
1489 entity_map: &HashMap<String, EntityInfo>,
1490 import_table: &mut HashMap<(String, String), String>,
1491) {
1492 if let Some(target_ids) = symbol_table.get(imported_name) {
1493 let target = target_ids.iter().find(|id| {
1494 entity_map.get(*id).map_or(false, |e| {
1495 let stem = e.file_path.rsplit('/').next().unwrap_or(&e.file_path);
1496 let stem = strip_file_ext(stem);
1497 stem == source_module
1498 })
1499 });
1500 if let Some(target_id) = target {
1501 import_table.insert(
1502 (file_path.to_string(), imported_name.to_string()),
1503 target_id.clone(),
1504 );
1505 }
1506 }
1507}
1508
/// Collects the import declarations of a Go source file, one trimmed line per import.
///
/// Recognised forms:
///
/// * single imports, with or without an alias: `import "fmt"`, `import foo "a/b"`,
///   `import _ "embed"`, `import . "math"` — the whole line is kept;
/// * parenthesised blocks spread over several lines — every line between `import (`
///   and the line starting with `)` is kept (blank and comment lines included);
/// * parenthesised blocks written on one line, e.g. `import ("os")` — only the text
///   between the parentheses is kept, and no block is left open.
///
/// Everything else in the file is ignored, so string literals in ordinary code never
/// end up in the result.
fn extract_go_import_section(content: &str) -> String {
    fn starts_with_quote(s: &str) -> bool {
        s.starts_with('"') || s.starts_with('`')
    }

    let mut section = String::new();
    let mut in_import_block = false;

    for line in content.lines() {
        let trimmed = line.trim();

        if in_import_block {
            // `starts_with` rather than equality so `) // comment` also closes the block.
            if trimmed.starts_with(')') {
                in_import_block = false;
            } else {
                section.push_str(trimmed);
                section.push('\n');
            }
            continue;
        }

        let Some(after_keyword) = trimmed.strip_prefix("import") else {
            continue;
        };
        let spec = after_keyword.trim_start();

        if let Some(inner) = spec.strip_prefix('(') {
            // The block either closes on this very line or stays open for the next ones.
            let specs = match inner.split_once(')') {
                Some((specs, _)) => specs,
                None => {
                    in_import_block = true;
                    inner
                }
            };
            let specs = specs.trim();
            if !specs.is_empty() {
                section.push_str(specs);
                section.push('\n');
            }
            continue;
        }

        // Guards against identifiers that merely begin with "import": an alias must be
        // separated from the keyword by whitespace.
        let keyword_is_separated = spec.len() < after_keyword.len();
        let is_import_spec = starts_with_quote(spec)
            || (keyword_is_separated
                && spec
                    .split_once(char::is_whitespace)
                    .is_some_and(|(_, path)| starts_with_quote(path.trim_start())));
        if is_import_spec {
            section.push_str(trimmed);
            section.push('\n');
        }
    }

    section
}
1535
/// Removes one trailing `.js`, `.ts`, `.jsx` or `.tsx` from `s`.
///
/// Input that ends in none of these is returned unchanged.
fn strip_js_ext(s: &str) -> &str {
    for ext in [".js", ".ts", ".jsx", ".tsx"] {
        if let Some(stem) = s.strip_suffix(ext) {
            return stem;
        }
    }
    s
}
1544
/// Removes one trailing source-file extension from `s`.
///
/// Handles `.py`, `.ts`, `.js`, `.tsx`, `.jsx`, `.rs` and `.go`; any other input is
/// returned unchanged. `.go` is included because the Go import resolver compares the
/// stripped file stem with the imported package name, which could never match while
/// the extension was left in place.
fn strip_file_ext(s: &str) -> &str {
    // No entry is a suffix of another, so at most one can match and order is irrelevant.
    const SOURCE_EXTS: [&str; 7] = [".py", ".ts", ".js", ".tsx", ".jsx", ".rs", ".go"];

    SOURCE_EXTS
        .iter()
        .find_map(|ext| s.strip_suffix(*ext))
        .unwrap_or(s)
}
1555
/// Returns a copy of `content` in which string literals and comments are blanked out
/// with spaces, leaving every other byte at its original offset.
///
/// Recognised, in priority order:
///
/// 1. `"""…"""` and `'''…'''` blocks,
/// 2. `"…"` and `'…'` literals, where a backslash skips the byte after it,
/// 3. `#` and `//` comments, up to (not including) the newline,
/// 4. `/* … */` comments.
///
/// The scan is byte-based and language-agnostic. An unterminated triple-quoted block
/// or block comment stops blanking shortly before the end of the input, so the last
/// one or two bytes may be copied through as code.
fn strip_comments_and_strings(content: &str) -> String {
    /// Index just past the closing triple `quote` at or after `from`; when there is
    /// none, the scan position is clamped to the last two bytes.
    fn past_triple_quote(src: &[u8], from: usize, quote: u8) -> usize {
        match src[from..]
            .windows(3)
            .position(|w| w.iter().all(|&b| b == quote))
        {
            Some(offset) => from + offset + 3,
            None => from.max(src.len().saturating_sub(2)),
        }
    }

    /// Index just past the closing `quote` of a single-line style literal whose body
    /// starts at `at`. May return an index beyond the end for unterminated input.
    fn past_quoted(src: &[u8], mut at: usize, quote: u8) -> usize {
        while let Some(&byte) = src.get(at) {
            if byte == b'\\' {
                // Skip the escaped byte as well.
                at += 2;
            } else if byte == quote {
                return at + 1;
            } else {
                at += 1;
            }
        }
        at
    }

    /// Index of the next newline at or after `from`, or the input length.
    fn line_end(src: &[u8], from: usize) -> usize {
        src[from..]
            .iter()
            .position(|&b| b == b'\n')
            .map_or(src.len(), |offset| from + offset)
    }

    /// Index just past the `*/` at or after `from`; when there is none, the scan
    /// position is clamped to the last byte.
    fn past_block_comment(src: &[u8], from: usize) -> usize {
        match src[from..]
            .windows(2)
            .position(|w| w[0] == b'*' && w[1] == b'/')
        {
            Some(offset) => from + offset + 2,
            None => from.max(src.len().saturating_sub(1)),
        }
    }

    let src = content.as_bytes();
    let mut kept = vec![b' '; src.len()];
    let mut at = 0;

    while at < src.len() {
        at = match &src[at..] {
            [b'"', b'"', b'"', ..] => past_triple_quote(src, at + 3, b'"'),
            [b'\'', b'\'', b'\'', ..] => past_triple_quote(src, at + 3, b'\''),
            [b'"', ..] => past_quoted(src, at + 1, b'"'),
            [b'\'', ..] => past_quoted(src, at + 1, b'\''),
            [b'#', ..] | [b'/', b'/', ..] => line_end(src, at),
            [b'/', b'*', ..] => past_block_comment(src, at + 2),
            [byte, ..] => {
                kept[at] = *byte;
                at + 1
            }
            [] => break,
        };
    }

    String::from_utf8_lossy(&kept).into_owned()
}
1634
1635fn extract_dot_chains<'a>(content: &'a str) -> Vec<(&'a str, &'a str)> {
1638 static DOT_CHAIN_RE: LazyLock<Regex> = LazyLock::new(|| {
1639 Regex::new(r"\b([A-Za-z_]\w*)\.([A-Za-z_]\w*)").unwrap()
1640 });
1641
1642 let mut chains = Vec::new();
1643 let mut seen: HashSet<(&str, &str)> = HashSet::new();
1644 for cap in DOT_CHAIN_RE.captures_iter(content) {
1645 let receiver = cap.get(1).unwrap().as_str();
1646 let member = cap.get(2).unwrap().as_str();
1647 if seen.insert((receiver, member)) {
1648 chains.push((receiver, member));
1649 }
1650 }
1651 chains
1652}
1653
1654fn extract_references_from_content<'a>(content: &'a str, own_name: &str) -> Vec<&'a str> {
1658 let stripped = strip_comments_and_strings(content);
1661 let stripped_words: HashSet<&str> = stripped
1662 .split(|c: char| !c.is_alphanumeric() && c != '_')
1663 .filter(|w| !w.is_empty())
1664 .collect();
1665
1666 let mut refs = Vec::new();
1667 let mut seen: HashSet<&str> = HashSet::new();
1668
1669 for word in content.split(|c: char| !c.is_alphanumeric() && c != '_') {
1670 if word.is_empty() || word == own_name {
1671 continue;
1672 }
1673 if is_keyword(word) || word.len() < 2 {
1674 continue;
1675 }
1676 if word.starts_with(|c: char| c.is_lowercase()) && word.len() < 3 {
1678 continue;
1679 }
1680 if !word.starts_with(|c: char| c.is_alphabetic() || c == '_') {
1681 continue;
1682 }
1683 if is_common_local_name(word) {
1685 continue;
1686 }
1687 if !stripped_words.contains(word) {
1689 continue;
1690 }
1691 if seen.insert(word) {
1692 refs.push(word);
1693 }
1694 }
1695
1696 refs
1697}
1698
/// Generic identifiers (`result`, `data`, `index`, …) that are excluded from
/// reference extraction via [`is_common_local_name`].
static COMMON_LOCAL_NAMES: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
    HashSet::from([
        "result", "results", "data", "config", "value", "values",
        "item", "items", "input", "output", "args", "opts",
        "name", "path", "file", "line", "count", "index",
        "temp", "prev", "next", "curr", "current", "node",
        "left", "right", "root", "head", "tail", "body",
        "text", "content", "source", "target", "entry",
        "error", "errors", "message", "response", "request",
        "context", "state", "props", "event", "handler",
        "callback", "options", "params", "query", "list",
        "base", "info", "meta", "kind", "mode", "flag",
        "size", "length", "width", "height", "start", "stop",
        "begin", "done", "found", "status", "code",
    ])
});

/// Reports whether `word` is one of the generic names in [`COMMON_LOCAL_NAMES`].
///
/// The comparison is exact and case-sensitive.
fn is_common_local_name(word: &str) -> bool {
    let names: &HashSet<&'static str> = &COMMON_LOCAL_NAMES;
    names.contains(word)
}
1721
1722fn infer_ref_type(content: &str, ref_name: &str) -> RefType {
1724 let bytes = content.as_bytes();
1727 let name_bytes = ref_name.as_bytes();
1728 let mut search_start = 0;
1729 while let Some(rel_pos) = content[search_start..].find(ref_name) {
1730 let pos = search_start + rel_pos;
1731 let after = pos + name_bytes.len();
1732 if after < bytes.len() && bytes[after] == b'(' {
1734 let is_boundary = pos == 0 || {
1736 let prev = bytes[pos - 1];
1737 !prev.is_ascii_alphanumeric() && prev != b'_'
1738 };
1739 if is_boundary {
1740 return RefType::Calls;
1741 }
1742 }
1743 search_start = pos + 1;
1745 while search_start < content.len() && !content.is_char_boundary(search_start) {
1746 search_start += 1;
1747 }
1748 }
1749
1750 for line in content.lines() {
1752 let trimmed = line.trim();
1753 if (trimmed.starts_with("import ") || trimmed.starts_with("use ")
1754 || trimmed.starts_with("from ") || trimmed.starts_with("require("))
1755 && trimmed.contains(ref_name)
1756 {
1757 return RefType::Imports;
1758 }
1759 }
1760
1761 RefType::TypeRef
1763}
1764
/// Words that are never treated as references: language keywords, a handful of
/// builtin functions and macros, and ubiquitous primitive or standard-library type
/// names.
///
/// The per-language grouping below is organisational only (and approximate — several
/// words belong to more than one language); all groups are merged into a single set.
static KEYWORDS: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
    const JS_TS_AND_SHARED: &[&str] = &[
        "if", "else", "for", "while", "do", "switch", "case", "break",
        "continue", "return", "try", "catch", "finally", "throw",
        "new", "delete", "typeof", "instanceof", "in", "of",
        "true", "false", "null", "undefined", "void", "this",
        "super", "class", "extends", "implements", "interface",
        "enum", "const", "let", "var", "function", "async",
        "await", "yield", "import", "export", "default", "from",
        "as", "static", "public", "private", "protected",
        "abstract", "final", "override",
    ];
    const RUST: &[&str] = &[
        "fn", "pub", "mod", "use", "struct", "impl", "trait",
        "where", "type", "self", "Self", "mut", "ref", "match",
        "loop", "move", "unsafe", "extern", "crate", "dyn",
    ];
    const PYTHON: &[&str] = &[
        "def", "elif", "except", "raise", "with",
        "pass", "lambda", "nonlocal", "global", "assert",
        "True", "False", "and", "or", "not", "is",
    ];
    const GO: &[&str] = &[
        "func", "package", "range", "select", "chan", "go",
        "defer", "map", "make", "append", "len", "cap",
    ];
    const C_AND_CPP: &[&str] = &[
        "auto", "register", "volatile", "sizeof", "typedef",
        "template", "typename", "namespace", "virtual", "inline",
        "constexpr", "nullptr", "noexcept", "explicit", "friend",
        "operator", "using", "cout", "endl", "cerr", "cin",
        "printf", "scanf", "malloc", "free", "NULL", "include",
        "ifdef", "ifndef", "endif", "define", "pragma",
    ];
    const RUBY: &[&str] = &[
        "end", "then", "elsif", "unless", "until",
        "begin", "rescue", "ensure", "when", "require",
        "attr_accessor", "attr_reader", "attr_writer",
        "puts", "nil", "module", "defined",
    ];
    const CSHARP: &[&str] = &[
        "internal", "sealed", "readonly",
        "partial", "delegate", "event", "params", "out",
        "object", "decimal", "sbyte", "ushort", "uint",
        "ulong", "nint", "nuint", "dynamic",
        "get", "set", "value", "init", "record",
    ];
    const COMMON_TYPES: &[&str] = &[
        "string", "number", "boolean", "int", "float", "double",
        "bool", "char", "byte", "i8", "i16", "i32", "i64",
        "u8", "u16", "u32", "u64", "f32", "f64", "usize",
        "isize", "str", "String", "Vec", "Option", "Result",
        "Box", "Arc", "Rc", "HashMap", "HashSet", "Some",
        "Ok", "Err",
    ];

    [
        JS_TS_AND_SHARED,
        RUST,
        PYTHON,
        GO,
        C_AND_CPP,
        RUBY,
        CSHARP,
        COMMON_TYPES,
    ]
    .into_iter()
    .flatten()
    .copied()
    .collect()
});

/// Reports whether `word` is in [`KEYWORDS`].
///
/// The comparison is exact and case-sensitive (`self` and `Self` are separate entries).
fn is_keyword(word: &str) -> bool {
    let keywords: &HashSet<&'static str> = &KEYWORDS;
    keywords.contains(word)
}
1819
#[cfg(test)]
mod tests {
    use super::*;
    use crate::git::types::{FileChange, FileStatus};
    use std::io::Write;
    use tempfile::TempDir;

    /// Creates an empty temporary directory plus the default parser registry.
    fn create_test_repo() -> (TempDir, ParserRegistry) {
        let dir = TempDir::new().unwrap();
        let registry = crate::parser::plugins::create_default_registry();
        (dir, registry)
    }

    /// Writes `content` to `dir/name`, creating parent directories as needed.
    fn write_file(dir: &Path, name: &str, content: &str) {
        let path = dir.join(name);
        if let Some(parent) = path.parent() {
            std::fs::create_dir_all(parent).unwrap();
        }
        let mut f = std::fs::File::create(path).unwrap();
        f.write_all(content.as_bytes()).unwrap();
    }

    /// Adding a file registers its entities and links them to existing ones.
    #[test]
    fn test_incremental_add_file() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "a.ts", "export function foo() { return bar(); }\n");
        write_file(root, "b.ts", "export function bar() { return 1; }\n");

        let (mut graph, _) = EntityGraph::build(root, &["a.ts".into(), "b.ts".into()], &registry);
        assert_eq!(graph.entities.len(), 2);

        // The new file is read from disk because no `after_content` is supplied.
        write_file(root, "c.ts", "export function baz() { return foo(); }\n");
        graph.update_from_changes(
            &[FileChange {
                file_path: "c.ts".into(),
                status: FileStatus::Added,
                old_file_path: None,
                before_content: None,
                after_content: None,
            }],
            root,
            &registry,
        );

        assert_eq!(graph.entities.len(), 3);
        assert!(graph.entities.contains_key("c.ts::function::baz"));
        let baz_deps = graph.get_dependencies("c.ts::function::baz");
        assert!(
            baz_deps.iter().any(|d| d.name == "foo"),
            "baz should depend on foo. Deps: {:?}",
            baz_deps.iter().map(|d| &d.name).collect::<Vec<_>>()
        );
    }

    /// Deleting a file removes its entities and the edges that pointed at them.
    #[test]
    fn test_incremental_delete_file() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "a.ts", "export function foo() { return bar(); }\n");
        write_file(root, "b.ts", "export function bar() { return 1; }\n");

        let (mut graph, _) = EntityGraph::build(root, &["a.ts".into(), "b.ts".into()], &registry);
        assert_eq!(graph.entities.len(), 2);

        graph.update_from_changes(
            &[FileChange {
                file_path: "b.ts".into(),
                status: FileStatus::Deleted,
                old_file_path: None,
                before_content: None,
                after_content: None,
            }],
            root,
            &registry,
        );

        assert_eq!(graph.entities.len(), 1);
        assert!(!graph.entities.contains_key("b.ts::function::bar"));
        let foo_deps = graph.get_dependencies("a.ts::function::foo");
        assert!(
            foo_deps.is_empty(),
            "foo's deps should be empty after bar deleted. Deps: {:?}",
            foo_deps.iter().map(|d| &d.name).collect::<Vec<_>>()
        );
    }

    /// Modifying a file replaces its outgoing edges with the ones from the new content.
    #[test]
    fn test_incremental_modify_file() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "a.ts", "export function foo() { return bar(); }\n");
        write_file(
            root,
            "b.ts",
            "export function bar() { return 1; }\nexport function baz() { return 2; }\n",
        );

        let (mut graph, _) = EntityGraph::build(root, &["a.ts".into(), "b.ts".into()], &registry);
        assert_eq!(graph.entities.len(), 3);

        // `foo` now calls `baz` instead of `bar`.
        write_file(root, "a.ts", "export function foo() { return baz(); }\n");
        graph.update_from_changes(
            &[FileChange {
                file_path: "a.ts".into(),
                status: FileStatus::Modified,
                old_file_path: None,
                before_content: None,
                after_content: None,
            }],
            root,
            &registry,
        );

        assert_eq!(graph.entities.len(), 3);
        let foo_deps = graph.get_dependencies("a.ts::function::foo");
        let dep_names: Vec<&str> = foo_deps.iter().map(|d| d.name.as_str()).collect();
        assert!(
            dep_names.contains(&"baz"),
            "foo should depend on baz after modification. Deps: {:?}",
            dep_names
        );
        assert!(
            !dep_names.contains(&"bar"),
            "foo should no longer depend on bar. Deps: {:?}",
            dep_names
        );
    }

    /// A change carrying `after_content` is processed without the file existing on disk.
    #[test]
    fn test_incremental_with_content() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "a.ts", "export function foo() { return 1; }\n");
        let (mut graph, _) = EntityGraph::build(root, &["a.ts".into()], &registry);
        assert_eq!(graph.entities.len(), 1);

        graph.update_from_changes(
            &[FileChange {
                file_path: "b.ts".into(),
                status: FileStatus::Added,
                old_file_path: None,
                before_content: None,
                after_content: Some("export function bar() { return foo(); }\n".into()),
            }],
            root,
            &registry,
        );

        assert_eq!(graph.entities.len(), 2);
        let bar_deps = graph.get_dependencies("b.ts::function::bar");
        assert!(bar_deps.iter().any(|d| d.name == "foo"));
    }

    /// Called functions are reported; the entity's own name is not.
    #[test]
    fn test_extract_references() {
        let content = "function processData(input) {\n const result = validateInput(input);\n return transform(result);\n}";
        let refs = extract_references_from_content(content, "processData");
        assert!(refs.contains(&"validateInput"));
        assert!(refs.contains(&"transform"));
        assert!(!refs.contains(&"processData"));
    }

    /// Language keywords never show up as references.
    #[test]
    fn test_extract_references_skips_keywords() {
        let content = "function foo() { if (true) { return false; } }";
        let refs = extract_references_from_content(content, "foo");
        assert!(!refs.contains(&"if"));
        assert!(!refs.contains(&"true"));
        assert!(!refs.contains(&"return"));
        assert!(!refs.contains(&"false"));
    }

    /// A name directly followed by `(` is classified as a call.
    #[test]
    fn test_infer_ref_type_call() {
        assert_eq!(
            infer_ref_type("validateInput(data)", "validateInput"),
            RefType::Calls,
        );
    }

    /// A name that is neither called nor imported falls back to a type reference.
    #[test]
    fn test_infer_ref_type_type() {
        assert_eq!(
            infer_ref_type("let x: MyType = something", "MyType"),
            RefType::TypeRef,
        );
    }

    /// Multi-byte characters around the match must not cause slicing panics.
    #[test]
    fn test_infer_ref_type_multibyte_utf8() {
        assert_eq!(
            infer_ref_type("let café = foo(x)", "foo"),
            RefType::Calls,
        );
        assert_eq!(
            infer_ref_type("class HandicapfrPublicationFieldsEnum:\n É = 1\n bar()", "bar"),
            RefType::Calls,
        );
        assert_eq!(
            infer_ref_type("// 日本語コメント\nlet x = 1", "missing"),
            RefType::TypeRef,
        );
    }

    /// `self.method()` in Python resolves to the sibling method.
    #[test]
    fn test_dot_chain_self_resolution() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "service.py", "\
class MyService:
    def process(self):
        return self.validate()

    def validate(self):
        return True
");

        let (graph, _) = EntityGraph::build(root, &["service.py".into()], &registry);

        let process_id = graph.entities.keys()
            .find(|id| id.contains("process"))
            .expect("process entity should exist");
        let deps = graph.get_dependencies(process_id);
        assert!(
            deps.iter().any(|d| d.name == "validate"),
            "process should depend on validate via self.validate(). Deps: {:?}",
            deps.iter().map(|d| &d.name).collect::<Vec<_>>()
        );
    }

    /// `this.method()` in TypeScript resolves to the sibling method.
    #[test]
    fn test_dot_chain_this_resolution() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "service.ts", "\
class UserService {
    process() {
        return this.validate();
    }
    validate() {
        return true;
    }
}
");

        let (graph, _) = EntityGraph::build(root, &["service.ts".into()], &registry);

        let process_id = graph.entities.keys()
            .find(|id| id.contains("process"))
            .expect("process entity should exist");
        let deps = graph.get_dependencies(process_id);
        assert!(
            deps.iter().any(|d| d.name == "validate"),
            "process should depend on validate via this.validate(). Deps: {:?}",
            deps.iter().map(|d| &d.name).collect::<Vec<_>>()
        );
    }

    /// `ClassName.staticMethod()` resolves to the static method of that class.
    #[test]
    fn test_dot_chain_class_static() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "utils.ts", "\
class MathUtils {
    static compute() { return 1; }
}
function caller() { return MathUtils.compute(); }
");

        let (graph, _) = EntityGraph::build(root, &["utils.ts".into()], &registry);

        let caller_id = graph.entities.keys()
            .find(|id| id.contains("caller"))
            .expect("caller entity should exist");
        let deps = graph.get_dependencies(caller_id);
        assert!(
            deps.iter().any(|d| d.name == "compute"),
            "caller should depend on compute via MathUtils.compute(). Deps: {:?}",
            deps.iter().map(|d| &d.name).collect::<Vec<_>>()
        );
    }

    /// A named ES-module import links the importer to the exported function.
    #[test]
    fn test_js_ts_import_resolution() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "helper.ts", "\
export function helper() { return 1; }
");
        write_file(root, "main.ts", "\
import { helper } from './helper';
export function main() { return helper(); }
");

        let (graph, _) = EntityGraph::build(
            root,
            &["helper.ts".into(), "main.ts".into()],
            &registry,
        );

        let main_id = graph.entities.keys()
            .find(|id| id.contains("main"))
            .expect("main entity should exist");
        let deps = graph.get_dependencies(main_id);
        assert!(
            deps.iter().any(|d| d.name == "helper"),
            "main should depend on helper via JS import. Deps: {:?}",
            deps.iter().map(|d| &d.name).collect::<Vec<_>>()
        );
    }

    /// `self.process()` must bind to the method of the same class, not to a
    /// same-named method of an unrelated class in another file.
    #[test]
    fn test_dot_chain_no_false_edges() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "a.py", "\
class ClassA:
    def run(self):
        return self.process()

    def process(self):
        return 1
");
        write_file(root, "b.py", "\
class ClassB:
    def process(self):
        return 2
");

        let (graph, _) = EntityGraph::build(
            root,
            &["a.py".into(), "b.py".into()],
            &registry,
        );

        let run_id = graph.entities.keys()
            .find(|id| id.contains("run"))
            .expect("run entity should exist");
        let deps = graph.get_dependencies(run_id);
        for dep in &deps {
            if dep.name == "process" {
                assert!(
                    dep.file_path == "a.py",
                    "run's process dep should be in a.py, not {}",
                    dep.file_path
                );
            }
        }
    }

    /// Plain calls without a receiver are still resolved by the word-based fallback.
    #[test]
    fn test_dot_chain_fallback() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "app.ts", "\
export function helper() { return 1; }
export function caller() {
    const val = helper();
    return val;
}
");

        let (graph, _) = EntityGraph::build(root, &["app.ts".into()], &registry);

        let caller_id = graph.entities.keys()
            .find(|id| id.contains("caller"))
            .expect("caller entity should exist");
        let deps = graph.get_dependencies(caller_id);
        assert!(
            deps.iter().any(|d| d.name == "helper"),
            "caller should still resolve helper via bag-of-words. Deps: {:?}",
            deps.iter().map(|d| &d.name).collect::<Vec<_>>()
        );
    }
}