1use std::collections::{HashMap, HashSet};
11use std::path::Path;
12use std::sync::LazyLock;
13
14use rayon::prelude::*;
15use serde::Serialize;
16
17use crate::git::types::{FileChange, FileStatus};
18use crate::model::entity::SemanticEntity;
19use crate::parser::registry::ParserRegistry;
20
/// A directed reference edge between two entities in the graph.
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct EntityRef {
    /// Id of the entity that contains the reference.
    pub from_entity: String,
    /// Id of the entity being referenced.
    pub to_entity: String,
    /// How `from_entity` refers to `to_entity` (call, type use, or import).
    pub ref_type: RefType,
}
29
30#[derive(Debug, Clone, PartialEq, Serialize)]
32#[serde(rename_all = "lowercase")]
33pub enum RefType {
34 Calls,
36 TypeRef,
38 Imports,
40}
41
/// In-memory graph of semantic entities and the references between them.
#[derive(Debug)]
pub struct EntityGraph {
    /// All known entities, keyed by entity id.
    pub entities: HashMap<String, EntityInfo>,
    /// Every resolved reference edge.
    pub edges: Vec<EntityRef>,
    /// Reverse adjacency: target entity id -> ids of entities that reference it.
    pub dependents: HashMap<String, Vec<String>>,
    /// Forward adjacency: source entity id -> ids of entities it references.
    pub dependencies: HashMap<String, Vec<String>>,
}
54
/// Lightweight, serializable description of a single extracted entity.
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct EntityInfo {
    /// Unique entity id (the key used in `EntityGraph::entities`).
    pub id: String,
    /// Unqualified entity name, as used for symbol-table lookup.
    pub name: String,
    /// Kind of entity (e.g. "function"), as reported by the parser plugin.
    pub entity_type: String,
    /// Path of the file defining this entity, as passed to `build`.
    pub file_path: String,
    /// First line of the entity's definition.
    pub start_line: usize,
    /// Last line of the entity's definition.
    pub end_line: usize,
}
66
67impl EntityGraph {
    /// Builds a full entity graph for `file_paths` under `root`.
    ///
    /// Extraction and reference resolution run in parallel via rayon; files
    /// that cannot be read or that have no registered parser plugin are
    /// silently skipped.
    pub fn build(
        root: &Path,
        file_paths: &[String],
        registry: &ParserRegistry,
    ) -> Self {
        // Phase 1: parse every file in parallel and collect all entities.
        let all_entities: Vec<SemanticEntity> = file_paths
            .par_iter()
            .filter_map(|file_path| {
                let full_path = root.join(file_path);
                // Unreadable files and files without a plugin yield None.
                let content = std::fs::read_to_string(&full_path).ok()?;
                let plugin = registry.get_plugin(file_path)?;
                Some(plugin.extract_entities(&content, file_path))
            })
            .flatten()
            .collect();

        // Phase 2: index entities by name (for reference resolution) and by
        // id (for graph lookups).
        let mut symbol_table: HashMap<String, Vec<String>> = HashMap::with_capacity(all_entities.len());
        let mut entity_map: HashMap<String, EntityInfo> = HashMap::with_capacity(all_entities.len());

        for entity in &all_entities {
            symbol_table
                .entry(entity.name.clone())
                .or_default()
                .push(entity.id.clone());

            entity_map.insert(
                entity.id.clone(),
                EntityInfo {
                    id: entity.id.clone(),
                    name: entity.name.clone(),
                    entity_type: entity.entity_type.clone(),
                    file_path: entity.file_path.clone(),
                    start_line: entity.start_line,
                    end_line: entity.end_line,
                },
            );
        }

        // Phase 3: resolve textual references to concrete entity ids, in
        // parallel, producing (from, to, ref_type) triples.
        let resolved_refs: Vec<(String, String, RefType)> = all_entities
            .par_iter()
            .flat_map(|entity| {
                let refs = extract_references_from_content(&entity.content, &entity.name);
                let mut entity_edges = Vec::new();
                for ref_name in refs {
                    if let Some(target_ids) = symbol_table.get(ref_name) {
                        // Prefer a same-file match, otherwise fall back to the
                        // first entity elsewhere with that name; a reference
                        // to the entity itself is ignored.
                        let target = target_ids
                            .iter()
                            .find(|id| {
                                *id != &entity.id
                                    && entity_map
                                        .get(*id)
                                        .map_or(false, |e| e.file_path == entity.file_path)
                            })
                            .or_else(|| target_ids.iter().find(|id| *id != &entity.id));

                        if let Some(target_id) = target {
                            let ref_type = infer_ref_type(&entity.content, &ref_name);
                            entity_edges.push((
                                entity.id.clone(),
                                target_id.clone(),
                                ref_type,
                            ));
                        }
                    }
                }
                entity_edges
            })
            .collect();

        // Phase 4: materialize the edge list and both adjacency maps.
        let mut edges: Vec<EntityRef> = Vec::with_capacity(resolved_refs.len());
        let mut dependents: HashMap<String, Vec<String>> = HashMap::new();
        let mut dependencies: HashMap<String, Vec<String>> = HashMap::new();

        for (from_entity, to_entity, ref_type) in resolved_refs {
            dependents
                .entry(to_entity.clone())
                .or_default()
                .push(from_entity.clone());
            dependencies
                .entry(from_entity.clone())
                .or_default()
                .push(to_entity.clone());
            edges.push(EntityRef {
                from_entity,
                to_entity,
                ref_type,
            });
        }

        EntityGraph {
            entities: entity_map,
            edges,
            dependents,
            dependencies,
        }
    }
174
175 pub fn get_dependents(&self, entity_id: &str) -> Vec<&EntityInfo> {
177 self.dependents
178 .get(entity_id)
179 .map(|ids| {
180 ids.iter()
181 .filter_map(|id| self.entities.get(id))
182 .collect()
183 })
184 .unwrap_or_default()
185 }
186
187 pub fn get_dependencies(&self, entity_id: &str) -> Vec<&EntityInfo> {
189 self.dependencies
190 .get(entity_id)
191 .map(|ids| {
192 ids.iter()
193 .filter_map(|id| self.entities.get(id))
194 .collect()
195 })
196 .unwrap_or_default()
197 }
198
    /// Returns every entity transitively affected by a change to `entity_id`
    /// (see [`Self::impact_analysis_capped`]), with a default cap of 10,000.
    pub fn impact_analysis(&self, entity_id: &str) -> Vec<&EntityInfo> {
        self.impact_analysis_capped(entity_id, 10_000)
    }
204
    /// Breadth-first traversal over `dependents`, collecting every entity
    /// that transitively depends on `entity_id`. The starting entity itself
    /// is not included, and at most `max_visited` entities are returned.
    /// Returns an empty vector for an unknown id.
    pub fn impact_analysis_capped(&self, entity_id: &str, max_visited: usize) -> Vec<&EntityInfo> {
        let mut visited: HashSet<&str> = HashSet::new();
        let mut queue: std::collections::VecDeque<&str> = std::collections::VecDeque::new();
        let mut result = Vec::new();

        // Borrow the map's own key so the &str lives as long as &self.
        let start_key = match self.entities.get_key_value(entity_id) {
            Some((k, _)) => k.as_str(),
            None => return result,
        };

        queue.push_back(start_key);
        visited.insert(start_key);

        while let Some(current) = queue.pop_front() {
            if result.len() >= max_visited {
                break;
            }
            if let Some(deps) = self.dependents.get(current) {
                for dep in deps {
                    // `insert` returns false for already-seen ids, which also
                    // protects against cycles in the dependency graph.
                    if visited.insert(dep.as_str()) {
                        if let Some(info) = self.entities.get(dep.as_str()) {
                            result.push(info);
                        }
                        queue.push_back(dep.as_str());
                        if result.len() >= max_visited {
                            break;
                        }
                    }
                }
            }
        }

        result
    }
241
242 pub fn impact_count(&self, entity_id: &str, max_count: usize) -> usize {
245 let mut visited: HashSet<&str> = HashSet::new();
246 let mut queue: std::collections::VecDeque<&str> = std::collections::VecDeque::new();
247 let mut count = 0;
248
249 let start_key = match self.entities.get_key_value(entity_id) {
251 Some((k, _)) => k.as_str(),
252 None => return 0,
253 };
254
255 queue.push_back(start_key);
256 visited.insert(start_key);
257
258 while let Some(current) = queue.pop_front() {
259 if count >= max_count {
260 break;
261 }
262 if let Some(deps) = self.dependents.get(current) {
263 for dep in deps {
264 if visited.insert(dep.as_str()) {
265 count += 1;
266 queue.push_back(dep.as_str());
267 if count >= max_count {
268 break;
269 }
270 }
271 }
272 }
273 }
274
275 count
276 }
277
278 pub fn update_from_changes(
289 &mut self,
290 changed_files: &[FileChange],
291 root: &Path,
292 registry: &ParserRegistry,
293 ) {
294 let mut affected_files: HashSet<String> = HashSet::new();
295 let mut new_entities: Vec<SemanticEntity> = Vec::new();
296
297 for change in changed_files {
298 affected_files.insert(change.file_path.clone());
299 if let Some(ref old_path) = change.old_file_path {
300 affected_files.insert(old_path.clone());
301 }
302
303 match change.status {
304 FileStatus::Deleted => {
305 self.remove_entities_for_file(&change.file_path);
306 }
307 FileStatus::Renamed => {
308 if let Some(ref old_path) = change.old_file_path {
310 self.remove_entities_for_file(old_path);
311 }
312 if let Some(entities) = self.extract_file_entities(
314 &change.file_path,
315 change.after_content.as_deref(),
316 root,
317 registry,
318 ) {
319 new_entities.extend(entities);
320 }
321 }
322 FileStatus::Added | FileStatus::Modified => {
323 self.remove_entities_for_file(&change.file_path);
325 if let Some(entities) = self.extract_file_entities(
327 &change.file_path,
328 change.after_content.as_deref(),
329 root,
330 registry,
331 ) {
332 new_entities.extend(entities);
333 }
334 }
335 }
336 }
337
338 for entity in &new_entities {
340 self.entities.insert(
341 entity.id.clone(),
342 EntityInfo {
343 id: entity.id.clone(),
344 name: entity.name.clone(),
345 entity_type: entity.entity_type.clone(),
346 file_path: entity.file_path.clone(),
347 start_line: entity.start_line,
348 end_line: entity.end_line,
349 },
350 );
351 }
352
353 let symbol_table = self.build_symbol_table();
355
356 for entity in &new_entities {
358 self.resolve_entity_references(entity, &symbol_table);
359 }
360
361 let changed_entity_names: HashSet<String> = new_entities
364 .iter()
365 .map(|e| e.name.clone())
366 .collect();
367
368 let entities_to_recheck: Vec<String> = self
370 .entities
371 .values()
372 .filter(|e| !affected_files.contains(&e.file_path))
373 .filter(|e| {
374 self.dependencies
375 .get(&e.id)
376 .map_or(false, |deps| {
377 deps.iter().any(|dep_id| {
378 self.entities
379 .get(dep_id)
380 .map_or(false, |dep| changed_entity_names.contains(&dep.name))
381 })
382 })
383 })
384 .map(|e| e.id.clone())
385 .collect();
386
387 let _ = entities_to_recheck; }
394
395 fn extract_file_entities(
397 &self,
398 file_path: &str,
399 content: Option<&str>,
400 root: &Path,
401 registry: &ParserRegistry,
402 ) -> Option<Vec<SemanticEntity>> {
403 let plugin = registry.get_plugin(file_path)?;
404
405 let content = if let Some(c) = content {
406 c.to_string()
407 } else {
408 let full_path = root.join(file_path);
409 std::fs::read_to_string(&full_path).ok()?
410 };
411
412 Some(plugin.extract_entities(&content, file_path))
413 }
414
    /// Removes every entity defined in `file_path`, along with all edges and
    /// adjacency entries that involve those entities (on either side).
    fn remove_entities_for_file(&mut self, file_path: &str) {
        let ids_to_remove: Vec<String> = self
            .entities
            .values()
            .filter(|e| e.file_path == file_path)
            .map(|e| e.id.clone())
            .collect();

        // Set form for O(1) membership checks while filtering edges.
        let id_set: HashSet<&str> = ids_to_remove.iter().map(|s| s.as_str()).collect();

        for id in &ids_to_remove {
            self.entities.remove(id);
        }

        // Drop any edge that touches a removed entity on either end.
        self.edges
            .retain(|e| !id_set.contains(e.from_entity.as_str()) && !id_set.contains(e.to_entity.as_str()));

        // Scrub both adjacency maps: remove each dead id's own entry and its
        // back-references from the opposite map.
        for id in &ids_to_remove {
            if let Some(deps) = self.dependencies.remove(id) {
                for dep in &deps {
                    if let Some(dependents) = self.dependents.get_mut(dep) {
                        dependents.retain(|d| d != id);
                    }
                }
            }
            if let Some(deps) = self.dependents.remove(id) {
                for dep in &deps {
                    if let Some(dependencies) = self.dependencies.get_mut(dep) {
                        dependencies.retain(|d| d != id);
                    }
                }
            }
        }
    }
458
459 fn build_symbol_table(&self) -> HashMap<String, Vec<String>> {
461 let mut symbol_table: HashMap<String, Vec<String>> = HashMap::new();
462 for entity in self.entities.values() {
463 symbol_table
464 .entry(entity.name.clone())
465 .or_default()
466 .push(entity.id.clone());
467 }
468 symbol_table
469 }
470
    /// Resolves textual references found in `entity`'s content against
    /// `symbol_table` and records the resulting edge plus both adjacency
    /// entries for each resolution.
    ///
    /// Mirrors the resolution policy of [`Self::build`]: a same-file match
    /// wins, otherwise the first other entity with that name; references to
    /// the entity itself are skipped.
    fn resolve_entity_references(
        &mut self,
        entity: &SemanticEntity,
        symbol_table: &HashMap<String, Vec<String>>,
    ) {
        let refs = extract_references_from_content(&entity.content, &entity.name);

        for ref_name in refs {
            if let Some(target_ids) = symbol_table.get(ref_name) {
                let target = target_ids
                    .iter()
                    .find(|id| {
                        *id != &entity.id
                            && self
                                .entities
                                .get(*id)
                                .map_or(false, |e| e.file_path == entity.file_path)
                    })
                    .or_else(|| target_ids.iter().find(|id| *id != &entity.id));

                if let Some(target_id) = target {
                    let ref_type = infer_ref_type(&entity.content, &ref_name);
                    self.edges.push(EntityRef {
                        from_entity: entity.id.clone(),
                        to_entity: target_id.clone(),
                        ref_type,
                    });
                    self.dependents
                        .entry(target_id.clone())
                        .or_default()
                        .push(entity.id.clone());
                    self.dependencies
                        .entry(entity.id.clone())
                        .or_default()
                        .push(target_id.clone());
                }
            }
        }
    }
511}
512
513fn extract_references_from_content<'a>(content: &'a str, own_name: &str) -> Vec<&'a str> {
516 let mut refs = Vec::new();
517 let mut seen: HashSet<&str> = HashSet::new();
518
519 for word in content.split(|c: char| !c.is_alphanumeric() && c != '_') {
520 if word.is_empty() || word == own_name {
521 continue;
522 }
523 if is_keyword(word) || word.len() < 2 {
524 continue;
525 }
526 if word.starts_with(|c: char| c.is_lowercase()) && word.len() < 3 {
528 continue;
529 }
530 if !word.starts_with(|c: char| c.is_alphabetic() || c == '_') {
531 continue;
532 }
533 if is_common_local_name(word) {
535 continue;
536 }
537 if seen.insert(word) {
538 refs.push(word);
539 }
540 }
541
542 refs
543}
544
/// Identifier names so generic that a match is far more likely a local
/// variable than a reference to another entity; these are never treated as
/// references.
static COMMON_LOCAL_NAMES: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
    HashSet::from([
        "result", "results", "data", "config", "value", "values", "item",
        "items", "input", "output", "args", "opts", "name", "path", "file",
        "line", "count", "index", "temp", "prev", "next", "curr", "current",
        "node", "left", "right", "root", "head", "tail", "body", "text",
        "content", "source", "target", "entry", "error", "errors", "message",
        "response", "request", "context", "state", "props", "event",
        "handler", "callback", "options", "params", "query", "list", "base",
        "info", "meta", "kind", "mode", "flag", "size", "length", "width",
        "height", "start", "stop", "begin", "done", "found", "status",
        "code", "test",
    ])
});

/// Returns true when `word` is one of the generic local-variable names in
/// [`COMMON_LOCAL_NAMES`].
fn is_common_local_name(word: &str) -> bool {
    COMMON_LOCAL_NAMES.contains(word)
}
567
/// Classifies how `ref_name` is used inside `content`.
///
/// A word-boundary occurrence immediately followed by `(` counts as a call;
/// otherwise a mention on an import-like line (`import`/`use`/`from`/
/// `require(`) counts as an import; anything else is a type reference.
fn infer_ref_type(content: &str, ref_name: &str) -> RefType {
    // Byte offsets returned by `find` always fall on char boundaries, so
    // indexing `bytes` at `pos`/`after` is safe.
    let bytes = content.as_bytes();
    let name_bytes = ref_name.as_bytes();
    let mut search_start = 0;
    while let Some(rel_pos) = content[search_start..].find(ref_name) {
        let pos = search_start + rel_pos;
        let after = pos + name_bytes.len();
        if after < bytes.len() && bytes[after] == b'(' {
            // Reject matches that are merely a suffix of a longer identifier.
            let is_boundary = pos == 0 || {
                let prev = bytes[pos - 1];
                !prev.is_ascii_alphanumeric() && prev != b'_'
            };
            if is_boundary {
                return RefType::Calls;
            }
        }
        // Advance past this occurrence; overlapping matches are still found.
        search_start = pos + 1;
    }

    // No call site found: look for an import-style line mentioning the name.
    for line in content.lines() {
        let trimmed = line.trim();
        if (trimmed.starts_with("import ") || trimmed.starts_with("use ")
            || trimmed.starts_with("from ") || trimmed.starts_with("require("))
            && trimmed.contains(ref_name)
        {
            return RefType::Imports;
        }
    }

    RefType::TypeRef
}
606
/// Reserved words and ubiquitous built-in names across the supported
/// languages; tokens in this set are never treated as entity references.
static KEYWORDS: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
    HashSet::from([
        // JavaScript / TypeScript / Java-family
        "if", "else", "for", "while", "do", "switch", "case", "break",
        "continue", "return", "try", "catch", "finally", "throw",
        "new", "delete", "typeof", "instanceof", "in", "of",
        "true", "false", "null", "undefined", "void", "this",
        "super", "class", "extends", "implements", "interface",
        "enum", "const", "let", "var", "function", "async",
        "await", "yield", "import", "export", "default", "from",
        "as", "static", "public", "private", "protected",
        "abstract", "final", "override",
        // Rust
        "fn", "pub", "mod", "use", "struct", "impl", "trait",
        "where", "type", "self", "Self", "mut", "ref", "match",
        "loop", "move", "unsafe", "extern", "crate", "dyn",
        // Python
        "def", "elif", "except", "raise", "with",
        "pass", "lambda", "nonlocal", "global", "assert",
        "True", "False", "and", "or", "not", "is",
        // Go
        "func", "package", "range", "select", "chan", "go",
        "defer", "map", "make", "append", "len", "cap",
        // C / C++
        "auto", "register", "volatile", "sizeof", "typedef",
        "template", "typename", "namespace", "virtual", "inline",
        "constexpr", "nullptr", "noexcept", "explicit", "friend",
        "operator", "using", "cout", "endl", "cerr", "cin",
        "printf", "scanf", "malloc", "free", "NULL", "include",
        "ifdef", "ifndef", "endif", "define", "pragma",
        // Ruby
        "end", "then", "elsif", "unless", "until",
        "begin", "rescue", "ensure", "when", "require",
        "attr_accessor", "attr_reader", "attr_writer",
        "puts", "nil", "module", "defined",
        // C#
        "internal", "sealed", "readonly",
        "partial", "delegate", "event", "params", "out",
        "object", "decimal", "sbyte", "ushort", "uint",
        "ulong", "nint", "nuint", "dynamic",
        "get", "set", "value", "init", "record",
        // Primitive and common standard-library type names
        "string", "number", "boolean", "int", "float", "double",
        "bool", "char", "byte", "i8", "i16", "i32", "i64",
        "u8", "u16", "u32", "u64", "f32", "f64", "usize",
        "isize", "str", "String", "Vec", "Option", "Result",
        "Box", "Arc", "Rc", "HashMap", "HashSet", "Some",
        "Ok", "Err",
    ])
});

/// Returns true when `word` is in [`KEYWORDS`].
fn is_keyword(word: &str) -> bool {
    KEYWORDS.contains(word)
}
661
#[cfg(test)]
mod tests {
    use super::*;
    use crate::git::types::{FileChange, FileStatus};
    use std::io::Write;
    use tempfile::TempDir;

    // NOTE: fixes mojibake where every `&registry` argument had been
    // corrupted to `®istry` (`&reg` collapsed into the `®` character),
    // which does not compile.

    /// Creates an empty temp directory plus the default parser registry.
    fn create_test_repo() -> (TempDir, ParserRegistry) {
        let dir = TempDir::new().unwrap();
        let registry = crate::parser::plugins::create_default_registry();
        (dir, registry)
    }

    /// Writes `content` to `dir/name`, creating parent directories as needed.
    fn write_file(dir: &Path, name: &str, content: &str) {
        let path = dir.join(name);
        if let Some(parent) = path.parent() {
            std::fs::create_dir_all(parent).unwrap();
        }
        let mut f = std::fs::File::create(path).unwrap();
        f.write_all(content.as_bytes()).unwrap();
    }

    #[test]
    fn test_incremental_add_file() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "a.ts", "export function foo() { return bar(); }\n");
        write_file(root, "b.ts", "export function bar() { return 1; }\n");

        let mut graph = EntityGraph::build(root, &["a.ts".into(), "b.ts".into()], &registry);
        assert_eq!(graph.entities.len(), 2);

        // No inline after_content: forces the disk-read path.
        write_file(root, "c.ts", "export function baz() { return foo(); }\n");
        graph.update_from_changes(
            &[FileChange {
                file_path: "c.ts".into(),
                status: FileStatus::Added,
                old_file_path: None,
                before_content: None,
                after_content: None,
            }],
            root,
            &registry,
        );

        assert_eq!(graph.entities.len(), 3);
        assert!(graph.entities.contains_key("c.ts::function::baz"));
        let baz_deps = graph.get_dependencies("c.ts::function::baz");
        assert!(
            baz_deps.iter().any(|d| d.name == "foo"),
            "baz should depend on foo. Deps: {:?}",
            baz_deps.iter().map(|d| &d.name).collect::<Vec<_>>()
        );
    }

    #[test]
    fn test_incremental_delete_file() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "a.ts", "export function foo() { return bar(); }\n");
        write_file(root, "b.ts", "export function bar() { return 1; }\n");

        let mut graph = EntityGraph::build(root, &["a.ts".into(), "b.ts".into()], &registry);
        assert_eq!(graph.entities.len(), 2);

        graph.update_from_changes(
            &[FileChange {
                file_path: "b.ts".into(),
                status: FileStatus::Deleted,
                old_file_path: None,
                before_content: None,
                after_content: None,
            }],
            root,
            &registry,
        );

        assert_eq!(graph.entities.len(), 1);
        assert!(!graph.entities.contains_key("b.ts::function::bar"));
        let foo_deps = graph.get_dependencies("a.ts::function::foo");
        assert!(
            foo_deps.is_empty(),
            "foo's deps should be empty after bar deleted. Deps: {:?}",
            foo_deps.iter().map(|d| &d.name).collect::<Vec<_>>()
        );
    }

    #[test]
    fn test_incremental_modify_file() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "a.ts", "export function foo() { return bar(); }\n");
        write_file(root, "b.ts", "export function bar() { return 1; }\nexport function baz() { return 2; }\n");

        let mut graph = EntityGraph::build(root, &["a.ts".into(), "b.ts".into()], &registry);
        assert_eq!(graph.entities.len(), 3);

        // Rewrite a.ts so foo now calls baz instead of bar.
        write_file(root, "a.ts", "export function foo() { return baz(); }\n");
        graph.update_from_changes(
            &[FileChange {
                file_path: "a.ts".into(),
                status: FileStatus::Modified,
                old_file_path: None,
                before_content: None,
                after_content: None,
            }],
            root,
            &registry,
        );

        assert_eq!(graph.entities.len(), 3);
        let foo_deps = graph.get_dependencies("a.ts::function::foo");
        let dep_names: Vec<&str> = foo_deps.iter().map(|d| d.name.as_str()).collect();
        assert!(dep_names.contains(&"baz"), "foo should depend on baz after modification. Deps: {:?}", dep_names);
        assert!(!dep_names.contains(&"bar"), "foo should no longer depend on bar. Deps: {:?}", dep_names);
    }

    #[test]
    fn test_incremental_with_content() {
        let (dir, registry) = create_test_repo();
        let root = dir.path();

        write_file(root, "a.ts", "export function foo() { return 1; }\n");
        let mut graph = EntityGraph::build(root, &["a.ts".into()], &registry);
        assert_eq!(graph.entities.len(), 1);

        // after_content supplied inline: the file never touches disk.
        graph.update_from_changes(
            &[FileChange {
                file_path: "b.ts".into(),
                status: FileStatus::Added,
                old_file_path: None,
                before_content: None,
                after_content: Some("export function bar() { return foo(); }\n".into()),
            }],
            root,
            &registry,
        );

        assert_eq!(graph.entities.len(), 2);
        let bar_deps = graph.get_dependencies("b.ts::function::bar");
        assert!(bar_deps.iter().any(|d| d.name == "foo"));
    }

    #[test]
    fn test_extract_references() {
        let content = "function processData(input) {\n const result = validateInput(input);\n return transform(result);\n}";
        let refs = extract_references_from_content(content, "processData");
        assert!(refs.contains(&"validateInput"));
        assert!(refs.contains(&"transform"));
        // The entity's own name must never be reported as a reference.
        assert!(!refs.contains(&"processData"));
    }

    #[test]
    fn test_extract_references_skips_keywords() {
        let content = "function foo() { if (true) { return false; } }";
        let refs = extract_references_from_content(content, "foo");
        assert!(!refs.contains(&"if"));
        assert!(!refs.contains(&"true"));
        assert!(!refs.contains(&"return"));
        assert!(!refs.contains(&"false"));
    }

    #[test]
    fn test_infer_ref_type_call() {
        assert_eq!(
            infer_ref_type("validateInput(data)", "validateInput"),
            RefType::Calls,
        );
    }

    #[test]
    fn test_infer_ref_type_type() {
        assert_eq!(
            infer_ref_type("let x: MyType = something", "MyType"),
            RefType::TypeRef,
        );
    }
}