1use std::collections::{BinaryHeap, HashMap, HashSet, VecDeque};
15use std::path::Path;
16use serde::{Deserialize, Serialize};
17use regex::Regex;
18use walkdir::WalkDir;
19use tree_sitter::Parser;
20
/// Graph of code entities (`nodes`) and the relationships between them (`edges`).
///
/// Only `nodes` and `edges` are serialized. The three lookup maps are marked
/// `#[serde(skip)]`, so they come back empty after deserialization and must be
/// repopulated with `build_indexes` before any lookup is attempted.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct CodeGraph {
    /// Every entity in the graph.
    pub nodes: Vec<CodeNode>,
    /// Every relationship; endpoints are node-id strings.
    pub edges: Vec<CodeEdge>,
    /// Edge `from` id -> positions in `edges` of the edges leaving that node.
    #[serde(skip)]
    pub outgoing: HashMap<String, Vec<usize>>,
    /// Edge `to` id -> positions in `edges` of the edges arriving at that node.
    #[serde(skip)]
    pub incoming: HashMap<String, Vec<usize>>,
    /// Node id -> position of that node in `nodes`.
    #[serde(skip)]
    pub node_index: HashMap<String, usize>,
}
38
/// A single entity in the code graph: a file, class, function/method or module.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CodeNode {
    /// Identifier that edges refer to. The constructors on this type build it as
    /// `file:<path>`, `class:<path>:<name>`, `func:<path>:<name>` or
    /// `method:<path>:<name>`.
    pub id: String,
    /// What kind of entity this node represents.
    pub kind: NodeKind,
    /// Short name: the last `/`-separated path component for files, otherwise
    /// the class or function name.
    pub name: String,
    /// Path of the file that contains the entity.
    pub file_path: String,
    /// Line of the definition; the file constructor leaves this as `None`.
    pub line: Option<usize>,
    /// Decorators attached to the entity; left out of the serialized form when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub decorators: Vec<String>,
    /// Signature text, when one was recorded; left out of the serialized form when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub signature: Option<String>,
    /// Docstring text, when one was recorded; left out of the serialized form when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub docstring: Option<String>,
    /// Number of lines attributed to the entity; deserializes to 0 when absent.
    #[serde(default)]
    pub line_count: usize,
    /// Whether the entity was classified as test code; deserializes to `false` when absent.
    #[serde(default)]
    pub is_test: bool,
}
58
59impl CodeNode {
60 pub fn new_file(path: &str) -> Self {
61 Self {
62 id: format!("file:{}", path),
63 kind: NodeKind::File,
64 name: path.rsplit('/').next().unwrap_or(path).to_string(),
65 file_path: path.to_string(),
66 line: None,
67 decorators: Vec::new(),
68 signature: None,
69 docstring: None,
70 line_count: 0,
71 is_test: path.contains("/test") || path.contains("_test."),
72 }
73 }
74
75 pub fn new_class(path: &str, name: &str, line: usize) -> Self {
76 Self {
77 id: format!("class:{}:{}", path, name),
78 kind: NodeKind::Class,
79 name: name.to_string(),
80 file_path: path.to_string(),
81 line: Some(line),
82 decorators: Vec::new(),
83 signature: None,
84 docstring: None,
85 line_count: 0,
86 is_test: name.starts_with("Test") || path.contains("/test"),
87 }
88 }
89
90 pub fn new_function(path: &str, name: &str, line: usize, is_method: bool) -> Self {
91 let prefix = if is_method { "method" } else { "func" };
92 Self {
93 id: format!("{}:{}:{}", prefix, path, name),
94 kind: NodeKind::Function,
95 name: name.to_string(),
96 file_path: path.to_string(),
97 line: Some(line),
98 decorators: Vec::new(),
99 signature: None,
100 docstring: None,
101 line_count: 0,
102 is_test: name.starts_with("test_") || name.starts_with("Test") || path.contains("/test"),
103 }
104 }
105}
106
/// Category of a [`CodeNode`]. Serialized as the lowercase variant name.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum NodeKind {
    /// A source file.
    File,
    /// A class definition.
    Class,
    /// A free function or a method (the two differ only in the node id prefix).
    Function,
    /// A module.
    Module,
}
116
/// A directed relationship between two nodes, identified by their ids.
///
/// The numeric and boolean fields are `#[serde(default)]`: when missing from the
/// input they deserialize to the type's zero value, not to the values that
/// `CodeEdge::new` assigns.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CodeEdge {
    /// Id of the node the edge starts at.
    pub from: String,
    /// Id of the node the edge points to.
    pub to: String,
    /// Kind of relationship.
    pub relation: EdgeRelation,
    /// Strength of the relationship; overwritten by `compute_weight`.
    #[serde(default)]
    pub weight: f32,
    /// How many occurrences this edge stands for.
    #[serde(default)]
    pub call_count: u32,
    /// Whether the relationship was observed on an error-handling path.
    #[serde(default)]
    pub in_error_path: bool,
    /// Confidence in the edge; feeds into `compute_weight` for call edges.
    #[serde(default)]
    pub confidence: f32,
}
132
133impl CodeEdge {
134 pub fn new(from: &str, to: &str, relation: EdgeRelation) -> Self {
135 Self {
136 from: from.to_string(),
137 to: to.to_string(),
138 relation,
139 weight: 0.5,
140 call_count: 1,
141 in_error_path: false,
142 confidence: 1.0,
143 }
144 }
145
146 pub fn imports(from: &str, to: &str) -> Self {
147 Self::new(from, to, EdgeRelation::Imports)
148 }
149
150 pub fn calls(from: &str, to: &str) -> Self {
151 Self::new(from, to, EdgeRelation::Calls)
152 }
153
154 pub fn inherits(from: &str, to: &str) -> Self {
155 Self::new(from, to, EdgeRelation::Inherits)
156 }
157
158 pub fn defined_in(from: &str, to: &str) -> Self {
159 Self::new(from, to, EdgeRelation::DefinedIn)
160 }
161
162 pub fn compute_weight(&mut self) {
164 if self.relation == EdgeRelation::Calls {
165 let count_norm = (self.call_count as f32 / 10.0).min(1.0);
166 let error_factor = if self.in_error_path { 0.8 } else { 0.5 };
167 self.weight = 0.4 * count_norm + 0.3 * error_factor + 0.3 * self.confidence;
168 } else {
169 self.weight = 0.7; }
171 }
172}
173
/// Kind of relationship a [`CodeEdge`] expresses. Serialized in `snake_case`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum EdgeRelation {
    /// `from` imports `to`.
    Imports,
    /// `from` (a class) inherits from `to` (its parent).
    Inherits,
    /// `from` is defined inside `to`, e.g. a method inside its class.
    DefinedIn,
    /// `from` (the caller) calls `to` (the callee).
    Calls,
    /// `from` (a test file) tests `to` (a source file).
    TestsFor,
    /// An override relationship between two nodes.
    // NOTE(review): which endpoint is the overriding one is decided where these edges are created — confirm there.
    Overrides,
}
191
192impl std::fmt::Display for EdgeRelation {
193 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
194 match self {
195 EdgeRelation::Imports => write!(f, "imports"),
196 EdgeRelation::Inherits => write!(f, "inherits"),
197 EdgeRelation::DefinedIn => write!(f, "defined_in"),
198 EdgeRelation::Calls => write!(f, "calls"),
199 EdgeRelation::TestsFor => write!(f, "tests_for"),
200 EdgeRelation::Overrides => write!(f, "overrides"),
201 }
202 }
203}
204
/// Result of an impact analysis; the node references borrow from the graph
/// that produced the report.
#[derive(Debug)]
pub struct ImpactReport<'a> {
    /// Affected nodes that were not classified as test code.
    pub affected_source: Vec<&'a CodeNode>,
    /// Affected nodes that were classified as test code.
    pub affected_tests: Vec<&'a CodeNode>,
}
213
/// A path through the graph that starts at a symptom node.
#[derive(Debug, Clone)]
pub struct CausalChain {
    /// Id of the symptom node the chain was traced from.
    pub symptom_node_id: String,
    /// Nodes along the path, in traversal order.
    pub chain: Vec<ChainNode>,
}
220
/// One step of a [`CausalChain`]: a snapshot of a graph node plus the label of
/// the edge leading to the next step.
#[derive(Debug, Clone)]
pub struct ChainNode {
    /// Id of the graph node.
    pub node_id: String,
    /// Name of the graph node.
    pub node_name: String,
    /// File that contains the node.
    pub file_path: String,
    /// Line of the node, when the node has one.
    pub line: Option<usize>,
    /// Label of the edge to the next chain node; `None` on the last node.
    pub edge_to_next: Option<String>,
}
229
/// Source language of a file, as far as the graph extractor is concerned.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Language {
    Rust,
    TypeScript,
    Python,
    Unknown,
}

impl Language {
    /// Classifies `path` by its file extension (case-sensitive).
    ///
    /// `rs` is Rust, `py` is Python, and `ts`/`tsx`/`js`/`jsx` are all reported
    /// as TypeScript. A missing, non-UTF-8 or unrecognised extension gives
    /// `Unknown`.
    pub fn from_path(path: &Path) -> Self {
        match path.extension().and_then(|ext| ext.to_str()) {
            Some("rs") => Language::Rust,
            Some("ts" | "tsx" | "js" | "jsx") => Language::TypeScript,
            Some("py") => Language::Python,
            _ => Language::Unknown,
        }
    }
}
252
253impl CodeGraph {
256 pub fn extract_cached(repo_dir: &Path, repo_name: &str, base_commit: &str) -> Self {
260 let cache_dir = repo_dir.parent().unwrap_or(repo_dir).join(".graph-cache");
261 let _ = std::fs::create_dir_all(&cache_dir);
262
263 let safe_repo = repo_name.replace('/', "__");
265 let short_commit = &base_commit[..base_commit.len().min(8)];
266 let cache_file = cache_dir.join(format!("{}__{}.json", safe_repo, short_commit));
267
268 if cache_file.exists() {
270 if let Ok(data) = std::fs::read_to_string(&cache_file) {
271 if let Ok(mut graph) = serde_json::from_str::<CodeGraph>(&data) {
272 graph.build_indexes();
273 tracing::info!(
274 "Loaded code graph from cache: {} ({} nodes, {} edges)",
275 cache_file.display(),
276 graph.nodes.len(),
277 graph.edges.len()
278 );
279 return graph;
280 }
281 }
282 let _ = std::fs::remove_file(&cache_file);
284 }
285
286 let graph = Self::extract_from_dir(repo_dir);
288
289 if let Ok(json) = serde_json::to_string(&graph) {
291 let _ = std::fs::write(&cache_file, json);
292 tracing::info!(
293 "Saved code graph to cache: {} ({} nodes, {} edges)",
294 cache_file.display(),
295 graph.nodes.len(),
296 graph.edges.len()
297 );
298 }
299
300 graph
301 }
302
303 pub fn extract_from_dir(dir: &Path) -> Self {
305 let mut nodes = Vec::new();
306 let mut edges = Vec::new();
307
308 let mut class_map: HashMap<String, String> = HashMap::new();
310 let mut func_map: HashMap<String, Vec<String>> = HashMap::new();
311 let mut module_map: HashMap<String, String> = HashMap::new();
312
313 let mut method_to_class: HashMap<String, String> = HashMap::new();
315 let mut class_methods: HashMap<String, Vec<String>> = HashMap::new();
316
317 let mut class_parents: HashMap<String, Vec<String>> = HashMap::new();
319
320 let mut file_imported_names: HashMap<String, HashSet<String>> = HashMap::new();
322
323 let mut file_entries: Vec<(String, String, Language)> = Vec::new();
325
326 for entry in WalkDir::new(dir)
327 .follow_links(false)
328 .max_depth(20)
329 .into_iter()
330 .filter_entry(|e| {
331 let name = e.file_name().to_str().unwrap_or("");
332 !name.starts_with('.')
333 && name != "node_modules"
334 && name != "__pycache__"
335 && name != "target"
336 && name != "build"
337 && name != "dist"
338 && name != ".git"
339 && name != ".eggs"
340 && name != ".tox"
341 })
342 {
343 let entry = match entry {
344 Ok(e) => e,
345 Err(_) => continue,
346 };
347
348 if !entry.file_type().is_file() {
349 continue;
350 }
351
352 let path = entry.path();
353 let lang = Language::from_path(path);
354 if lang == Language::Unknown {
355 continue;
356 }
357
358 let rel_path = path
359 .strip_prefix(dir)
360 .unwrap_or(path)
361 .to_string_lossy()
362 .to_string();
363
364 if rel_path == "setup.py" || rel_path == "conftest.py" || rel_path.contains("__pycache__") {
366 continue;
367 }
368
369 let content = match std::fs::read_to_string(path) {
370 Ok(c) => c,
371 Err(_) => continue,
372 };
373
374 let module_path = rel_path
376 .replace('/', ".")
377 .trim_end_matches(".py")
378 .trim_end_matches(".rs")
379 .trim_end_matches(".ts")
380 .trim_end_matches(".tsx")
381 .trim_end_matches(".js")
382 .trim_end_matches(".jsx")
383 .to_string();
384
385 let file_id = format!("file:{}", rel_path);
386 module_map.insert(module_path.clone(), file_id.clone());
387
388 let parts: Vec<&str> = module_path.split('.').collect();
390 for start in 1..parts.len() {
391 let partial = parts[start..].join(".");
392 module_map.entry(partial).or_insert_with(|| file_id.clone());
393 }
394
395 file_entries.push((rel_path, content, lang));
396 }
397
398 let mut parser = Parser::new();
400 let python_language = tree_sitter_python::LANGUAGE;
401 parser.set_language(&python_language.into()).ok();
402
403 for (rel_path, content, lang) in &file_entries {
404 let _file_id = format!("file:{}", rel_path);
405
406 let (file_nodes, file_edges, imports) = match lang {
407 Language::Python => {
408 extract_python_tree_sitter(
409 rel_path,
410 content,
411 &mut parser,
412 &mut class_map,
413 )
414 }
415 Language::Rust => {
416 extract_rust_tree_sitter(
417 rel_path,
418 content,
419 &mut parser,
420 &mut class_map,
421 )
422 }
423 Language::TypeScript => {
424 let ext = rel_path.rsplit('.').next().unwrap_or("ts");
425 extract_typescript_tree_sitter(
426 rel_path,
427 content,
428 &mut parser,
429 &mut class_map,
430 ext,
431 )
432 }
433 Language::Unknown => continue,
434 };
435
436 for node in &file_nodes {
438 if node.kind == NodeKind::Class {
439 class_map.insert(node.name.clone(), node.id.clone());
440 } else if node.kind == NodeKind::Function {
441 func_map
442 .entry(node.name.clone())
443 .or_default()
444 .push(node.id.clone());
445 }
446 }
447
448 for edge in &file_edges {
450 if edge.relation == EdgeRelation::DefinedIn {
451 if edge.from.starts_with("method:") && edge.to.starts_with("class:") {
452 method_to_class.insert(edge.from.clone(), edge.to.clone());
453 class_methods
454 .entry(edge.to.clone())
455 .or_default()
456 .push(edge.from.clone());
457 }
458 }
459 if edge.relation == EdgeRelation::Inherits {
460 if let Some(parent_id) = class_map.get(
461 edge.to.strip_prefix("class_ref:").unwrap_or(&edge.to),
462 ) {
463 class_parents
464 .entry(edge.from.clone())
465 .or_default()
466 .push(parent_id.clone());
467 }
468 }
469 }
470
471 if !imports.is_empty() {
473 file_imported_names.insert(rel_path.clone(), imports);
474 }
475
476 if !file_nodes.is_empty() {
478 nodes.push(CodeNode::new_file(rel_path));
479 }
480
481 nodes.extend(file_nodes);
482 edges.extend(file_edges);
483 }
484
485 let class_init_map: HashMap<String, Vec<(String, String)>> = {
487 let mut map: HashMap<String, Vec<(String, String)>> = HashMap::new();
488 for node in &nodes {
489 if node.kind == NodeKind::Function && node.name == "__init__" && !node.is_test {
490 if let Some(class_id) = method_to_class.get(&node.id) {
491 if let Some(class_name) = class_id.rsplit(':').next() {
492 map.entry(class_name.to_string())
493 .or_default()
494 .push((node.file_path.clone(), node.id.clone()));
495 }
496 }
497 }
498 }
499 map
500 };
501
502 let node_pkg_map: HashMap<String, String> = nodes
504 .iter()
505 .map(|n| {
506 let pkg = n.file_path.rsplitn(2, '/').nth(1).unwrap_or("").to_string();
507 (n.id.clone(), pkg)
508 })
509 .collect();
510
511 for (rel_path, content, lang) in &file_entries {
513 if *lang != Language::Python {
514 continue;
515 }
516
517 let file_func_ids: HashSet<String> = nodes
518 .iter()
519 .filter(|n| n.file_path == *rel_path && n.kind == NodeKind::Function)
520 .map(|n| n.id.clone())
521 .collect();
522
523 let package_dir = rel_path.rsplitn(2, '/').nth(1).unwrap_or("");
524
525 if let Some(tree) = parser.parse(content, None) {
526 let source = content.as_bytes();
527 let root = tree.root_node();
528
529 extract_calls_from_tree(
530 root,
531 source,
532 rel_path,
533 &func_map,
534 &method_to_class,
535 &class_parents,
536 &file_func_ids,
537 &file_imported_names,
538 package_dir,
539 &class_init_map,
540 &node_pkg_map,
541 &mut edges,
542 );
543 }
544
545 let is_test_file = rel_path.contains("/tests/") || rel_path.contains("/test_");
547 if is_test_file {
548 let file_id = format!("file:{}", rel_path);
549 let re_from_import = Regex::new(r"^from\s+([\w.]+)\s+import").unwrap();
550
551 for line in content.lines() {
552 if let Some(cap) = re_from_import.captures(line) {
553 let module = cap[1].to_string();
554 if let Some(source_file_id) = module_map.get(&module) {
555 edges.push(CodeEdge {
556 from: file_id.clone(),
557 to: source_file_id.clone(),
558 relation: EdgeRelation::TestsFor,
559 weight: 0.5,
560 call_count: 1,
561 in_error_path: false,
562 confidence: 1.0,
563 });
564 }
565 }
566 }
567 }
568 }
569
570 let mut resolved_edges = Vec::new();
572 for edge in edges {
573 if edge.to.starts_with("class_ref:") {
574 let class_name = &edge.to["class_ref:".len()..];
575 if let Some(class_id) = class_map.get(class_name) {
576 resolved_edges.push(CodeEdge {
577 from: edge.from,
578 to: class_id.clone(),
579 relation: edge.relation,
580 weight: edge.weight,
581 call_count: edge.call_count,
582 in_error_path: edge.in_error_path,
583 confidence: edge.confidence,
584 });
585 }
586 } else if edge.to.starts_with("module_ref:") {
587 let module = &edge.to["module_ref:".len()..];
588 if let Some(file_id) = module_map.get(module) {
589 resolved_edges.push(CodeEdge {
590 from: edge.from,
591 to: file_id.clone(),
592 relation: edge.relation,
593 weight: edge.weight,
594 call_count: edge.call_count,
595 in_error_path: edge.in_error_path,
596 confidence: edge.confidence,
597 });
598 }
599 } else if edge.to.starts_with("func_ref:") {
600 let func_name = &edge.to["func_ref:".len()..];
601 if let Some(func_ids) = func_map.get(func_name) {
602 if let Some(func_id) = func_ids.first() {
603 resolved_edges.push(CodeEdge {
604 from: edge.from,
605 to: func_id.clone(),
606 relation: edge.relation,
607 weight: edge.weight,
608 call_count: edge.call_count,
609 in_error_path: edge.in_error_path,
610 confidence: edge.confidence,
611 });
612 }
613 }
614 } else {
615 resolved_edges.push(edge);
616 }
617 }
618
619 let mut edge_map: HashMap<(String, String), CodeEdge> = HashMap::new();
621 let mut other_edges: Vec<CodeEdge> = Vec::new();
622
623 for edge in resolved_edges {
624 if edge.relation == EdgeRelation::Calls {
625 let key = (edge.from.clone(), edge.to.clone());
626 let entry = edge_map.entry(key).or_insert_with(|| {
627 let mut e = edge.clone();
628 e.call_count = 0;
629 e
630 });
631 entry.call_count += 1;
632 if edge.confidence > entry.confidence {
633 entry.confidence = edge.confidence;
634 }
635 if edge.in_error_path {
636 entry.in_error_path = true;
637 }
638 } else {
639 other_edges.push(edge);
640 }
641 }
642
643 let mut final_edges: Vec<CodeEdge> = edge_map.into_values().collect();
644 final_edges.extend(other_edges);
645
646 for edge in &mut final_edges {
648 edge.compute_weight();
649 }
650
651 add_override_edges(&nodes, &mut final_edges);
653
654 let mut graph = CodeGraph {
655 nodes,
656 edges: final_edges,
657 outgoing: HashMap::new(),
658 incoming: HashMap::new(),
659 node_index: HashMap::new(),
660 };
661 graph.build_indexes();
662 graph
663 }
664
665 pub fn build_indexes(&mut self) {
667 self.node_index.clear();
668 self.outgoing.clear();
669 self.incoming.clear();
670
671 for (i, node) in self.nodes.iter().enumerate() {
672 self.node_index.insert(node.id.clone(), i);
673 }
674
675 for (i, edge) in self.edges.iter().enumerate() {
676 self.outgoing.entry(edge.from.clone()).or_default().push(i);
677 self.incoming.entry(edge.to.clone()).or_default().push(i);
678 }
679 }
680
681 #[inline]
685 pub fn outgoing_edges(&self, node_id: &str) -> impl Iterator<Item = &CodeEdge> {
686 self.outgoing
687 .get(node_id)
688 .map(|indices| indices.as_slice())
689 .unwrap_or(&[])
690 .iter()
691 .map(move |&i| &self.edges[i])
692 }
693
694 #[inline]
696 pub fn incoming_edges(&self, node_id: &str) -> impl Iterator<Item = &CodeEdge> {
697 self.incoming
698 .get(node_id)
699 .map(|indices| indices.as_slice())
700 .unwrap_or(&[])
701 .iter()
702 .map(move |&i| &self.edges[i])
703 }
704
705 #[inline]
707 pub fn node_by_id(&self, node_id: &str) -> Option<&CodeNode> {
708 self.node_index.get(node_id).map(|&i| &self.nodes[i])
709 }
710
711 pub fn get_callers(&self, node_id: &str) -> Vec<&CodeNode> {
713 self.incoming_edges(node_id)
714 .filter(|e| e.relation == EdgeRelation::Calls)
715 .filter_map(|e| self.node_by_id(&e.from))
716 .collect()
717 }
718
719 pub fn get_callees(&self, node_id: &str) -> Vec<&CodeNode> {
721 self.outgoing_edges(node_id)
722 .filter(|e| e.relation == EdgeRelation::Calls)
723 .filter_map(|e| self.node_by_id(&e.to))
724 .collect()
725 }
726
727 pub fn get_dependencies(&self, node_id: &str) -> Vec<&CodeNode> {
729 self.outgoing_edges(node_id)
730 .filter_map(|e| self.node_by_id(&e.to))
731 .collect()
732 }
733
734 pub fn get_impact(&self, node_id: &str) -> Vec<&CodeNode> {
736 let mut impacted = Vec::new();
737 let mut visited = HashSet::new();
738 self.collect_dependents(node_id, &mut impacted, &mut visited);
739 impacted
740 }
741
742 fn collect_dependents<'a>(
743 &'a self,
744 node_id: &str,
745 result: &mut Vec<&'a CodeNode>,
746 visited: &mut HashSet<String>,
747 ) {
748 if !visited.insert(node_id.to_string()) {
749 return;
750 }
751
752 for edge in self.incoming_edges(node_id) {
753 if let Some(node) = self.node_by_id(&edge.from) {
754 result.push(node);
755 self.collect_dependents(&edge.from, result, visited);
756 }
757 }
758 }
759
760 pub fn find_relevant_nodes(&self, keywords: &[&str]) -> Vec<&CodeNode> {
762 let mut scored: Vec<(usize, &CodeNode)> = self
763 .nodes
764 .iter()
765 .map(|n| {
766 let score: usize = keywords
767 .iter()
768 .filter(|kw| {
769 let kw_lower = kw.to_lowercase();
770 let name_lower = n.name.to_lowercase();
771 let path_lower = n.file_path.to_lowercase();
772 name_lower.contains(&kw_lower)
773 || path_lower.contains(&kw_lower)
774 || (name_lower.len() >= 5
775 && kw_lower.contains(name_lower.trim_start_matches('_')))
776 })
777 .count();
778 (score, n)
779 })
780 .filter(|(score, _)| *score > 0)
781 .collect();
782
783 scored.sort_by(|a, b| b.0.cmp(&a.0));
784 let mut results: Vec<&CodeNode> = scored.into_iter().map(|(_, n)| n).collect();
785
786 let relevant_files: HashSet<String> = results.iter().map(|n| n.file_path.clone()).collect();
788
789 for node in &self.nodes {
790 if relevant_files.contains(&node.file_path) && !results.iter().any(|r| r.id == node.id) {
791 results.push(node);
792 }
793 }
794
795 let mut inheritance_additions: Vec<&CodeNode> = Vec::new();
797 let result_ids: HashSet<String> = results.iter().map(|n| n.id.clone()).collect();
798
799 for node in &results {
800 if node.kind == NodeKind::Class {
801 let chain = self.get_inheritance_chain(&node.id);
802 for ancestor_id in &chain {
803 if !result_ids.contains(ancestor_id) {
804 if let Some(ancestor) = self.node_by_id(ancestor_id) {
805 inheritance_additions.push(ancestor);
806 }
807 }
808 }
809 for edge in self.incoming_edges(&node.id) {
810 if edge.relation == EdgeRelation::Inherits && !result_ids.contains(&edge.from) {
811 if let Some(child) = self.node_by_id(&edge.from) {
812 inheritance_additions.push(child);
813 }
814 }
815 }
816 }
817 }
818
819 let mut extra_files: HashSet<String> = HashSet::new();
820 for node in &inheritance_additions {
821 if !results.iter().any(|r| r.id == node.id) {
822 extra_files.insert(node.file_path.clone());
823 results.push(node);
824 }
825 }
826 for node in &self.nodes {
827 if extra_files.contains(&node.file_path) && !results.iter().any(|r| r.id == node.id) {
828 results.push(node);
829 }
830 }
831
832 let mut import_additions: Vec<&CodeNode> = Vec::new();
835 let current_ids: HashSet<String> = results.iter().map(|n| n.id.clone()).collect();
836
837 for node in &results {
838 if node.kind == NodeKind::File {
839 for edge in self.outgoing_edges(&node.id) {
840 if edge.relation == EdgeRelation::Imports {
841 if !current_ids.contains(&edge.to) {
842 if let Some(imported) = self.node_by_id(&edge.to) {
843 import_additions.push(imported);
844 }
845 }
846 }
847 }
848 }
849 }
850
851 for node in &import_additions {
853 if node.kind == NodeKind::File {
854 let has_keyword_match = self
855 .nodes
856 .iter()
857 .filter(|n| n.file_path == node.file_path && n.kind != NodeKind::File)
858 .any(|n| {
859 let name_lower = n.name.to_lowercase();
860 keywords.iter().any(|kw| {
861 let kw_lower = kw.to_lowercase();
862 name_lower.contains(&kw_lower) || kw_lower.contains(&name_lower)
863 })
864 });
865 if has_keyword_match && !results.iter().any(|r| r.id == node.id) {
866 results.push(node);
867 for entity in &self.nodes {
869 if entity.file_path == node.file_path
870 && !results.iter().any(|r| r.id == entity.id)
871 {
872 results.push(entity);
873 }
874 }
875 }
876 }
877 }
878
879 results
880 }
881
882 pub fn impact_analysis(&self, changed_node_ids: &[&str]) -> ImpactReport<'_> {
884 let mut affected_nodes = Vec::new();
885 let mut affected_tests = Vec::new();
886 let mut seen = HashSet::new();
887
888 for node_id in changed_node_ids {
889 let impacted = self.get_impact(node_id);
890 for node in impacted {
891 if seen.insert(node.id.clone()) {
892 if node.file_path.contains("/tests/") || node.file_path.contains("/test_") {
893 affected_tests.push(node);
894 } else {
895 affected_nodes.push(node);
896 }
897 }
898 }
899 }
900
901 let related_tests = self.find_related_tests(changed_node_ids);
902 for test in related_tests {
903 if seen.insert(test.id.clone()) {
904 affected_tests.push(test);
905 }
906 }
907
908 ImpactReport {
909 affected_source: affected_nodes,
910 affected_tests,
911 }
912 }
913
914 pub fn find_related_tests(&self, source_node_ids: &[&str]) -> Vec<&CodeNode> {
916 let mut test_nodes = Vec::new();
917 let mut seen = HashSet::new();
918
919 let source_files: HashSet<String> = source_node_ids
920 .iter()
921 .filter_map(|id| self.node_by_id(id))
922 .map(|n| n.file_path.clone())
923 .collect();
924
925 let source_file_ids: HashSet<String> = source_files.iter().map(|f| format!("file:{}", f)).collect();
926
927 for source_fid in &source_file_ids {
929 for edge in self.incoming_edges(source_fid.as_str()) {
930 if edge.relation == EdgeRelation::TestsFor {
931 if let Some(test_node) = self.node_by_id(&edge.from) {
932 if seen.insert(test_node.id.clone()) {
933 test_nodes.push(test_node);
934 }
935 for node in &self.nodes {
936 if node.file_path == test_node.file_path
937 && node.kind != NodeKind::File
938 && seen.insert(node.id.clone())
939 {
940 test_nodes.push(node);
941 }
942 }
943 }
944 }
945 }
946 }
947
948 for source_id in source_node_ids.iter() {
950 for edge in self.incoming_edges(source_id) {
951 if edge.relation == EdgeRelation::Calls {
952 if let Some(caller) = self.node_by_id(&edge.from) {
953 if caller.file_path.contains("/tests/") || caller.file_path.contains("/test_") {
954 if seen.insert(caller.id.clone()) {
955 test_nodes.push(caller);
956 }
957 }
958 }
959 }
960 }
961 }
962
963 test_nodes
964 }
965
966 pub fn format_impact_for_llm(&self, changed_node_ids: &[&str], repo_dir: &Path) -> String {
968 let report = self.impact_analysis(changed_node_ids);
969 let mut result = String::new();
970
971 if !report.affected_source.is_empty() {
972 result.push_str("**⚠️ Impact Analysis — Code affected by your change:**\n");
973 for node in &report.affected_source {
974 let prefix = match node.kind {
975 NodeKind::File => "📄",
976 NodeKind::Class => "🔷",
977 NodeKind::Function => "🔹",
978 NodeKind::Module => "📦",
979 };
980 result.push_str(&format!("{} {} (`{}`)\n", prefix, node.name, node.file_path));
981 }
982 result.push('\n');
983 }
984
985 if !report.affected_tests.is_empty() {
986 result.push_str("**🧪 Tests that exercise the code you're changing:**\n");
987 result.push_str("DO NOT break these tests! Make minimal changes.\n\n");
988
989 let mut test_files: HashSet<String> = HashSet::new();
990 for node in &report.affected_tests {
991 test_files.insert(node.file_path.clone());
992 }
993
994 for test_file in &test_files {
995 result.push_str(&format!("📋 `{}`\n", test_file));
996 let funcs: Vec<&str> = report
997 .affected_tests
998 .iter()
999 .filter(|n| n.file_path == *test_file && n.kind == NodeKind::Function)
1000 .map(|n| n.name.as_str())
1001 .collect();
1002 if !funcs.is_empty() {
1003 for func in funcs.iter().take(10) {
1004 result.push_str(&format!(" - {}\n", func));
1005 }
1006 if funcs.len() > 10 {
1007 result.push_str(&format!(" ... and {} more\n", funcs.len() - 10));
1008 }
1009 }
1010 }
1011 result.push('\n');
1012
1013 let test_nodes_refs: Vec<&CodeNode> = report
1014 .affected_tests
1015 .iter()
1016 .filter(|n| n.kind == NodeKind::Function)
1017 .take(10)
1018 .copied()
1019 .collect();
1020
1021 if !test_nodes_refs.is_empty() {
1022 let test_snippets = self.extract_snippets(&test_nodes_refs, repo_dir, 30);
1023 if !test_snippets.is_empty() {
1024 result.push_str("**Key test code (DO NOT break these):**\n```python\n");
1025 for (node_id, snippet) in test_snippets.iter().take(5) {
1026 let name = self.node_name(node_id);
1027 result.push_str(&format!("# --- {} ---\n{}\n\n", name, snippet));
1028 }
1029 result.push_str("```\n");
1030 }
1031 }
1032 }
1033
1034 result
1035 }
1036
1037 pub fn trace_causal_chains_from_symptoms(
1039 &self,
1040 symptom_node_ids: &[&str],
1041 max_depth: usize,
1042 max_chains: usize,
1043 ) -> Vec<CausalChain> {
1044 #[derive(Clone)]
1045 struct WeightedPath {
1046 node_id: String,
1047 accumulated_weight: f32,
1048 chain: Vec<ChainNode>,
1049 }
1050
1051 impl PartialEq for WeightedPath {
1052 fn eq(&self, other: &Self) -> bool {
1053 self.accumulated_weight
1054 .total_cmp(&other.accumulated_weight)
1055 == std::cmp::Ordering::Equal
1056 }
1057 }
1058 impl Eq for WeightedPath {}
1059 impl PartialOrd for WeightedPath {
1060 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
1061 Some(self.cmp(other))
1062 }
1063 }
1064 impl Ord for WeightedPath {
1065 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
1066 self.accumulated_weight.total_cmp(&other.accumulated_weight)
1067 }
1068 }
1069
1070 let mut all_chains: Vec<CausalChain> = Vec::new();
1071
1072 for symptom_id in symptom_node_ids {
1073 let symptom_node = match self.node_by_id(symptom_id) {
1074 Some(n) => n,
1075 None => continue,
1076 };
1077
1078 {
1080 let mut heap: BinaryHeap<WeightedPath> = BinaryHeap::new();
1081 let mut visited = HashSet::new();
1082 visited.insert(symptom_id.to_string());
1083
1084 let start_chain_node = ChainNode {
1085 node_id: symptom_id.to_string(),
1086 node_name: symptom_node.name.clone(),
1087 file_path: symptom_node.file_path.clone(),
1088 line: symptom_node.line,
1089 edge_to_next: None,
1090 };
1091 heap.push(WeightedPath {
1092 node_id: symptom_id.to_string(),
1093 accumulated_weight: 1.0,
1094 chain: vec![start_chain_node],
1095 });
1096
1097 while let Some(current) = heap.pop() {
1098 if current.chain.len() > max_depth {
1099 continue;
1100 }
1101
1102 for edge in self.outgoing_edges(¤t.node_id) {
1103 let (target_id, edge_label) = match edge.relation {
1104 EdgeRelation::Calls => (&edge.to, "calls"),
1105 EdgeRelation::Inherits => (&edge.to, "inherits"),
1106 EdgeRelation::Imports => (&edge.to, "imports"),
1107 EdgeRelation::Overrides => (&edge.to, "overrides"),
1108 EdgeRelation::TestsFor => (&edge.to, "tests"),
1109 _ => continue,
1110 };
1111 if visited.contains(target_id) {
1112 continue;
1113 }
1114 if let Some(target_node) = self.node_by_id(target_id) {
1115 visited.insert(target_node.id.clone());
1116 let new_weight = current.accumulated_weight * edge.weight;
1117
1118 let mut new_chain = current.chain.clone();
1119 if let Some(last) = new_chain.last_mut() {
1120 last.edge_to_next = Some(edge_label.to_string());
1121 }
1122 new_chain.push(ChainNode {
1123 node_id: target_node.id.clone(),
1124 node_name: target_node.name.clone(),
1125 file_path: target_node.file_path.clone(),
1126 line: target_node.line,
1127 edge_to_next: None,
1128 });
1129
1130 if new_chain.len() >= 2 {
1131 all_chains.push(CausalChain {
1132 symptom_node_id: symptom_id.to_string(),
1133 chain: new_chain.clone(),
1134 });
1135 }
1136
1137 if new_chain.len() < max_depth {
1138 heap.push(WeightedPath {
1139 node_id: target_node.id.clone(),
1140 accumulated_weight: new_weight,
1141 chain: new_chain,
1142 });
1143 }
1144 }
1145 }
1146 }
1147 }
1148
1149 {
1151 let mut heap: BinaryHeap<WeightedPath> = BinaryHeap::new();
1152 let mut visited = HashSet::new();
1153 visited.insert(symptom_id.to_string());
1154
1155 let start_chain_node = ChainNode {
1156 node_id: symptom_id.to_string(),
1157 node_name: symptom_node.name.clone(),
1158 file_path: symptom_node.file_path.clone(),
1159 line: symptom_node.line,
1160 edge_to_next: None,
1161 };
1162 heap.push(WeightedPath {
1163 node_id: symptom_id.to_string(),
1164 accumulated_weight: 1.0,
1165 chain: vec![start_chain_node],
1166 });
1167
1168 while let Some(current) = heap.pop() {
1169 if current.chain.len() > max_depth {
1170 continue;
1171 }
1172
1173 for edge in self.incoming_edges(¤t.node_id) {
1174 if edge.relation != EdgeRelation::Calls
1175 && edge.relation != EdgeRelation::Imports
1176 && edge.relation != EdgeRelation::Overrides
1177 {
1178 continue;
1179 }
1180 if visited.contains(&edge.from) {
1181 continue;
1182 }
1183 if let Some(caller) = self.node_by_id(&edge.from) {
1184 if caller.file_path.contains("/tests/")
1185 || caller.file_path.contains("/test_")
1186 {
1187 continue;
1188 }
1189 visited.insert(caller.id.clone());
1190 let new_weight = current.accumulated_weight * edge.weight;
1191
1192 let edge_label = match edge.relation {
1193 EdgeRelation::Imports => "imported_by",
1194 EdgeRelation::Overrides => "overridden_by",
1195 _ => "called_by",
1196 };
1197 let mut new_chain = current.chain.clone();
1198 if let Some(last) = new_chain.last_mut() {
1199 last.edge_to_next = Some(edge_label.to_string());
1200 }
1201 new_chain.push(ChainNode {
1202 node_id: caller.id.clone(),
1203 node_name: caller.name.clone(),
1204 file_path: caller.file_path.clone(),
1205 line: caller.line,
1206 edge_to_next: None,
1207 });
1208
1209 if new_chain.len() >= 2 {
1210 all_chains.push(CausalChain {
1211 symptom_node_id: symptom_id.to_string(),
1212 chain: new_chain.clone(),
1213 });
1214 }
1215
1216 if new_chain.len() < max_depth {
1217 heap.push(WeightedPath {
1218 node_id: caller.id.clone(),
1219 accumulated_weight: new_weight,
1220 chain: new_chain,
1221 });
1222 }
1223 }
1224 }
1225 }
1226 }
1227 }
1228
1229 all_chains.sort_by(|a, b| {
1231 let len_cmp = a.chain.len().cmp(&b.chain.len());
1232 if len_cmp != std::cmp::Ordering::Equal {
1233 return len_cmp;
1234 }
1235 let a_source = a
1236 .chain
1237 .iter()
1238 .filter(|n| !n.file_path.contains("/tests/") && !n.file_path.contains("/test_"))
1239 .count();
1240 let b_source = b
1241 .chain
1242 .iter()
1243 .filter(|n| !n.file_path.contains("/tests/") && !n.file_path.contains("/test_"))
1244 .count();
1245 b_source.cmp(&a_source)
1246 });
1247
1248 let mut deduped: Vec<CausalChain> = Vec::new();
1249 for chain in &all_chains {
1250 let is_prefix = deduped.iter().any(|existing| {
1251 existing.chain.len() > chain.chain.len()
1252 && chain
1253 .chain
1254 .iter()
1255 .zip(existing.chain.iter())
1256 .all(|(a, b)| a.node_id == b.node_id)
1257 });
1258 if is_prefix {
1259 continue;
1260 }
1261 deduped.retain(|existing| {
1262 !(existing.chain.len() < chain.chain.len()
1263 && existing
1264 .chain
1265 .iter()
1266 .zip(chain.chain.iter())
1267 .all(|(a, b)| a.node_id == b.node_id))
1268 });
1269 deduped.push(chain.clone());
1270 }
1271
1272 deduped.truncate(max_chains);
1273 deduped
1274 }
1275
1276 pub fn trace_causal_chains(
1278 &self,
1279 changed_node_ids: &[&str],
1280 failed_p2p_tests: &[String],
1281 failed_f2p_tests: &[String],
1282 ) -> String {
1283 if failed_p2p_tests.is_empty() && failed_f2p_tests.is_empty() {
1284 return String::new();
1285 }
1286
1287 let mut result = String::new();
1288
1289 if !failed_p2p_tests.is_empty() {
1290 result.push_str("## 🚨 CAUSAL ANALYSIS — Why Your Fix Broke Existing Tests\n\n");
1291 result.push_str(
1292 "These tests PASSED before your change and now FAIL. You MUST fix these regressions.\n\n",
1293 );
1294
1295 for test_name in failed_p2p_tests {
1296 let short_name = test_name.split("::").last().unwrap_or(test_name);
1297 result.push_str(&format!("### ❌ REGRESSION: `{}`\n", short_name));
1298
1299 let test_node = self.nodes.iter().find(|n| {
1300 n.name == short_name
1301 || n.name.ends_with(short_name)
1302 || (n.file_path.contains("/test") && n.name == short_name)
1303 });
1304
1305 if let Some(test) = test_node {
1306 let chains = self.find_paths_to_test(changed_node_ids, &test.id);
1307
1308 if !chains.is_empty() {
1309 result.push_str("**Causal chain(s):**\n");
1310 for chain in chains.iter().take(3) {
1311 let chain_str: Vec<String> = chain
1312 .iter()
1313 .map(|id| {
1314 self.nodes
1315 .iter()
1316 .find(|n| n.id == *id)
1317 .map(|n| format!("`{}` ({})", n.name, n.file_path))
1318 .unwrap_or_else(|| id.to_string())
1319 })
1320 .collect();
1321 result.push_str(&format!(" 🔗 {}\n", chain_str.join(" → ")));
1322 }
1323 result.push_str("\n**What this means:** Your change propagated through the dependency chain above and broke this test.\n");
1324 result.push_str("**How to fix:** Make your change more surgical — ensure the modified function's behavior is backward-compatible for the callers in this chain.\n\n");
1325 } else {
1326 let changed_files: HashSet<String> = changed_node_ids
1328 .iter()
1329 .filter_map(|id| self.node_by_id(id))
1330 .map(|n| n.file_path.clone())
1331 .collect();
1332
1333 if changed_files
1334 .iter()
1335 .any(|f| test.file_path.contains(f.as_str()))
1336 || self.shares_import(&test.id, changed_node_ids)
1337 {
1338 result.push_str("**Connection:** Indirect — test imports or uses a module you changed.\n");
1339 result.push_str("**How to fix:** Check that your change doesn't alter the public API or default behavior of the module.\n\n");
1340 } else {
1341 result.push_str("**Connection:** Could not trace via graph (may be via dynamic dispatch, monkey-patching, or shared global state).\n");
1342 result.push_str("**How to fix:** Read the test's assertion error carefully — it will tell you what behavior changed.\n\n");
1343 }
1344 }
1345 } else {
1346 result.push_str(
1347 "**Note:** Test not found in code graph. Read the error output to understand what broke.\n\n",
1348 );
1349 }
1350 }
1351
1352 result.push_str("### 🎯 Overall Regression Fix Strategy\n");
1353 result.push_str(
1354 "1. **Don't change your approach** — your bug fix logic is likely correct\n",
1355 );
1356 result.push_str("2. **Narrow the scope** — guard your change with a condition so it only applies to the bug case\n");
1357 result.push_str("3. **Add backward compatibility** — if you changed a return type/value, ensure callers still get what they expect\n");
1358 result.push_str("4. **Check default parameters** — if you changed defaults, existing callers rely on the old defaults\n\n");
1359 }
1360
1361 if !failed_f2p_tests.is_empty() {
1362 result.push_str("## ⚠️ Original Bug Not Fixed\n");
1363 result.push_str("These tests still fail — your fix is incomplete or incorrect:\n");
1364 for test_name in failed_f2p_tests {
1365 let short_name = test_name.split("::").last().unwrap_or(test_name);
1366 result.push_str(&format!("- `{}`\n", short_name));
1367 }
1368 result.push('\n');
1369 }
1370
1371 result
1372 }
1373
1374 fn find_paths_to_test(&self, changed_node_ids: &[&str], test_node_id: &str) -> Vec<Vec<String>> {
1375 let mut paths = Vec::new();
1376
1377 for changed_id in changed_node_ids {
1378 if let Some(path) = self.bfs_path(test_node_id, changed_id, 5) {
1379 let mut p = path;
1380 p.reverse();
1381 paths.push(p);
1382 }
1383 }
1384
1385 paths
1386 }
1387
1388 pub fn bfs_path(&self, from: &str, to: &str, max_depth: usize) -> Option<Vec<String>> {
1390 let mut queue: VecDeque<(String, Vec<String>)> = VecDeque::new();
1391 let mut visited = HashSet::new();
1392
1393 queue.push_back((from.to_string(), vec![from.to_string()]));
1394 visited.insert(from.to_string());
1395
1396 while let Some((current, path)) = queue.pop_front() {
1397 if path.len() > max_depth {
1398 continue;
1399 }
1400
1401 for edge in self.outgoing_edges(¤t) {
1402 if edge.to == to {
1403 let mut final_path = path.clone();
1404 final_path.push(edge.to.clone());
1405 return Some(final_path);
1406 }
1407 if !visited.contains(&edge.to) {
1408 visited.insert(edge.to.clone());
1409 let mut new_path = path.clone();
1410 new_path.push(edge.to.clone());
1411 queue.push_back((edge.to.clone(), new_path));
1412 }
1413 }
1414 }
1415 None
1416 }
1417
1418 pub fn get_node_summary(&self, node_id: &str, repo_dir: &Path) -> String {
1420 let node = match self.node_by_id(node_id) {
1421 Some(n) => n,
1422 None => return format!("[unknown node: {}]", node_id),
1423 };
1424
1425 let mut result = format!(
1426 "{} ({}:{})",
1427 node.name,
1428 node.file_path,
1429 node.line.map(|l| l.to_string()).unwrap_or_else(|| "?".to_string()),
1430 );
1431
1432 let full_path = repo_dir.join(&node.file_path);
1433 if let Ok(content) = std::fs::read_to_string(&full_path) {
1434 let lines: Vec<&str> = content.lines().collect();
1435 if let Some(start_line) = node.line {
1436 if start_line > 0 && start_line <= lines.len() {
1437 let start_idx = start_line - 1;
1438 let end_idx = (start_idx + 15).min(lines.len());
1439 let preview: String = lines[start_idx..end_idx]
1440 .iter()
1441 .map(|l| *l)
1442 .collect::<Vec<_>>()
1443 .join("\n");
1444 result.push('\n');
1445 result.push_str(&preview);
1446 }
1447 }
1448 }
1449
1450 result
1451 }
1452
1453 pub fn extract_snippets(
1455 &self,
1456 nodes: &[&CodeNode],
1457 repo_dir: &Path,
1458 max_lines: usize,
1459 ) -> HashMap<String, String> {
1460 let mut snippets = HashMap::new();
1461 let mut file_cache: HashMap<String, Vec<String>> = HashMap::new();
1462
1463 for node in nodes {
1464 if node.kind == NodeKind::File {
1465 continue;
1466 }
1467
1468 let file_path = repo_dir.join(&node.file_path);
1469 let lines = file_cache.entry(node.file_path.clone()).or_insert_with(|| {
1470 std::fs::read_to_string(&file_path)
1471 .unwrap_or_default()
1472 .lines()
1473 .map(|l| l.to_string())
1474 .collect()
1475 });
1476
1477 if let Some(start_line) = node.line {
1478 if start_line == 0 || start_line > lines.len() {
1479 continue;
1480 }
1481 let start_idx = start_line - 1;
1482
1483 let base_indent = lines[start_idx]
1484 .chars()
1485 .take_while(|c| c.is_whitespace())
1486 .count();
1487
1488 let mut end_idx = start_idx + 1;
1489 while end_idx < lines.len() && end_idx < start_idx + max_lines {
1490 let line = &lines[end_idx];
1491 if line.trim().is_empty() {
1492 end_idx += 1;
1493 continue;
1494 }
1495 let indent = line.chars().take_while(|c| c.is_whitespace()).count();
1496 if indent <= base_indent && !line.trim().is_empty() {
1497 break;
1498 }
1499 end_idx += 1;
1500 }
1501
1502 let snippet: String = lines[start_idx..end_idx.min(lines.len())]
1503 .iter()
1504 .map(|l| l.as_str())
1505 .collect::<Vec<_>>()
1506 .join("\n");
1507
1508 if !snippet.trim().is_empty() {
1509 snippets.insert(node.id.clone(), snippet);
1510 }
1511 }
1512 }
1513
1514 snippets
1515 }
1516
1517 pub fn format_for_llm(&self, keywords: &[&str], max_chars: usize) -> String {
1519 let relevant = self.find_relevant_nodes(keywords);
1520
1521 if relevant.is_empty() {
1522 return self.format_file_summary(max_chars);
1523 }
1524
1525 let mut result = String::from("**Code structure (relevant to issue):**\n");
1526
1527 result.push_str("\nRelevant files/classes/functions:\n");
1528 let relevant_ids: HashSet<&str> = relevant.iter().map(|n| n.id.as_str()).collect();
1529
1530 for node in relevant.iter().take(20) {
1531 let prefix = match node.kind {
1532 NodeKind::File => "📄",
1533 NodeKind::Class => "🔷",
1534 NodeKind::Function => "🔹",
1535 NodeKind::Module => "📦",
1536 };
1537 let line_info = node.line.map(|l| format!(" (line {})", l)).unwrap_or_default();
1538 result.push_str(&format!(
1539 "{} {} — `{}`{}\n",
1540 prefix, node.name, node.file_path, line_info
1541 ));
1542
1543 if result.len() > max_chars / 2 {
1544 break;
1545 }
1546 }
1547
1548 let relevant_edges: Vec<&CodeEdge> = self
1549 .edges
1550 .iter()
1551 .filter(|e| {
1552 relevant_ids.contains(e.from.as_str()) || relevant_ids.contains(e.to.as_str())
1553 })
1554 .filter(|e| e.relation != EdgeRelation::DefinedIn)
1555 .collect();
1556
1557 if !relevant_edges.is_empty() {
1558 result.push_str("\nRelationships:\n");
1559 for edge in relevant_edges.iter().take(15) {
1560 let from_name = self.node_name(&edge.from);
1561 let to_name = self.node_name(&edge.to);
1562 result.push_str(&format!(
1563 " {} --[{}]--> {}\n",
1564 from_name, edge.relation, to_name
1565 ));
1566
1567 if result.len() > max_chars {
1568 break;
1569 }
1570 }
1571 }
1572
1573 let relevant_classes: Vec<&&CodeNode> = relevant
1574 .iter()
1575 .filter(|n| n.kind == NodeKind::Class)
1576 .collect();
1577
1578 if !relevant_classes.is_empty() {
1579 result.push_str("\nInheritance:\n");
1580 for cls in relevant_classes.iter().take(5) {
1581 let chain = self.get_inheritance_chain(&cls.id);
1582 if chain.len() > 1 {
1583 let names: Vec<String> =
1584 chain.iter().map(|id| self.node_name(id)).collect();
1585 result.push_str(&format!(" {} \n", names.join(" → ")));
1586 }
1587 }
1588 }
1589
1590 let file_count = self.nodes.iter().filter(|n| n.kind == NodeKind::File).count();
1591 let class_count = self.nodes.iter().filter(|n| n.kind == NodeKind::Class).count();
1592 let import_count = self
1593 .edges
1594 .iter()
1595 .filter(|e| e.relation == EdgeRelation::Imports)
1596 .count();
1597 let inherit_count = self
1598 .edges
1599 .iter()
1600 .filter(|e| e.relation == EdgeRelation::Inherits)
1601 .count();
1602
1603 result.push_str(&format!(
1604 "\nGraph: {} files, {} classes, {} imports, {} inheritance edges\n",
1605 file_count, class_count, import_count, inherit_count
1606 ));
1607
1608 if result.len() > max_chars {
1609 result.truncate(max_chars);
1610 result.push_str("\n...[truncated]\n");
1611 }
1612
1613 result
1614 }
1615
1616 fn format_file_summary(&self, max_chars: usize) -> String {
1617 let mut result = String::from("**Repository files:**\n");
1618
1619 let files: Vec<&CodeNode> = self
1620 .nodes
1621 .iter()
1622 .filter(|n| n.kind == NodeKind::File)
1623 .collect();
1624
1625 for file in &files {
1626 let classes: Vec<String> = self
1627 .nodes
1628 .iter()
1629 .filter(|n| n.kind == NodeKind::Class && n.file_path == file.file_path)
1630 .map(|n| n.name.clone())
1631 .collect();
1632
1633 let mut line = format!("- `{}`", file.file_path);
1634 if !classes.is_empty() {
1635 line.push_str(&format!(" — {}", classes.join(", ")));
1636 }
1637 line.push('\n');
1638
1639 if result.len() + line.len() > max_chars {
1640 result.push_str(&format!("... and {} more files\n", files.len()));
1641 break;
1642 }
1643 result.push_str(&line);
1644 }
1645
1646 result
1647 }
1648
1649 fn node_name(&self, id: &str) -> String {
1650 self.nodes
1651 .iter()
1652 .find(|n| n.id == id)
1653 .map(|n| n.name.clone())
1654 .unwrap_or_else(|| id.to_string())
1655 }
1656
1657 fn get_inheritance_chain(&self, class_id: &str) -> Vec<String> {
1658 let mut chain = vec![class_id.to_string()];
1659 let mut current = class_id.to_string();
1660
1661 for _ in 0..10 {
1662 let parent = self
1663 .edges
1664 .iter()
1665 .find(|e| e.from == current && e.relation == EdgeRelation::Inherits);
1666 match parent {
1667 Some(edge) => {
1668 chain.push(edge.to.clone());
1669 current = edge.to.clone();
1670 }
1671 None => break,
1672 }
1673 }
1674
1675 chain
1676 }
1677
1678 fn shares_import(&self, test_node_id: &str, changed_node_ids: &[&str]) -> bool {
1681 let test_imports: HashSet<String> = self
1682 .edges
1683 .iter()
1684 .filter(|e| e.from == test_node_id && e.relation == EdgeRelation::Imports)
1685 .map(|e| e.to.clone())
1686 .collect();
1687
1688 let changed_files: HashSet<String> = changed_node_ids
1689 .iter()
1690 .filter_map(|id| self.node_by_id(id))
1691 .flat_map(|n| {
1692 let file_id = format!("file:{}", n.file_path);
1693 vec![n.id.clone(), file_id]
1694 })
1695 .collect();
1696
1697 test_imports.intersection(&changed_files).next().is_some()
1698 }
1699
1700 pub fn grep_for_identifiers(&self, repo_dir: &Path, identifiers: &[&str]) -> Vec<CodeNode> {
1702 let mut found_nodes = Vec::new();
1703 let existing_names: HashSet<String> = self.nodes.iter().map(|n| n.name.clone()).collect();
1704
1705 for ident in identifiers {
1706 if existing_names.contains(*ident) {
1707 continue;
1708 }
1709
1710 let patterns = [
1711 format!("class {}[:(]", ident),
1712 format!("def {}[(]", ident),
1713 format!("class {}\\b", ident),
1714 ];
1715
1716 for pattern in &patterns {
1717 if let Ok(output) = std::process::Command::new("grep")
1718 .args(["-rn", pattern, "--include=*.py", "-l"])
1719 .current_dir(repo_dir)
1720 .output()
1721 {
1722 let stdout = String::from_utf8_lossy(&output.stdout);
1723 for file_path in stdout.lines().take(3) {
1724 let file_path = file_path.trim();
1725 if file_path.is_empty()
1726 || file_path.contains("/tests/")
1727 || file_path.contains("/test_")
1728 {
1729 continue;
1730 }
1731
1732 if let Ok(line_output) = std::process::Command::new("grep")
1733 .args(["-n", pattern, file_path])
1734 .current_dir(repo_dir)
1735 .output()
1736 {
1737 let line_stdout = String::from_utf8_lossy(&line_output.stdout);
1738 if let Some(first_line) = line_stdout.lines().next() {
1739 let line_num: usize = first_line
1740 .split(':')
1741 .next()
1742 .unwrap_or("0")
1743 .parse()
1744 .unwrap_or(0);
1745
1746 let is_class = first_line.contains("class ");
1747 found_nodes.push(CodeNode {
1748 id: format!("grep:{}:{}", file_path, ident),
1749 kind: if is_class {
1750 NodeKind::Class
1751 } else {
1752 NodeKind::Function
1753 },
1754 name: ident.to_string(),
1755 file_path: file_path.to_string(),
1756 line: if line_num > 0 { Some(line_num) } else { None },
1757 decorators: Vec::new(),
1758 signature: None,
1759 docstring: None,
1760 line_count: 0,
1761 is_test: false,
1762 });
1763 break;
1764 }
1765 }
1766 }
1767 }
1768 if found_nodes.iter().any(|n| n.name == *ident) {
1769 break;
1770 }
1771 }
1772 }
1773
1774 found_nodes
1775 }
1776
/// Extracts up to 20 identifier-like keywords from a free-text problem statement.
///
/// The text is split on every character that is not alphanumeric, `_` or `.`. A word is
/// kept when, after removing trailing periods (sentence punctuation), it is at least three
/// bytes long, is not a common stop word, and looks like an identifier: it contains `_`,
/// contains `.`, or has at least one uppercase character.
///
/// Keywords are returned in order of first appearance and each keyword appears only once,
/// even when its occurrences are not adjacent in the text.
pub fn extract_keywords(problem_statement: &str) -> Vec<&str> {
    const STOPWORDS: &[&str] = &[
        "the", "and", "for", "that", "this", "with", "from", "not", "but", "are", "was",
        "has", "have", "can", "should", "would", "when", "what", "how", "does", "bug",
        "fix", "issue", "error", "problem", "description",
    ];
    const MAX_KEYWORDS: usize = 20;

    let mut seen: HashSet<&str> = HashSet::new();
    let mut keywords = Vec::new();

    for raw in problem_statement.split(|c: char| !c.is_alphanumeric() && c != '_' && c != '.') {
        // "utils.py." -> "utils.py", "..." -> "" (then dropped by the length check).
        let word = raw.trim_end_matches('.');
        if word.len() < 3 {
            continue;
        }
        if STOPWORDS.contains(&word.to_lowercase().as_str()) {
            continue;
        }

        let looks_like_identifier =
            word.contains('_') || word.contains('.') || word.chars().any(char::is_uppercase);

        // `insert` returns false for a word that was already collected.
        if looks_like_identifier && seen.insert(word) {
            keywords.push(word);
            if keywords.len() == MAX_KEYWORDS {
                break;
            }
        }
    }

    keywords
}
1811
1812 pub fn has_node(&self, file_path: &str, name: &str) -> bool {
1814 let needle = file_path.strip_prefix("./").unwrap_or(file_path);
1815 self.nodes.iter().any(|n| {
1816 let hay = n.file_path.strip_prefix("./").unwrap_or(&n.file_path);
1817 hay == needle && n.name == name
1818 })
1819 }
1820
1821 pub fn find_node(&self, file_path: &str, name: &str) -> Option<&CodeNode> {
1823 let needle = file_path.strip_prefix("./").unwrap_or(file_path);
1824 self.nodes.iter().find(|n| {
1825 let hay = n.file_path.strip_prefix("./").unwrap_or(&n.file_path);
1826 hay == needle && n.name == name
1827 })
1828 }
1829
1830 pub fn add_file_nodes(
1832 &mut self,
1833 repo_dir: &Path,
1834 file_path: &Path,
1835 target_names: Option<&[String]>,
1836 ) -> anyhow::Result<()> {
1837 use anyhow::Context;
1838
1839 let full_path = repo_dir.join(file_path);
1840 if !full_path.exists() {
1841 anyhow::bail!("File not found: {:?}", full_path);
1842 }
1843
1844 let source = std::fs::read_to_string(&full_path)
1845 .context(format!("Failed to read {:?}", full_path))?;
1846
1847 let mut parser = Parser::new();
1848 let language = tree_sitter_python::LANGUAGE;
1849 parser
1850 .set_language(&language.into())
1851 .context("Failed to set Python language")?;
1852
1853 let tree = parser
1854 .parse(&source, None)
1855 .context("Failed to parse Python file")?;
1856
1857 let file_path_str = file_path.to_string_lossy().to_string();
1858
1859 let root = tree.root_node();
1860
1861 fn extract_from_node(
1862 node: tree_sitter::Node,
1863 source: &str,
1864 file_path: &str,
1865 nodes: &mut Vec<CodeNode>,
1866 target_names: Option<&[String]>,
1867 ) {
1868 if node.kind() == "function_definition" {
1869 if let Some(name_node) = node.child_by_field_name("name") {
1870 let name = &source[name_node.byte_range()];
1871 let matched =
1872 target_names.map_or(true, |targets| targets.iter().any(|t| t == name));
1873 if matched {
1874 let line = name_node.start_position().row + 1;
1875 let id = format!("func:{}:{}", file_path, name);
1876 nodes.push(CodeNode {
1877 id,
1878 kind: NodeKind::Function,
1879 name: name.to_string(),
1880 file_path: file_path.to_string(),
1881 line: Some(line),
1882 decorators: vec![],
1883 signature: None,
1884 docstring: None,
1885 line_count: 0,
1886 is_test: false,
1887 });
1888 }
1889 }
1890 } else if node.kind() == "class_definition" {
1891 if let Some(name_node) = node.child_by_field_name("name") {
1892 let name = &source[name_node.byte_range()];
1893 let matched =
1894 target_names.map_or(true, |targets| targets.iter().any(|t| t == name));
1895 if matched {
1896 let line = name_node.start_position().row + 1;
1897 let id = format!("class:{}:{}", file_path, name);
1898 nodes.push(CodeNode {
1899 id,
1900 kind: NodeKind::Class,
1901 name: name.to_string(),
1902 file_path: file_path.to_string(),
1903 line: Some(line),
1904 decorators: vec![],
1905 signature: None,
1906 docstring: None,
1907 line_count: 0,
1908 is_test: false,
1909 });
1910 }
1911 }
1912 }
1913
1914 for child in node.children(&mut node.walk()) {
1915 extract_from_node(child, source, file_path, nodes, target_names);
1916 }
1917 }
1918
1919 extract_from_node(root, &source, &file_path_str, &mut self.nodes, target_names);
1920 self.build_indexes();
1921
1922 Ok(())
1923 }
1924
1925 pub fn get_schema(&self) -> String {
1927 let node_kinds: HashSet<&str> = self.nodes.iter().map(|n| match n.kind {
1928 NodeKind::File => "File",
1929 NodeKind::Class => "Class",
1930 NodeKind::Function => "Function",
1931 NodeKind::Module => "Module",
1932 }).collect();
1933
1934 let edge_relations: HashSet<&str> = self.edges.iter().map(|e| match e.relation {
1935 EdgeRelation::Imports => "imports",
1936 EdgeRelation::Inherits => "inherits",
1937 EdgeRelation::DefinedIn => "defined_in",
1938 EdgeRelation::Calls => "calls",
1939 EdgeRelation::TestsFor => "tests_for",
1940 EdgeRelation::Overrides => "overrides",
1941 }).collect();
1942
1943 format!(
1944 "Schema:\n Node kinds: {:?}\n Edge relations: {:?}\n Total nodes: {}\n Total edges: {}",
1945 node_kinds,
1946 edge_relations,
1947 self.nodes.len(),
1948 self.edges.len()
1949 )
1950 }
1951
1952 pub fn get_file_summary(&self, file_path: &str) -> String {
1954 let file_nodes: Vec<&CodeNode> = self.nodes.iter()
1955 .filter(|n| n.file_path == file_path)
1956 .collect();
1957
1958 if file_nodes.is_empty() {
1959 return format!("No nodes found for file: {}", file_path);
1960 }
1961
1962 let classes: Vec<&str> = file_nodes.iter()
1963 .filter(|n| n.kind == NodeKind::Class)
1964 .map(|n| n.name.as_str())
1965 .collect();
1966
1967 let functions: Vec<&str> = file_nodes.iter()
1968 .filter(|n| n.kind == NodeKind::Function)
1969 .map(|n| n.name.as_str())
1970 .collect();
1971
1972 format!(
1973 "File: {}\n Classes ({}): {}\n Functions ({}): {}",
1974 file_path,
1975 classes.len(),
1976 classes.join(", "),
1977 functions.len(),
1978 functions.join(", ")
1979 )
1980 }
1981
1982 pub fn analyze_test_failures(
1987 &self,
1988 changed_node_ids: &[&str],
1989 failed_test_names: &[String],
1990 _repo_dir: &Path,
1991 ) -> String {
1992 let mut analysis = String::new();
1993 analysis.push_str("## 🔍 Graph-based Failure Analysis\n\n");
1994
1995 let changed_names: Vec<String> = changed_node_ids.iter()
1997 .filter_map(|id| self.node_by_id(id))
1998 .map(|n| n.name.clone())
1999 .collect();
2000
2001 let changed_files: HashSet<String> = changed_node_ids.iter()
2002 .filter_map(|id| self.node_by_id(id))
2003 .map(|n| n.file_path.clone())
2004 .collect();
2005
2006 for test_name in failed_test_names {
2008 let short_name = test_name.split("::").last().unwrap_or(test_name);
2011
2012 let test_node = self.nodes.iter().find(|n| {
2014 n.name == short_name
2015 || n.name.ends_with(short_name)
2016 || (n.file_path.contains("/test") && n.name == short_name)
2017 });
2018
2019 analysis.push_str(&format!("### ❌ {}\n", short_name));
2020
2021 if let Some(test) = test_node {
2022 let callees = self.get_callees(&test.id);
2024 let mut found_connection = false;
2025
2026 for callee in &callees {
2027 if changed_node_ids.contains(&callee.id.as_str())
2028 || changed_names.contains(&callee.name)
2029 {
2030 analysis.push_str(&format!(
2031 "**Direct call chain:** `{}` → `{}` (YOU CHANGED THIS)\n",
2032 short_name, callee.name
2033 ));
2034 found_connection = true;
2035
2036 let other_callers = self.get_callers(&callee.id);
2038 let other_caller_names: Vec<&str> = other_callers.iter()
2039 .filter(|c| c.id != test.id)
2040 .map(|c| c.name.as_str())
2041 .take(5)
2042 .collect();
2043 if !other_caller_names.is_empty() {
2044 analysis.push_str(&format!(
2045 "**Other callers of `{}`:** {}\n",
2046 callee.name,
2047 other_caller_names.join(", ")
2048 ));
2049 }
2050 }
2051 }
2052
2053 if !found_connection {
2055 for callee in &callees {
2056 let sub_callees = self.get_callees(&callee.id);
2057 for sub in &sub_callees {
2058 if changed_node_ids.contains(&sub.id.as_str())
2059 || changed_names.contains(&sub.name)
2060 {
2061 analysis.push_str(&format!(
2062 "**Indirect chain:** `{}` → `{}` → `{}` (YOU CHANGED THIS)\n",
2063 short_name, callee.name, sub.name
2064 ));
2065 found_connection = true;
2066 break;
2067 }
2068 }
2069 if found_connection { break; }
2070 }
2071 }
2072
2073 if !found_connection {
2075 let test_file = &test.file_path;
2076 let test_file_id = format!("file:{}", test_file);
2077
2078 for edge in self.outgoing_edges(&test_file_id) {
2079 if edge.relation == EdgeRelation::TestsFor {
2080 if let Some(target) = self.node_by_id(&edge.to) {
2081 if changed_files.contains(&target.file_path) {
2082 analysis.push_str(&format!(
2083 "**File-level connection:** test file `{}` tests `{}` which you modified\n",
2084 test_file, target.file_path
2085 ));
2086 found_connection = true;
2087 break;
2088 }
2089 }
2090 }
2091 }
2092 }
2093
2094 if !found_connection {
2095 analysis.push_str("**Connection:** Could not trace via graph (may be indirect import)\n");
2096 }
2097 } else {
2098 analysis.push_str("**Note:** Test not found in code graph\n");
2099 }
2100 analysis.push('\n');
2101 }
2102
2103 if !changed_names.is_empty() {
2105 analysis.push_str("### Summary\n");
2106 analysis.push_str(&format!("**You changed:** {}\n", changed_names.join(", ")));
2107
2108 let total_callers: usize = changed_node_ids.iter()
2109 .map(|id| self.get_callers(id).len())
2110 .sum();
2111 analysis.push_str(&format!(
2112 "**Total callers of changed code:** {}\n",
2113 total_callers
2114 ));
2115 analysis.push_str("**Repair strategy:** Keep the fix but make it backward-compatible with all callers.\n");
2116 }
2117
2118 analysis
2119 }
2120
2121 pub fn find_symptom_nodes(&self, problem_statement: &str, test_names: &str) -> Vec<&CodeNode> {
2127 let mut result: Vec<&CodeNode> = Vec::new();
2128 let mut seen = HashSet::new();
2129
2130 let test_list: Vec<String> = serde_json::from_str(test_names)
2132 .unwrap_or_else(|_| {
2133 test_names.lines()
2134 .map(|s| s.trim().to_string())
2135 .filter(|s| !s.is_empty())
2136 .collect()
2137 });
2138
2139 for test_id in &test_list {
2140 let short_name = if test_id.contains("::") {
2144 test_id.split("::").last().unwrap_or(test_id)
2145 } else if test_id.contains(" (") {
2146 test_id.split(" (").next().unwrap_or(test_id).trim()
2147 } else {
2148 test_id.as_str()
2149 };
2150
2151 for node in &self.nodes {
2153 if node.kind == NodeKind::Function
2154 && (node.name == short_name || node.name.ends_with(short_name))
2155 && (node.file_path.contains("/tests/")
2156 || node.file_path.contains("/test_")
2157 || node.name.starts_with("test_"))
2158 {
2159 if seen.insert(node.id.clone()) {
2160 result.push(node);
2161 }
2162 }
2163 }
2164 }
2165
2166 for line in problem_statement.lines() {
2168 let trimmed = line.trim();
2169
2170 if trimmed.contains(", in ") {
2172 if let Some(func_part) = trimmed.rsplit(", in ").next() {
2173 let func_name = func_part.trim().trim_start_matches('<').trim_end_matches('>');
2174 if func_name.len() >= 3 && func_name.chars().all(|c| c.is_alphanumeric() || c == '_') {
2175 for node in &self.nodes {
2176 if node.name == func_name && node.kind == NodeKind::Function {
2177 if seen.insert(node.id.clone()) {
2178 result.push(node);
2179 }
2180 }
2181 }
2182 }
2183 }
2184 }
2185
2186 for quote in &['\'', '"', '`'] {
2188 let parts: Vec<&str> = trimmed.split(*quote).collect();
2189 for i in (1..parts.len()).step_by(2) {
2190 let word = parts[i].trim();
2191 if word.len() >= 3
2192 && word.len() <= 60
2193 && word.chars().all(|c| c.is_alphanumeric() || c == '_')
2194 {
2195 for node in &self.nodes {
2196 if node.name == word && (node.kind == NodeKind::Function || node.kind == NodeKind::Class) {
2197 if seen.insert(node.id.clone()) {
2198 result.push(node);
2199 }
2200 }
2201 }
2202 }
2203 }
2204 }
2205 }
2206
2207 for word in problem_statement.split(|c: char| c.is_whitespace() || c == ',' || c == '(' || c == ')' || c == '\'' || c == '"' || c == '`') {
2209 let word = word.trim_matches(|c: char| c == '.' || c == ':' || c == ';');
2210 if word.len() < 4 { continue; }
2211 let has_upper = word.chars().filter(|c| c.is_uppercase()).count() >= 2;
2212 let has_lower = word.chars().any(|c| c.is_lowercase());
2213 let is_ident = word.chars().all(|c| c.is_alphanumeric() || c == '_');
2214 if has_upper && has_lower && is_ident {
2215 for node in &self.nodes {
2216 if node.name == word && node.kind == NodeKind::Class {
2217 if seen.insert(node.id.clone()) {
2218 result.push(node);
2219 }
2220 }
2221 }
2222 }
2223 }
2224
2225 if result.is_empty() {
2227 for test_id in &test_list {
2228 let short_name = if test_id.contains("::") {
2229 test_id.split("::").last().unwrap_or(test_id)
2230 } else if test_id.contains(" (") {
2231 test_id.split(" (").next().unwrap_or(test_id).trim()
2232 } else {
2233 test_id.as_str()
2234 };
2235
2236 let kws: Vec<&str> = short_name.split('_')
2238 .filter(|w| w.len() >= 3 && *w != "test" && *w != "tests")
2239 .collect();
2240 if kws.is_empty() { continue; }
2241
2242 for node in &self.nodes {
2244 if node.file_path.contains("/tests/") || node.file_path.contains("/test_") {
2245 continue;
2246 }
2247 let name_lower = node.name.to_lowercase();
2248 let match_count = kws.iter()
2249 .filter(|kw| name_lower.contains(&kw.to_lowercase()))
2250 .count();
2251 if match_count >= 2 || (match_count >= 1 && kws.len() == 1) {
2252 if seen.insert(node.id.clone()) {
2253 result.push(node);
2254 }
2255 }
2256 }
2257
2258 if test_id.contains(" (") {
2261 let class_part = test_id
2262 .split(" (")
2263 .nth(1)
2264 .unwrap_or("")
2265 .trim_end_matches(')');
2266 let class_name = class_part.rsplit('.').next().unwrap_or("");
2267 if !class_name.is_empty() {
2268 for node in &self.nodes {
2269 if node.kind == NodeKind::Class && node.name == class_name {
2270 let file_id = format!("file:{}", node.file_path);
2271 for edge in self.outgoing_edges(&file_id) {
2272 if edge.relation == EdgeRelation::TestsFor {
2273 if let Some(target) = self.node_by_id(&edge.to) {
2274 if target.kind != NodeKind::File {
2275 if seen.insert(target.id.clone()) {
2276 result.push(target);
2277 }
2278 }
2279 }
2280 for src_node in &self.nodes {
2281 if format!("file:{}", src_node.file_path) == edge.to
2282 && src_node.kind != NodeKind::File
2283 {
2284 if seen.insert(src_node.id.clone()) {
2285 result.push(src_node);
2286 }
2287 }
2288 }
2289 }
2290 }
2291 }
2292 }
2293 }
2294 }
2295 }
2296 }
2297
2298 result
2299 }
2300
2301 pub fn build_unified_graph(
2304 &self,
2305 relevant_nodes: &[&CodeNode],
2306 snippets: &HashMap<String, String>,
2307 issue_id: &str,
2308 issue_description: &str,
2309 ) -> UnifiedGraphResult {
2310 let relevant_ids: HashSet<&str> = relevant_nodes.iter()
2311 .map(|n| n.id.as_str())
2312 .collect();
2313
2314 let mut nodes: Vec<UnifiedNode> = Vec::new();
2316 for code_node in relevant_nodes {
2317 let node_id = code_node.name.replace(|c: char| !c.is_alphanumeric() && c != '_', "_");
2318
2319 let (node_type, layer) = match code_node.kind {
2320 NodeKind::File => ("File".to_string(), "infrastructure"),
2321 NodeKind::Class => ("Component".to_string(), "domain"),
2322 NodeKind::Function | NodeKind::Module => ("Component".to_string(), "application"),
2323 };
2324
2325 let snippet = snippets.get(&code_node.id).cloned();
2326
2327 nodes.push(UnifiedNode {
2328 id: node_id,
2329 node_type,
2330 layer: layer.to_string(),
2331 description: format!("{} in {}", code_node.name, code_node.file_path),
2332 path: Some(code_node.file_path.clone()),
2333 line: code_node.line,
2334 code: snippet,
2335 });
2336 }
2337
2338 let mut edges: Vec<UnifiedEdge> = Vec::new();
2340 let mut seen_keys: HashSet<(String, String, String)> = HashSet::new();
2341
2342 for rel_id in &relevant_ids {
2343 for edge in self.outgoing_edges(rel_id) {
2344 if let (Some(from), Some(to)) = (self.node_by_id(&edge.from), self.node_by_id(&edge.to)) {
2345 let from_id = from.name.replace(|c: char| !c.is_alphanumeric() && c != '_', "_");
2346 let to_id = to.name.replace(|c: char| !c.is_alphanumeric() && c != '_', "_");
2347 let rel = edge.relation.to_string();
2348 let key = (from_id.clone(), to_id.clone(), rel.clone());
2349
2350 if nodes.iter().any(|n| n.id == from_id)
2351 && nodes.iter().any(|n| n.id == to_id)
2352 && seen_keys.insert(key)
2353 {
2354 edges.push(UnifiedEdge {
2355 from: from_id,
2356 to: to_id,
2357 relation: rel,
2358 });
2359 }
2360 }
2361 }
2362 }
2363
2364 let description = if issue_description.len() > 100 {
2365 let mut end = 100;
2366 while end > 0 && !issue_description.is_char_boundary(end) { end -= 1; }
2367 format!("{}...", &issue_description[..end])
2368 } else {
2369 issue_description.to_string()
2370 };
2371
2372 UnifiedGraphResult {
2373 issue_id: issue_id.to_string(),
2374 description,
2375 nodes,
2376 edges,
2377 }
2378 }
2379}
2380
/// Serialisable, simplified graph for one issue, as returned by
/// `CodeGraph::build_unified_graph`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UnifiedGraphResult {
    /// Identifier of the issue this graph was built for.
    pub issue_id: String,
    /// Issue description; `build_unified_graph` shortens long descriptions and appends `...`.
    pub description: String,
    /// Exported nodes.
    pub nodes: Vec<UnifiedNode>,
    /// Exported edges between the ids of `nodes`.
    pub edges: Vec<UnifiedEdge>,
}
2389
/// One node of a [`UnifiedGraphResult`].
///
/// The field notes below describe how `CodeGraph::build_unified_graph` fills the fields.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UnifiedNode {
    /// Node name with characters other than alphanumerics and `_` replaced by `_`.
    pub id: String,
    /// `"File"` for file nodes, `"Component"` for everything else.
    pub node_type: String,
    /// `"infrastructure"`, `"domain"` or `"application"`, chosen from the node kind.
    pub layer: String,
    /// Text of the form `<name> in <file path>`.
    pub description: String,
    /// File path of the underlying code node.
    pub path: Option<String>,
    /// Line of the underlying code node, when known.
    pub line: Option<usize>,
    /// Source snippet for the node, when one was supplied.
    pub code: Option<String>,
}
2401
2402#[derive(Debug, Clone, Serialize, Deserialize)]
2404pub struct UnifiedEdge {
2405 pub from: String,
2406 pub to: String,
2407 pub relation: String,
2408}
2409
2410fn collect_decorators(node: tree_sitter::Node, source: &[u8]) -> Vec<String> {
2413 let mut decorators = Vec::new();
2414 let mut cursor = node.walk();
2415 for child in node.children(&mut cursor) {
2416 if child.kind() == "decorator" {
2417 let dec_text = child.utf8_text(source).unwrap_or("").trim().to_string();
2418 let name = dec_text.trim_start_matches('@');
2419 let name = name.split('(').next().unwrap_or(name).trim();
2420 if !name.is_empty() {
2421 decorators.push(name.to_string());
2422 }
2423 }
2424 }
2425 decorators
2426}
2427
2428fn extract_docstring(node: tree_sitter::Node, source: &str) -> Option<String> {
2429 let body = node.child_by_field_name("body")?;
2430 let mut cursor = body.walk();
2431 for child in body.children(&mut cursor) {
2432 if child.kind() == "comment" {
2433 continue;
2434 }
2435 if child.kind() == "expression_statement" {
2436 if let Some(str_node) = child.child(0) {
2437 if str_node.kind() == "string" || str_node.kind() == "concatenated_string" {
2438 if str_node.start_byte() < source.len() && str_node.end_byte() <= source.len() {
2439 let doc_text = &source[str_node.start_byte()..str_node.end_byte()];
2440 let doc_clean = doc_text
2441 .trim_start_matches("\"\"\"")
2442 .trim_end_matches("\"\"\"")
2443 .trim_start_matches("'''")
2444 .trim_end_matches("'''")
2445 .trim_start_matches('"')
2446 .trim_end_matches('"')
2447 .trim_start_matches('\'')
2448 .trim_end_matches('\'')
2449 .trim();
2450 let first_line = doc_clean.lines().find(|l| !l.trim().is_empty()).unwrap_or("");
2451 if first_line.is_empty() {
2452 return None;
2453 }
2454 let truncated = if first_line.len() > 100 {
2455 let mut end = 100;
2456 while end > 0 && !first_line.is_char_boundary(end) {
2457 end -= 1;
2458 }
2459 &first_line[..end]
2460 } else {
2461 first_line
2462 };
2463 return Some(truncated.to_string());
2464 }
2465 }
2466 }
2467 }
2468 break;
2469 }
2470 None
2471}
2472
2473fn is_in_error_path(node: &tree_sitter::Node, source: &[u8]) -> bool {
2474 let source_str = std::str::from_utf8(source).unwrap_or("");
2475 let mut current = node.parent();
2476 let mut levels = 0;
2477 while let Some(parent) = current {
2478 levels += 1;
2479 if levels > 10 {
2480 break;
2481 }
2482 match parent.kind() {
2483 "except_clause" | "raise_statement" => return true,
2484 "try_statement" => return true,
2485 "if_statement" => {
2486 if let Some(cond) = parent.child_by_field_name("condition") {
2487 if cond.start_byte() < source_str.len() && cond.end_byte() <= source_str.len() {
2488 let cond_text = &source_str[cond.start_byte()..cond.end_byte()];
2489 let lower = cond_text.to_lowercase();
2490 if lower.contains("error")
2491 || lower.contains("exception")
2492 || lower.contains("err")
2493 || lower.contains("fail")
2494 || lower.contains("none")
2495 {
2496 return true;
2497 }
2498 }
2499 }
2500 }
2501 _ => {}
2502 }
2503 current = parent.parent();
2504 }
2505 false
2506}
2507
2508fn extract_python_tree_sitter(
2510 path: &str,
2511 content: &str,
2512 parser: &mut Parser,
2513 class_id_map: &mut HashMap<String, String>,
2514) -> (Vec<CodeNode>, Vec<CodeEdge>, HashSet<String>) {
2515 let mut nodes = Vec::new();
2516 let mut edges = Vec::new();
2517 let mut imports = HashSet::new();
2518
2519 let tree = match parser.parse(content, None) {
2520 Some(t) => t,
2521 None => return (nodes, edges, imports),
2522 };
2523
2524 let file_id = format!("file:{}", path);
2525 let source = content.as_bytes();
2526 let root = tree.root_node();
2527
2528 let text = |node: tree_sitter::Node| -> String {
2529 node.utf8_text(source).unwrap_or("").to_string()
2530 };
2531
2532 let mut cursor = root.walk();
2533 for child in root.children(&mut cursor) {
2534 match child.kind() {
2535 "class_definition" => {
2536 extract_class_node(
2537 child,
2538 source,
2539 content,
2540 path,
2541 &file_id,
2542 &[],
2543 &mut nodes,
2544 &mut edges,
2545 class_id_map,
2546 );
2547 }
2548 "function_definition" => {
2549 extract_function_node(child, source, content, path, &file_id, &[], &mut nodes, &mut edges);
2550 }
2551 "decorated_definition" => {
2552 let decorators = collect_decorators(child, source);
2553 let mut inner_cursor = child.walk();
2554 for inner in child.children(&mut inner_cursor) {
2555 match inner.kind() {
2556 "class_definition" => {
2557 extract_class_node(
2558 inner,
2559 source,
2560 content,
2561 path,
2562 &file_id,
2563 &decorators,
2564 &mut nodes,
2565 &mut edges,
2566 class_id_map,
2567 );
2568 }
2569 "function_definition" => {
2570 extract_function_node(
2571 inner, source, content, path, &file_id, &decorators, &mut nodes, &mut edges,
2572 );
2573 }
2574 _ => {}
2575 }
2576 }
2577 }
2578 "import_statement" => {
2579 let import_text = text(child);
2580 let re_import = Regex::new(r"import\s+([\w.]+)").unwrap();
2581 if let Some(cap) = re_import.captures(&import_text) {
2582 let module = cap[1].to_string();
2583 if !is_stdlib(&module) {
2584 edges.push(CodeEdge {
2585 from: file_id.clone(),
2586 to: format!("module_ref:{}", module),
2587 relation: EdgeRelation::Imports,
2588 weight: 0.5,
2589 call_count: 1,
2590 in_error_path: false,
2591 confidence: 1.0,
2592 });
2593 }
2594 }
2595 }
2596 "import_from_statement" => {
2597 let mut mod_cursor = child.walk();
2598 for mod_child in child.children(&mut mod_cursor) {
2599 if mod_child.kind() == "dotted_name" {
2600 let module = text(mod_child);
2601 if !is_stdlib(&module) {
2602 edges.push(CodeEdge {
2603 from: file_id.clone(),
2604 to: format!("module_ref:{}", module),
2605 relation: EdgeRelation::Imports,
2606 weight: 0.5,
2607 call_count: 1,
2608 in_error_path: false,
2609 confidence: 1.0,
2610 });
2611 }
2612 break;
2613 }
2614 if mod_child.kind() == "relative_import" {
2615 let rel_import_text = text(mod_child);
2616 let trimmed = rel_import_text.trim_start_matches('.');
2617 if !trimmed.is_empty() && !is_stdlib(trimmed) {
2618 edges.push(CodeEdge {
2619 from: file_id.clone(),
2620 to: format!("module_ref:{}", trimmed),
2621 relation: EdgeRelation::Imports,
2622 weight: 0.5,
2623 call_count: 1,
2624 in_error_path: false,
2625 confidence: 1.0,
2626 });
2627 }
2628 break;
2629 }
2630 }
2631
2632 let import_text = child.utf8_text(source).unwrap_or("");
2634 if let Some(after_import) = import_text.split(" import ").nth(1) {
2635 for name in after_import.split(',') {
2636 let clean = name.trim().split(" as ").next().unwrap_or("").trim();
2637 if !clean.is_empty() && clean != "*" && clean != "(" && clean != ")" {
2638 imports.insert(clean.to_string());
2639 }
2640 }
2641 }
2642 }
2643 _ => {}
2644 }
2645 }
2646
2647 (nodes, edges, imports)
2648}
2649
2650fn extract_class_node(
2651 node: tree_sitter::Node,
2652 source: &[u8],
2653 source_str: &str,
2654 path: &str,
2655 file_id: &str,
2656 decorators: &[String],
2657 nodes: &mut Vec<CodeNode>,
2658 edges: &mut Vec<CodeEdge>,
2659 class_id_map: &mut HashMap<String, String>,
2660) {
2661 let class_name = node
2662 .child_by_field_name("name")
2663 .and_then(|n| n.utf8_text(source).ok())
2664 .unwrap_or("")
2665 .to_string();
2666
2667 if class_name.is_empty() {
2668 return;
2669 }
2670
2671 let line_num = node.start_position().row + 1;
2672 let class_id = format!("class:{}:{}", path, class_name);
2673
2674 let class_sig = {
2675 let sig_text = &source_str[node.start_byte()..];
2676 let sig_end = sig_text
2677 .find(":\n")
2678 .or_else(|| sig_text.find(":\r"))
2679 .unwrap_or(sig_text.len().min(200));
2680 Some(sig_text[..sig_end].trim().to_string())
2681 };
2682
2683 let class_docstring = extract_docstring(node, source_str);
2684 let class_line_count = node.end_position().row - node.start_position().row + 1;
2685 let class_is_test =
2686 path.contains("/tests/") || path.contains("/test_") || class_name.starts_with("Test");
2687
2688 nodes.push(CodeNode {
2689 id: class_id.clone(),
2690 kind: NodeKind::Class,
2691 name: class_name.clone(),
2692 file_path: path.to_string(),
2693 line: Some(line_num),
2694 decorators: decorators.to_vec(),
2695 signature: class_sig,
2696 docstring: class_docstring,
2697 line_count: class_line_count,
2698 is_test: class_is_test,
2699 });
2700
2701 edges.push(CodeEdge {
2702 from: class_id.clone(),
2703 to: file_id.to_string(),
2704 relation: EdgeRelation::DefinedIn,
2705 weight: 0.5,
2706 call_count: 1,
2707 in_error_path: false,
2708 confidence: 1.0,
2709 });
2710
2711 class_id_map.insert(class_name.clone(), class_id.clone());
2712
2713 if let Some(superclasses) = node.child_by_field_name("superclasses") {
2715 let mut sc_cursor = superclasses.walk();
2716 for sc_child in superclasses.children(&mut sc_cursor) {
2717 let kind = sc_child.kind();
2718 if kind == "identifier" || kind == "attribute" {
2719 let parent_text = sc_child.utf8_text(source).unwrap_or("");
2720 let parent_name = parent_text.split('.').last().unwrap_or("").trim();
2721 if !parent_name.is_empty() && parent_name != "object" {
2722 edges.push(CodeEdge {
2723 from: class_id.clone(),
2724 to: format!("class_ref:{}", parent_name),
2725 relation: EdgeRelation::Inherits,
2726 weight: 0.5,
2727 call_count: 1,
2728 in_error_path: false,
2729 confidence: 1.0,
2730 });
2731 }
2732 }
2733 }
2734 }
2735
2736 if let Some(body) = node.child_by_field_name("body") {
2738 let mut body_cursor = body.walk();
2739 for body_child in body.children(&mut body_cursor) {
2740 match body_child.kind() {
2741 "function_definition" => {
2742 extract_method_node(body_child, source, source_str, path, &class_id, &[], nodes, edges);
2743 }
2744 "decorated_definition" => {
2745 let method_decorators = collect_decorators(body_child, source);
2746 let mut inner_cursor = body_child.walk();
2747 for inner in body_child.children(&mut inner_cursor) {
2748 if inner.kind() == "function_definition" {
2749 extract_method_node(
2750 inner,
2751 source,
2752 source_str,
2753 path,
2754 &class_id,
2755 &method_decorators,
2756 nodes,
2757 edges,
2758 );
2759 }
2760 }
2761 }
2762 _ => {}
2763 }
2764 }
2765 }
2766}
2767
2768fn extract_method_node(
2769 node: tree_sitter::Node,
2770 source: &[u8],
2771 source_str: &str,
2772 path: &str,
2773 class_id: &str,
2774 decorators: &[String],
2775 nodes: &mut Vec<CodeNode>,
2776 edges: &mut Vec<CodeEdge>,
2777) {
2778 let func_name = node
2779 .child_by_field_name("name")
2780 .and_then(|n| n.utf8_text(source).ok())
2781 .unwrap_or("")
2782 .to_string();
2783
2784 if func_name.is_empty() {
2785 return;
2786 }
2787
2788 let line_num = node.start_position().row + 1;
2789 let parent_name = class_id.rsplit(':').next().unwrap_or("");
2791 let method_id = if parent_name.is_empty() {
2792 format!("method:{}:{}", path, func_name)
2793 } else {
2794 format!("method:{}:{}.{}", path, parent_name, func_name)
2795 };
2796
2797 let signature = {
2798 let sig_text = &source_str[node.start_byte()..];
2799 let sig_end = sig_text
2800 .find(":\n")
2801 .or_else(|| sig_text.find(":\r"))
2802 .unwrap_or(sig_text.len().min(200));
2803 Some(sig_text[..sig_end].trim().to_string())
2804 };
2805 let docstring = extract_docstring(node, source_str);
2806 let line_count = node.end_position().row - node.start_position().row + 1;
2807 let is_test = path.contains("/tests/")
2808 || path.contains("/test_")
2809 || func_name.starts_with("test_")
2810 || func_name.starts_with("Test");
2811
2812 nodes.push(CodeNode {
2813 id: method_id.clone(),
2814 kind: NodeKind::Function,
2815 name: func_name,
2816 file_path: path.to_string(),
2817 line: Some(line_num),
2818 decorators: decorators.to_vec(),
2819 signature,
2820 docstring,
2821 line_count,
2822 is_test,
2823 });
2824
2825 edges.push(CodeEdge {
2826 from: method_id,
2827 to: class_id.to_string(),
2828 relation: EdgeRelation::DefinedIn,
2829 weight: 0.5,
2830 call_count: 1,
2831 in_error_path: false,
2832 confidence: 1.0,
2833 });
2834}
2835
2836fn extract_function_node(
2837 node: tree_sitter::Node,
2838 source: &[u8],
2839 source_str: &str,
2840 path: &str,
2841 file_id: &str,
2842 decorators: &[String],
2843 nodes: &mut Vec<CodeNode>,
2844 edges: &mut Vec<CodeEdge>,
2845) {
2846 let func_name = node
2847 .child_by_field_name("name")
2848 .and_then(|n| n.utf8_text(source).ok())
2849 .unwrap_or("")
2850 .to_string();
2851
2852 if func_name.is_empty() {
2853 return;
2854 }
2855
2856 let line_num = node.start_position().row + 1;
2857 let func_id = format!("func:{}:{}", path, func_name);
2858
2859 let signature = {
2860 let sig_text = &source_str[node.start_byte()..];
2861 let sig_end = sig_text
2862 .find(":\n")
2863 .or_else(|| sig_text.find(":\r"))
2864 .unwrap_or(sig_text.len().min(200));
2865 Some(sig_text[..sig_end].trim().to_string())
2866 };
2867 let docstring = extract_docstring(node, source_str);
2868 let line_count = node.end_position().row - node.start_position().row + 1;
2869 let is_test = path.contains("/tests/")
2870 || path.contains("/test_")
2871 || func_name.starts_with("test_")
2872 || func_name.starts_with("Test");
2873
2874 nodes.push(CodeNode {
2875 id: func_id.clone(),
2876 kind: NodeKind::Function,
2877 name: func_name,
2878 file_path: path.to_string(),
2879 line: Some(line_num),
2880 decorators: decorators.to_vec(),
2881 signature,
2882 docstring,
2883 line_count,
2884 is_test,
2885 });
2886
2887 edges.push(CodeEdge {
2888 from: func_id,
2889 to: file_id.to_string(),
2890 relation: EdgeRelation::DefinedIn,
2891 weight: 0.5,
2892 call_count: 1,
2893 in_error_path: false,
2894 confidence: 1.0,
2895 });
2896}
2897
2898fn extract_calls_from_tree(
2900 root: tree_sitter::Node,
2901 source: &[u8],
2902 rel_path: &str,
2903 func_name_map: &HashMap<String, Vec<String>>,
2904 method_to_class: &HashMap<String, String>,
2905 class_parents: &HashMap<String, Vec<String>>,
2906 file_func_ids: &HashSet<String>,
2907 file_imported_names: &HashMap<String, HashSet<String>>,
2908 package_dir: &str,
2909 class_init_map: &HashMap<String, Vec<(String, String)>>,
2910 node_pkg_map: &HashMap<String, String>,
2911 edges: &mut Vec<CodeEdge>,
2912) {
2913 let mut scope_map: Vec<(usize, usize, String, Option<String>)> = Vec::new();
2915 build_scope_map(root, source, rel_path, &mut scope_map);
2916
2917 let mut stack = vec![root];
2919 while let Some(node) = stack.pop() {
2920 if node.kind() == "string"
2921 || node.kind() == "comment"
2922 || node.kind() == "string_content"
2923 || node.kind() == "concatenated_string"
2924 {
2925 continue;
2926 }
2927
2928 if node.kind() == "call" {
2929 let call_line = node.start_position().row + 1;
2930 let error_path = is_in_error_path(&node, source);
2931
2932 let scope = scope_map
2933 .iter()
2934 .filter(|(start, end, _, _)| call_line >= *start && call_line <= *end)
2935 .max_by_key(|(start, _, _, _)| *start);
2936
2937 if let Some((_start, _end, caller_id, caller_class)) = scope {
2938 if let Some(function_node) = node.child_by_field_name("function") {
2939 let edges_before = edges.len();
2940 match function_node.kind() {
2941 "identifier" => {
2942 let callee_name = function_node.utf8_text(source).unwrap_or("");
2943 if !callee_name.is_empty() && !is_python_builtin(callee_name) {
2944 resolve_and_add_call_edge(
2945 caller_id,
2946 callee_name,
2947 func_name_map,
2948 file_func_ids,
2949 file_imported_names,
2950 rel_path,
2951 package_dir,
2952 class_init_map,
2953 node_pkg_map,
2954 false,
2955 edges,
2956 );
2957 }
2958 }
2959 "attribute" => {
2960 let obj_node = function_node.child_by_field_name("object");
2961 let attr_node = function_node.child_by_field_name("attribute");
2962
2963 if let (Some(obj), Some(attr)) = (obj_node, attr_node) {
2964 let obj_text = obj.utf8_text(source).unwrap_or("");
2965 let method_name = attr.utf8_text(source).unwrap_or("");
2966
2967 if (obj_text == "self" || obj_text == "cls") && !method_name.is_empty() {
2968 resolve_self_method_call(
2969 caller_id,
2970 method_name,
2971 caller_class.as_deref(),
2972 func_name_map,
2973 method_to_class,
2974 class_parents,
2975 file_func_ids,
2976 edges,
2977 );
2978 } else if !method_name.is_empty() && !is_python_builtin(method_name) {
2979 resolve_and_add_call_edge(
2980 caller_id,
2981 method_name,
2982 func_name_map,
2983 file_func_ids,
2984 file_imported_names,
2985 rel_path,
2986 package_dir,
2987 class_init_map,
2988 node_pkg_map,
2989 true,
2990 edges,
2991 );
2992 }
2993 }
2994 }
2995 _ => {}
2996 }
2997 if error_path {
2998 for edge in edges[edges_before..].iter_mut() {
2999 edge.in_error_path = true;
3000 }
3001 }
3002 }
3003 }
3004 }
3005
3006 let child_count = node.child_count();
3007 for i in (0..child_count).rev() {
3008 if let Some(child) = node.child(i) {
3009 stack.push(child);
3010 }
3011 }
3012 }
3013}
3014
3015fn build_scope_map(
3016 node: tree_sitter::Node,
3017 source: &[u8],
3018 rel_path: &str,
3019 scope_map: &mut Vec<(usize, usize, String, Option<String>)>,
3020) {
3021 let mut stack: Vec<(tree_sitter::Node, Option<String>)> = vec![(node, None)];
3022
3023 while let Some((current, class_ctx)) = stack.pop() {
3024 match current.kind() {
3025 "class_definition" => {
3026 let class_name = current
3027 .child_by_field_name("name")
3028 .and_then(|n| n.utf8_text(source).ok())
3029 .unwrap_or("");
3030 let class_id = if !class_name.is_empty() {
3031 Some(format!("class:{}:{}", rel_path, class_name))
3032 } else {
3033 class_ctx.clone()
3034 };
3035
3036 let child_count = current.child_count();
3037 for i in (0..child_count).rev() {
3038 if let Some(child) = current.child(i) {
3039 stack.push((child, class_id.clone()));
3040 }
3041 }
3042 }
3043 "function_definition" => {
3044 let func_name = current
3045 .child_by_field_name("name")
3046 .and_then(|n| n.utf8_text(source).ok())
3047 .unwrap_or("");
3048
3049 if !func_name.is_empty() {
3050 let start_line = current.start_position().row + 1;
3051 let end_line = current.end_position().row + 1;
3052
3053 let func_id = if let Some(ref cls) = class_ctx {
3054 let cls_name = cls.rsplit(':').next().unwrap_or("");
3055 if cls_name.is_empty() {
3056 format!("method:{}:{}", rel_path, func_name)
3057 } else {
3058 format!("method:{}:{}.{}", rel_path, cls_name, func_name)
3059 }
3060 } else {
3061 format!("func:{}:{}", rel_path, func_name)
3062 };
3063
3064 scope_map.push((start_line, end_line, func_id, class_ctx.clone()));
3065 }
3066
3067 let child_count = current.child_count();
3068 for i in (0..child_count).rev() {
3069 if let Some(child) = current.child(i) {
3070 stack.push((child, class_ctx.clone()));
3071 }
3072 }
3073 }
3074 "decorated_definition" => {
3075 let child_count = current.child_count();
3076 for i in (0..child_count).rev() {
3077 if let Some(child) = current.child(i) {
3078 stack.push((child, class_ctx.clone()));
3079 }
3080 }
3081 }
3082 _ => {
3083 let child_count = current.child_count();
3084 for i in (0..child_count).rev() {
3085 if let Some(child) = current.child(i) {
3086 stack.push((child, class_ctx.clone()));
3087 }
3088 }
3089 }
3090 }
3091 }
3092}
3093
/// Returns `true` when `name` is one of a fixed set of very common method
/// names: the usual Python dunder methods plus a few generic verbs such as
/// `get`, `save` or `run`. The comparison is exact and case-sensitive.
fn is_common_dunder(name: &str) -> bool {
    const COMMON_NAMES: [&str; 41] = [
        "__init__",
        "__str__",
        "__repr__",
        "__eq__",
        "__ne__",
        "__hash__",
        "__len__",
        "__iter__",
        "__next__",
        "__getitem__",
        "__setitem__",
        "__delitem__",
        "__contains__",
        "__call__",
        "__enter__",
        "__exit__",
        "__get__",
        "__set__",
        "__delete__",
        "__getattr__",
        "__setattr__",
        "__bool__",
        "__lt__",
        "__le__",
        "__gt__",
        "__ge__",
        "__add__",
        "__sub__",
        "__mul__",
        "__new__",
        "__del__",
        "__format__",
        "get",
        "set",
        "update",
        "delete",
        "save",
        "clean",
        "run",
        "setup",
        "teardown",
    ];

    COMMON_NAMES.iter().any(|known| *known == name)
}
3140
3141fn resolve_and_add_call_edge(
3142 caller_id: &str,
3143 callee_name: &str,
3144 func_name_map: &HashMap<String, Vec<String>>,
3145 file_func_ids: &HashSet<String>,
3146 file_imported_names: &HashMap<String, HashSet<String>>,
3147 rel_path: &str,
3148 package_dir: &str,
3149 class_init_map: &HashMap<String, Vec<(String, String)>>,
3150 node_pkg_map: &HashMap<String, String>,
3151 is_attribute_call: bool,
3152 edges: &mut Vec<CodeEdge>,
3153) {
3154 if let Some(callee_ids) = func_name_map.get(callee_name) {
3155 let same_file: Vec<&String> = callee_ids
3156 .iter()
3157 .filter(|id| file_func_ids.contains(*id))
3158 .collect();
3159 let imported: Vec<&String> = callee_ids
3160 .iter()
3161 .filter(|_id| {
3162 file_imported_names
3163 .get(rel_path)
3164 .map(|names| names.contains(callee_name))
3165 .unwrap_or(false)
3166 })
3167 .collect();
3168 let same_pkg: Vec<&String> = callee_ids
3169 .iter()
3170 .filter(|id| {
3171 node_pkg_map
3172 .get(id.as_str())
3173 .map(|pkg| pkg == package_dir)
3174 .unwrap_or(false)
3175 })
3176 .collect();
3177
3178 let global_limit = if is_attribute_call && !is_common_dunder(callee_name) {
3179 20
3180 } else {
3181 3
3182 };
3183
3184 let confidence = if !same_file.is_empty() {
3185 0.8_f32
3186 } else if !imported.is_empty() {
3187 0.8
3188 } else if !same_pkg.is_empty() {
3189 0.7
3190 } else if is_attribute_call {
3191 0.3
3192 } else {
3193 0.5
3194 };
3195
3196 let weight = if !same_file.is_empty() || !imported.is_empty() || !same_pkg.is_empty() {
3197 0.5
3198 } else if is_attribute_call {
3199 0.8
3200 } else {
3201 0.5
3202 };
3203
3204 let targets = if !same_file.is_empty() {
3205 same_file
3206 } else if !imported.is_empty() {
3207 imported
3208 } else if !same_pkg.is_empty() {
3209 same_pkg
3210 } else if callee_ids.len() <= global_limit {
3211 callee_ids.iter().collect()
3212 } else {
3213 vec![]
3214 };
3215
3216 for callee_id in targets {
3217 if callee_id != caller_id {
3218 edges.push(CodeEdge {
3219 from: caller_id.to_string(),
3220 to: callee_id.clone(),
3221 relation: EdgeRelation::Calls,
3222 weight,
3223 call_count: 1,
3224 in_error_path: false,
3225 confidence,
3226 });
3227 }
3228 }
3229 } else if callee_name
3230 .chars()
3231 .next()
3232 .map(|c| c.is_uppercase())
3233 .unwrap_or(false)
3234 {
3235 if let Some(init_entries) = class_init_map.get(callee_name) {
3237 let same_file: Vec<&str> = init_entries
3238 .iter()
3239 .filter(|(fp, _)| fp == rel_path)
3240 .map(|(_, id)| id.as_str())
3241 .collect();
3242 let is_imported = file_imported_names
3243 .get(rel_path)
3244 .map(|names| names.contains(callee_name))
3245 .unwrap_or(false);
3246 let imported: Vec<&str> = if is_imported {
3247 init_entries.iter().map(|(_, id)| id.as_str()).collect()
3248 } else {
3249 vec![]
3250 };
3251 let same_pkg: Vec<&str> = init_entries
3252 .iter()
3253 .filter(|(fp, _)| fp.rsplitn(2, '/').nth(1).unwrap_or("") == package_dir)
3254 .map(|(_, id)| id.as_str())
3255 .collect();
3256
3257 let (targets, confidence): (Vec<&str>, f32) = if !same_file.is_empty() {
3258 (same_file, 0.8)
3259 } else if !imported.is_empty() {
3260 (imported, 0.7)
3261 } else if !same_pkg.is_empty() {
3262 (same_pkg, 0.6)
3263 } else if init_entries.len() <= 3 {
3264 (init_entries.iter().map(|(_, id)| id.as_str()).collect(), 0.5)
3265 } else {
3266 (vec![], 0.0)
3267 };
3268
3269 for init_id in targets {
3270 if init_id != caller_id {
3271 edges.push(CodeEdge {
3272 from: caller_id.to_string(),
3273 to: init_id.to_string(),
3274 relation: EdgeRelation::Calls,
3275 weight: 0.5,
3276 call_count: 1,
3277 in_error_path: false,
3278 confidence,
3279 });
3280 }
3281 }
3282 }
3283 }
3284}
3285
3286fn resolve_self_method_call(
3287 caller_id: &str,
3288 method_name: &str,
3289 caller_class: Option<&str>,
3290 func_name_map: &HashMap<String, Vec<String>>,
3291 method_to_class: &HashMap<String, String>,
3292 class_parents: &HashMap<String, Vec<String>>,
3293 file_func_ids: &HashSet<String>,
3294 edges: &mut Vec<CodeEdge>,
3295) {
3296 if let Some(callee_ids) = func_name_map.get(method_name) {
3297 if let Some(class_id) = caller_class {
3298 let mut valid_classes = vec![class_id.to_string()];
3299 if let Some(parents) = class_parents.get(class_id) {
3300 valid_classes.extend(parents.iter().cloned());
3301 }
3302
3303 let scoped: Vec<&String> = callee_ids
3304 .iter()
3305 .filter(|id| {
3306 method_to_class
3307 .get(*id)
3308 .map(|cls| valid_classes.contains(cls))
3309 .unwrap_or(false)
3310 })
3311 .collect();
3312
3313 let targets = if !scoped.is_empty() {
3314 scoped
3315 } else if callee_ids.len() <= 3 {
3316 callee_ids.iter().collect()
3317 } else {
3318 callee_ids
3319 .iter()
3320 .filter(|id| file_func_ids.contains(*id))
3321 .collect()
3322 };
3323
3324 for callee_id in targets {
3325 if callee_id != caller_id {
3326 edges.push(CodeEdge {
3327 from: caller_id.to_string(),
3328 to: callee_id.clone(),
3329 relation: EdgeRelation::Calls,
3330 weight: 0.5,
3331 call_count: 1,
3332 in_error_path: false,
3333 confidence: 0.9,
3334 });
3335 }
3336 }
3337 } else {
3338 for callee_id in callee_ids {
3339 if callee_id != caller_id && file_func_ids.contains(callee_id) {
3340 edges.push(CodeEdge {
3341 from: caller_id.to_string(),
3342 to: callee_id.clone(),
3343 relation: EdgeRelation::Calls,
3344 weight: 0.5,
3345 call_count: 1,
3346 in_error_path: false,
3347 confidence: 0.6,
3348 });
3349 }
3350 }
3351 }
3352 }
3353}
3354
3355fn add_override_edges(nodes: &[CodeNode], edges: &mut Vec<CodeEdge>) {
3356 let mut class_methods: HashMap<String, Vec<(String, String)>> = HashMap::new();
3357 for edge in edges.iter() {
3358 if edge.relation == EdgeRelation::DefinedIn && edge.to.starts_with("class:") {
3359 if let Some(method) = nodes.iter().find(|n| n.id == edge.from && n.kind == NodeKind::Function) {
3360 class_methods
3361 .entry(edge.to.clone())
3362 .or_default()
3363 .push((method.name.clone(), method.id.clone()));
3364 }
3365 }
3366 }
3367
3368 let inherits_pairs: Vec<(String, String)> = edges
3369 .iter()
3370 .filter(|e| e.relation == EdgeRelation::Inherits)
3371 .map(|e| (e.from.clone(), e.to.clone()))
3372 .collect();
3373
3374 let mut new_edges = Vec::new();
3375 for (sub_class_id, base_class_id) in &inherits_pairs {
3376 let sub_methods = match class_methods.get(sub_class_id) {
3377 Some(m) => m,
3378 None => continue,
3379 };
3380 let base_methods = match class_methods.get(base_class_id) {
3381 Some(m) => m,
3382 None => continue,
3383 };
3384
3385 for (sub_name, sub_id) in sub_methods {
3386 for (base_name, base_id) in base_methods {
3387 if sub_name == base_name && sub_id != base_id {
3388 new_edges.push(CodeEdge {
3389 from: base_id.clone(),
3390 to: sub_id.clone(),
3391 relation: EdgeRelation::Overrides,
3392 weight: 0.4,
3393 call_count: 1,
3394 in_error_path: false,
3395 confidence: 0.6,
3396 });
3397 }
3398 }
3399 }
3400 }
3401
3402 edges.extend(new_edges);
3403}
3404
3405fn extract_rust_tree_sitter(
3412 path: &str,
3413 content: &str,
3414 parser: &mut Parser,
3415 class_id_map: &mut HashMap<String, String>,
3416) -> (Vec<CodeNode>, Vec<CodeEdge>, HashSet<String>) {
3417 let mut nodes = Vec::new();
3418 let mut edges = Vec::new();
3419 let mut imports = HashSet::new();
3420
3421 if parser.set_language(&tree_sitter_rust::LANGUAGE.into()).is_err() {
3423 return (nodes, edges, imports);
3424 }
3425
3426 let tree = match parser.parse(content, None) {
3427 Some(t) => t,
3428 None => return (nodes, edges, imports),
3429 };
3430
3431 let file_id = format!("file:{}", path);
3432 let source = content.as_bytes();
3433 let root = tree.root_node();
3434
3435 let mut impl_target_map: HashMap<String, String> = HashMap::new();
3437
3438 let mut cursor = root.walk();
3439 for child in root.children(&mut cursor) {
3440 extract_rust_node(
3441 child,
3442 source,
3443 content,
3444 path,
3445 &file_id,
3446 &mut nodes,
3447 &mut edges,
3448 class_id_map,
3449 &mut impl_target_map,
3450 &mut imports,
3451 "", );
3453 }
3454
3455 (nodes, edges, imports)
3456}
3457
3458fn extract_rust_node(
3460 node: tree_sitter::Node,
3461 source: &[u8],
3462 source_str: &str,
3463 path: &str,
3464 file_id: &str,
3465 nodes: &mut Vec<CodeNode>,
3466 edges: &mut Vec<CodeEdge>,
3467 class_id_map: &mut HashMap<String, String>,
3468 impl_target_map: &mut HashMap<String, String>,
3469 imports: &mut HashSet<String>,
3470 module_prefix: &str,
3471) {
3472 let text = |n: tree_sitter::Node| -> String {
3473 n.utf8_text(source).unwrap_or("").to_string()
3474 };
3475
3476 match node.kind() {
3477 "use_declaration" => {
3478 let use_text = text(node);
3480 if let Some(path_part) = use_text.strip_prefix("use ") {
3482 let clean_path = path_part.trim_end_matches(';').trim();
3483 if !clean_path.starts_with("std::") && !clean_path.starts_with("core::") && !clean_path.starts_with("alloc::") {
3485 let module = if clean_path.contains('{') {
3487 clean_path.split("::").next().unwrap_or(clean_path).to_string()
3488 } else {
3489 clean_path.split("::").take(2).collect::<Vec<_>>().join("::")
3490 };
3491 if !module.is_empty() {
3492 edges.push(CodeEdge {
3493 from: file_id.to_string(),
3494 to: format!("module_ref:{}", module),
3495 relation: EdgeRelation::Imports,
3496 weight: 0.5,
3497 call_count: 1,
3498 in_error_path: false,
3499 confidence: 1.0,
3500 });
3501 imports.insert(module);
3502 }
3503 }
3504 }
3505 }
3506
3507 "struct_item" => {
3508 let name = node.child_by_field_name("name")
3509 .and_then(|n| n.utf8_text(source).ok())
3510 .unwrap_or("")
3511 .to_string();
3512 if name.is_empty() { return; }
3513
3514 let full_name = if module_prefix.is_empty() { name.clone() } else { format!("{}::{}", module_prefix, name) };
3515 let line = node.start_position().row + 1;
3516 let class_id = format!("class:{}:{}", path, full_name);
3517
3518 let signature = extract_rust_signature(node, source_str);
3519 let docstring = extract_rust_docstring(node, source_str);
3520 let line_count = node.end_position().row - node.start_position().row + 1;
3521
3522 nodes.push(CodeNode {
3523 id: class_id.clone(),
3524 kind: NodeKind::Class,
3525 name: full_name.clone(),
3526 file_path: path.to_string(),
3527 line: Some(line),
3528 decorators: extract_rust_attributes(node, source),
3529 signature,
3530 docstring,
3531 line_count,
3532 is_test: path.contains("/tests/") || full_name.contains("Test"),
3533 });
3534
3535 edges.push(CodeEdge::defined_in(&class_id, file_id));
3536 class_id_map.insert(name.clone(), class_id);
3537 }
3538
3539 "enum_item" => {
3540 let name = node.child_by_field_name("name")
3541 .and_then(|n| n.utf8_text(source).ok())
3542 .unwrap_or("")
3543 .to_string();
3544 if name.is_empty() { return; }
3545
3546 let full_name = if module_prefix.is_empty() { name.clone() } else { format!("{}::{}", module_prefix, name) };
3547 let line = node.start_position().row + 1;
3548 let class_id = format!("class:{}:{}", path, full_name);
3549
3550 let signature = extract_rust_signature(node, source_str);
3551 let docstring = extract_rust_docstring(node, source_str);
3552 let line_count = node.end_position().row - node.start_position().row + 1;
3553
3554 nodes.push(CodeNode {
3555 id: class_id.clone(),
3556 kind: NodeKind::Class,
3557 name: full_name.clone(),
3558 file_path: path.to_string(),
3559 line: Some(line),
3560 decorators: extract_rust_attributes(node, source),
3561 signature,
3562 docstring,
3563 line_count,
3564 is_test: path.contains("/tests/") || full_name.contains("Test"),
3565 });
3566
3567 edges.push(CodeEdge::defined_in(&class_id, file_id));
3568 class_id_map.insert(name.clone(), class_id);
3569 }
3570
3571 "trait_item" => {
3572 let name = node.child_by_field_name("name")
3573 .and_then(|n| n.utf8_text(source).ok())
3574 .unwrap_or("")
3575 .to_string();
3576 if name.is_empty() { return; }
3577
3578 let full_name = if module_prefix.is_empty() { name.clone() } else { format!("{}::{}", module_prefix, name) };
3579 let line = node.start_position().row + 1;
3580 let trait_id = format!("class:{}:{}", path, full_name);
3581
3582 let signature = extract_rust_signature(node, source_str);
3583 let docstring = extract_rust_docstring(node, source_str);
3584 let line_count = node.end_position().row - node.start_position().row + 1;
3585
3586 nodes.push(CodeNode {
3587 id: trait_id.clone(),
3588 kind: NodeKind::Class,
3589 name: full_name.clone(),
3590 file_path: path.to_string(),
3591 line: Some(line),
3592 decorators: extract_rust_attributes(node, source),
3593 signature,
3594 docstring,
3595 line_count,
3596 is_test: path.contains("/tests/") || full_name.contains("Test"),
3597 });
3598
3599 edges.push(CodeEdge::defined_in(&trait_id, file_id));
3600 class_id_map.insert(name.clone(), trait_id.clone());
3601
3602 if let Some(body) = node.child_by_field_name("body") {
3604 let mut body_cursor = body.walk();
3605 for body_child in body.children(&mut body_cursor) {
3606 if body_child.kind() == "function_item" || body_child.kind() == "function_signature_item" {
3607 extract_rust_method(body_child, source, source_str, path, &trait_id, nodes, edges);
3608 }
3609 }
3610 }
3611 }
3612
3613 "impl_item" => {
3614 let mut trait_name: Option<String> = None;
3616 let mut type_name: Option<String> = None;
3617
3618 let mut cursor = node.walk();
3620 for child in node.children(&mut cursor) {
3621 match child.kind() {
3622 "type_identifier" | "generic_type" | "primitive_type" | "scoped_type_identifier" => {
3623 let name = if child.kind() == "generic_type" {
3625 child.child_by_field_name("type")
3627 .and_then(|n| n.utf8_text(source).ok())
3628 .unwrap_or("")
3629 .to_string()
3630 } else if child.kind() == "scoped_type_identifier" {
3631 child.utf8_text(source).ok()
3633 .map(|s| s.rsplit("::").next().unwrap_or(s).to_string())
3634 .unwrap_or_default()
3635 } else {
3636 text(child)
3637 };
3638
3639 if type_name.is_none() {
3640 type_name = Some(name);
3641 } else if trait_name.is_none() {
3642 trait_name = type_name.take();
3644 type_name = Some(name);
3645 }
3646 }
3647 _ => {}
3648 }
3649 }
3650
3651 let type_name = match type_name {
3652 Some(n) => n,
3653 None => return,
3654 };
3655
3656 let type_id = class_id_map.get(&type_name)
3658 .cloned()
3659 .unwrap_or_else(|| format!("class:{}:{}", path, type_name));
3660
3661 if let Some(ref trait_n) = trait_name {
3663 edges.push(CodeEdge {
3664 from: type_id.clone(),
3665 to: format!("class_ref:{}", trait_n),
3666 relation: EdgeRelation::Inherits,
3667 weight: 0.5,
3668 call_count: 1,
3669 in_error_path: false,
3670 confidence: 1.0,
3671 });
3672 }
3673
3674 if let Some(body) = node.child_by_field_name("body") {
3676 let mut body_cursor = body.walk();
3677 for body_child in body.children(&mut body_cursor) {
3678 if body_child.kind() == "function_item" {
3679 extract_rust_method(body_child, source, source_str, path, &type_id, nodes, edges);
3680 }
3681 }
3682 }
3683 }
3684
3685 "function_item" => {
3686 let name = node.child_by_field_name("name")
3688 .and_then(|n| n.utf8_text(source).ok())
3689 .unwrap_or("")
3690 .to_string();
3691 if name.is_empty() { return; }
3692
3693 let full_name = if module_prefix.is_empty() { name.clone() } else { format!("{}::{}", module_prefix, name) };
3694 let line = node.start_position().row + 1;
3695 let func_id = format!("func:{}:{}", path, full_name);
3696
3697 let signature = extract_rust_signature(node, source_str);
3698 let docstring = extract_rust_docstring(node, source_str);
3699 let line_count = node.end_position().row - node.start_position().row + 1;
3700 let is_test = path.contains("/tests/") || full_name.starts_with("test_") ||
3701 extract_rust_attributes(node, source).iter().any(|a| a.contains("test"));
3702
3703 nodes.push(CodeNode {
3704 id: func_id.clone(),
3705 kind: NodeKind::Function,
3706 name: full_name,
3707 file_path: path.to_string(),
3708 line: Some(line),
3709 decorators: extract_rust_attributes(node, source),
3710 signature,
3711 docstring,
3712 line_count,
3713 is_test,
3714 });
3715
3716 edges.push(CodeEdge::defined_in(&func_id, file_id));
3717 }
3718
3719 "mod_item" => {
3720 let name = node.child_by_field_name("name")
3721 .and_then(|n| n.utf8_text(source).ok())
3722 .unwrap_or("")
3723 .to_string();
3724 if name.is_empty() { return; }
3725
3726 let new_prefix = if module_prefix.is_empty() { name.clone() } else { format!("{}::{}", module_prefix, name) };
3727
3728 if let Some(body) = node.child_by_field_name("body") {
3730 let mut body_cursor = body.walk();
3731 for body_child in body.children(&mut body_cursor) {
3732 extract_rust_node(
3733 body_child,
3734 source,
3735 source_str,
3736 path,
3737 file_id,
3738 nodes,
3739 edges,
3740 class_id_map,
3741 impl_target_map,
3742 imports,
3743 &new_prefix,
3744 );
3745 }
3746 }
3747 }
3748
3749 "type_item" => {
3750 let name = node.child_by_field_name("name")
3752 .and_then(|n| n.utf8_text(source).ok())
3753 .unwrap_or("")
3754 .to_string();
3755 if name.is_empty() { return; }
3756
3757 let full_name = if module_prefix.is_empty() { name.clone() } else { format!("{}::{}", module_prefix, name) };
3758 let line = node.start_position().row + 1;
3759 let type_id = format!("class:{}:{}", path, full_name);
3760
3761 let signature = extract_rust_signature(node, source_str);
3762 let line_count = node.end_position().row - node.start_position().row + 1;
3763
3764 nodes.push(CodeNode {
3765 id: type_id.clone(),
3766 kind: NodeKind::Class,
3767 name: full_name.clone(),
3768 file_path: path.to_string(),
3769 line: Some(line),
3770 decorators: extract_rust_attributes(node, source),
3771 signature,
3772 docstring: None,
3773 line_count,
3774 is_test: false,
3775 });
3776
3777 edges.push(CodeEdge::defined_in(&type_id, file_id));
3778 class_id_map.insert(name, type_id);
3779 }
3780
3781 "const_item" | "static_item" => {
3782 let name = node.child_by_field_name("name")
3784 .and_then(|n| n.utf8_text(source).ok())
3785 .unwrap_or("")
3786 .to_string();
3787 if name.is_empty() || name.starts_with('_') { return; }
3788
3789 let full_name = if module_prefix.is_empty() { name.clone() } else { format!("{}::{}", module_prefix, name) };
3790 let line = node.start_position().row + 1;
3791 let const_id = format!("const:{}:{}", path, full_name);
3792
3793 let signature = extract_rust_signature(node, source_str);
3794
3795 nodes.push(CodeNode {
3796 id: const_id.clone(),
3797 kind: NodeKind::Class, name: full_name,
3799 file_path: path.to_string(),
3800 line: Some(line),
3801 decorators: extract_rust_attributes(node, source),
3802 signature,
3803 docstring: None,
3804 line_count: 1,
3805 is_test: false,
3806 });
3807
3808 edges.push(CodeEdge::defined_in(&const_id, file_id));
3809 }
3810
3811 "macro_definition" => {
3812 let name = node.child_by_field_name("name")
3814 .and_then(|n| n.utf8_text(source).ok())
3815 .unwrap_or("")
3816 .to_string();
3817 if name.is_empty() { return; }
3818
3819 let full_name = if module_prefix.is_empty() { name.clone() } else { format!("{}::{}", module_prefix, name) };
3820 let line = node.start_position().row + 1;
3821 let macro_id = format!("macro:{}:{}", path, full_name);
3822
3823 let line_count = node.end_position().row - node.start_position().row + 1;
3824
3825 nodes.push(CodeNode {
3826 id: macro_id.clone(),
3827 kind: NodeKind::Function, name: format!("{}!", full_name),
3829 file_path: path.to_string(),
3830 line: Some(line),
3831 decorators: vec!["macro".to_string()],
3832 signature: Some(format!("macro_rules! {}", name)),
3833 docstring: extract_rust_docstring(node, source_str),
3834 line_count,
3835 is_test: false,
3836 });
3837
3838 edges.push(CodeEdge::defined_in(¯o_id, file_id));
3839 }
3840
3841 _ => {}
3842 }
3843}
3844
3845fn extract_rust_method(
3847 node: tree_sitter::Node,
3848 source: &[u8],
3849 source_str: &str,
3850 path: &str,
3851 parent_id: &str,
3852 nodes: &mut Vec<CodeNode>,
3853 edges: &mut Vec<CodeEdge>,
3854) {
3855 let name = node.child_by_field_name("name")
3856 .and_then(|n| n.utf8_text(source).ok())
3857 .unwrap_or("")
3858 .to_string();
3859 if name.is_empty() { return; }
3860
3861 let line = node.start_position().row + 1;
3862 let parent_name = parent_id.rsplit(':').next().unwrap_or("");
3865 let method_id = if parent_name.is_empty() {
3866 format!("method:{}:{}", path, name)
3867 } else {
3868 format!("method:{}:{}.{}", path, parent_name, name)
3869 };
3870
3871 let signature = extract_rust_signature(node, source_str);
3872 let docstring = extract_rust_docstring(node, source_str);
3873 let line_count = node.end_position().row - node.start_position().row + 1;
3874 let attrs = extract_rust_attributes(node, source);
3875 let is_test = path.contains("/tests/") || name.starts_with("test_") ||
3876 attrs.iter().any(|a| a.contains("test"));
3877
3878 nodes.push(CodeNode {
3879 id: method_id.clone(),
3880 kind: NodeKind::Function,
3881 name,
3882 file_path: path.to_string(),
3883 line: Some(line),
3884 decorators: attrs,
3885 signature,
3886 docstring,
3887 line_count,
3888 is_test,
3889 });
3890
3891 edges.push(CodeEdge {
3892 from: method_id,
3893 to: parent_id.to_string(),
3894 relation: EdgeRelation::DefinedIn,
3895 weight: 0.5,
3896 call_count: 1,
3897 in_error_path: false,
3898 confidence: 1.0,
3899 });
3900}
3901
3902fn extract_rust_attributes(node: tree_sitter::Node, source: &[u8]) -> Vec<String> {
3904 let mut attrs = Vec::new();
3905 if let Some(parent) = node.parent() {
3907 let mut cursor = parent.walk();
3908 let mut prev_was_attr = false;
3909 for child in parent.children(&mut cursor) {
3910 if child.kind() == "attribute_item" {
3911 if let Ok(attr_text) = child.utf8_text(source) {
3912 let clean = attr_text.trim_start_matches("#[").trim_end_matches(']');
3913 attrs.push(clean.to_string());
3914 }
3915 prev_was_attr = true;
3916 } else if child.id() == node.id() && prev_was_attr {
3917 break;
3918 } else {
3919 if prev_was_attr && child.kind() != "line_comment" {
3921 attrs.clear();
3922 }
3923 prev_was_attr = false;
3924 }
3925 }
3926 }
3927
3928 let mut cursor = node.walk();
3930 for child in node.children(&mut cursor) {
3931 if child.kind() == "attribute_item" {
3932 if let Ok(attr_text) = child.utf8_text(source) {
3933 let clean = attr_text.trim_start_matches("#[").trim_end_matches(']');
3934 attrs.push(clean.to_string());
3935 }
3936 }
3937 }
3938
3939 attrs
3940}
3941
3942fn extract_rust_signature(node: tree_sitter::Node, source_str: &str) -> Option<String> {
3944 let start = node.start_byte();
3945 if start >= source_str.len() { return None; }
3946
3947 let sig_text = &source_str[start..];
3948 let sig_end = sig_text.find(" {")
3950 .or_else(|| sig_text.find("\n{"))
3951 .or_else(|| sig_text.find(";\n"))
3952 .or_else(|| sig_text.find(';'))
3953 .unwrap_or(sig_text.len().min(200));
3954
3955 let sig = sig_text[..sig_end].trim();
3956 if sig.is_empty() { None } else { Some(sig.to_string()) }
3957}
3958
3959fn extract_rust_docstring(node: tree_sitter::Node, source_str: &str) -> Option<String> {
3961 let start_line = node.start_position().row;
3963 if start_line == 0 { return None; }
3964
3965 let lines: Vec<&str> = source_str.lines().collect();
3966 let mut doc_lines: Vec<&str> = Vec::new();
3967
3968 for i in (0..start_line).rev() {
3970 if i >= lines.len() { continue; }
3971 let line = lines[i].trim();
3972 if line.starts_with("///") {
3973 doc_lines.push(line.trim_start_matches("///").trim());
3974 } else if line.starts_with("//!") {
3975 doc_lines.push(line.trim_start_matches("//!").trim());
3976 } else if line.is_empty() || line.starts_with("#[") {
3977 continue;
3979 } else {
3980 break;
3981 }
3982 }
3983
3984 if doc_lines.is_empty() {
3985 return None;
3986 }
3987
3988 doc_lines.reverse();
3989 let first_line = doc_lines.first().copied().unwrap_or("");
3990 let truncated = if first_line.len() > 100 {
3991 &first_line[..100]
3992 } else {
3993 first_line
3994 };
3995
3996 if truncated.is_empty() { None } else { Some(truncated.to_string()) }
3997}
3998
3999fn extract_typescript_tree_sitter(
4004 path: &str,
4005 content: &str,
4006 parser: &mut Parser,
4007 class_id_map: &mut HashMap<String, String>,
4008 extension: &str,
4009) -> (Vec<CodeNode>, Vec<CodeEdge>, HashSet<String>) {
4010 let mut nodes = Vec::new();
4011 let mut edges = Vec::new();
4012 let mut imports = HashSet::new();
4013
4014 let lang_result = match extension {
4016 "tsx" => parser.set_language(&tree_sitter_typescript::LANGUAGE_TSX.into()),
4017 "ts" => parser.set_language(&tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into()),
4018 "jsx" => parser.set_language(&tree_sitter_javascript::LANGUAGE.into()),
4019 _ => parser.set_language(&tree_sitter_javascript::LANGUAGE.into()), };
4021
4022 if lang_result.is_err() {
4023 return (nodes, edges, imports);
4024 }
4025
4026 let tree = match parser.parse(content, None) {
4027 Some(t) => t,
4028 None => return (nodes, edges, imports),
4029 };
4030
4031 let file_id = format!("file:{}", path);
4032 let source = content.as_bytes();
4033 let root = tree.root_node();
4034
4035 let mut cursor = root.walk();
4036 for child in root.children(&mut cursor) {
4037 extract_typescript_node(
4038 child,
4039 source,
4040 content,
4041 path,
4042 &file_id,
4043 &mut nodes,
4044 &mut edges,
4045 class_id_map,
4046 &mut imports,
4047 );
4048 }
4049
4050 (nodes, edges, imports)
4051}
4052
4053fn extract_typescript_node(
4055 node: tree_sitter::Node,
4056 source: &[u8],
4057 source_str: &str,
4058 path: &str,
4059 file_id: &str,
4060 nodes: &mut Vec<CodeNode>,
4061 edges: &mut Vec<CodeEdge>,
4062 class_id_map: &mut HashMap<String, String>,
4063 imports: &mut HashSet<String>,
4064) {
4065 let text = |n: tree_sitter::Node| -> String {
4066 n.utf8_text(source).unwrap_or("").to_string()
4067 };
4068
4069 match node.kind() {
4070 "import_statement" => {
4071 let import_text = text(node);
4073 if let Some(from_idx) = import_text.rfind(" from ") {
4074 let module_part = import_text[from_idx + 6..].trim();
4075 let module = module_part.trim_matches(|c| c == '\'' || c == '"' || c == ';');
4076 if module.starts_with('.') || module.starts_with("@/") {
4077 edges.push(CodeEdge {
4078 from: file_id.to_string(),
4079 to: format!("module_ref:{}", module),
4080 relation: EdgeRelation::Imports,
4081 weight: 0.5,
4082 call_count: 1,
4083 in_error_path: false,
4084 confidence: 1.0,
4085 });
4086 }
4087 imports.insert(module.to_string());
4088
4089 if let Some(start) = import_text.find('{') {
4091 if let Some(end) = import_text.find('}') {
4092 let names_part = &import_text[start+1..end];
4093 for name in names_part.split(',') {
4094 let clean = name.trim().split(" as ").next().unwrap_or("").trim();
4095 if !clean.is_empty() {
4096 imports.insert(clean.to_string());
4097 }
4098 }
4099 }
4100 }
4101 }
4102 }
4103
4104 "class_declaration" | "class" => {
4105 extract_typescript_class(node, source, source_str, path, file_id, nodes, edges, class_id_map);
4106 }
4107
4108 "abstract_class_declaration" => {
4109 extract_typescript_class(node, source, source_str, path, file_id, nodes, edges, class_id_map);
4110 }
4111
4112 "interface_declaration" => {
4113 let name = node.child_by_field_name("name")
4114 .and_then(|n| n.utf8_text(source).ok())
4115 .unwrap_or("")
4116 .to_string();
4117 if name.is_empty() { return; }
4118
4119 let line = node.start_position().row + 1;
4120 let interface_id = format!("class:{}:{}", path, name);
4121
4122 let signature = extract_typescript_signature(node, source_str);
4123 let line_count = node.end_position().row - node.start_position().row + 1;
4124
4125 nodes.push(CodeNode {
4126 id: interface_id.clone(),
4127 kind: NodeKind::Class,
4128 name: name.clone(),
4129 file_path: path.to_string(),
4130 line: Some(line),
4131 decorators: vec!["interface".to_string()],
4132 signature,
4133 docstring: extract_typescript_docstring(node, source_str),
4134 line_count,
4135 is_test: path.contains("/test") || name.contains("Test"),
4136 });
4137
4138 edges.push(CodeEdge::defined_in(&interface_id, file_id));
4139 class_id_map.insert(name, interface_id);
4140 }
4141
4142 "function_declaration" | "function" => {
4143 let name = node.child_by_field_name("name")
4144 .and_then(|n| n.utf8_text(source).ok())
4145 .unwrap_or("")
4146 .to_string();
4147 if name.is_empty() { return; }
4148
4149 let line = node.start_position().row + 1;
4150 let func_id = format!("func:{}:{}", path, name);
4151
4152 let signature = extract_typescript_signature(node, source_str);
4153 let docstring = extract_typescript_docstring(node, source_str);
4154 let line_count = node.end_position().row - node.start_position().row + 1;
4155 let decorators = extract_typescript_decorators(node, source);
4156
4157 nodes.push(CodeNode {
4158 id: func_id.clone(),
4159 kind: NodeKind::Function,
4160 name,
4161 file_path: path.to_string(),
4162 line: Some(line),
4163 decorators,
4164 signature,
4165 docstring,
4166 line_count,
4167 is_test: path.contains("/test") || path.contains(".test.") || path.contains(".spec."),
4168 });
4169
4170 edges.push(CodeEdge::defined_in(&func_id, file_id));
4171 }
4172
4173 "lexical_declaration" | "variable_declaration" => {
4174 let mut cursor = node.walk();
4176 for child in node.children(&mut cursor) {
4177 if child.kind() == "variable_declarator" {
4178 let name = child.child_by_field_name("name")
4179 .and_then(|n| n.utf8_text(source).ok())
4180 .unwrap_or("")
4181 .to_string();
4182
4183 if let Some(value) = child.child_by_field_name("value") {
4184 if value.kind() == "arrow_function" || value.kind() == "function" {
4185 if name.is_empty() { continue; }
4186
4187 let line = node.start_position().row + 1;
4188 let func_id = format!("func:{}:{}", path, name);
4189
4190 let signature = extract_typescript_signature(node, source_str);
4191 let line_count = node.end_position().row - node.start_position().row + 1;
4192
4193 nodes.push(CodeNode {
4194 id: func_id.clone(),
4195 kind: NodeKind::Function,
4196 name,
4197 file_path: path.to_string(),
4198 line: Some(line),
4199 decorators: Vec::new(),
4200 signature,
4201 docstring: extract_typescript_docstring(node, source_str),
4202 line_count,
4203 is_test: path.contains("/test") || path.contains(".test.") || path.contains(".spec."),
4204 });
4205
4206 edges.push(CodeEdge::defined_in(&func_id, file_id));
4207 }
4208 }
4209 }
4210 }
4211 }
4212
4213 "enum_declaration" => {
4214 let name = node.child_by_field_name("name")
4215 .and_then(|n| n.utf8_text(source).ok())
4216 .unwrap_or("")
4217 .to_string();
4218 if name.is_empty() { return; }
4219
4220 let line = node.start_position().row + 1;
4221 let enum_id = format!("class:{}:{}", path, name);
4222
4223 let signature = extract_typescript_signature(node, source_str);
4224 let line_count = node.end_position().row - node.start_position().row + 1;
4225
4226 nodes.push(CodeNode {
4227 id: enum_id.clone(),
4228 kind: NodeKind::Class,
4229 name: name.clone(),
4230 file_path: path.to_string(),
4231 line: Some(line),
4232 decorators: vec!["enum".to_string()],
4233 signature,
4234 docstring: extract_typescript_docstring(node, source_str),
4235 line_count,
4236 is_test: false,
4237 });
4238
4239 edges.push(CodeEdge::defined_in(&enum_id, file_id));
4240 class_id_map.insert(name, enum_id);
4241 }
4242
4243 "type_alias_declaration" => {
4244 let name = node.child_by_field_name("name")
4245 .and_then(|n| n.utf8_text(source).ok())
4246 .unwrap_or("")
4247 .to_string();
4248 if name.is_empty() { return; }
4249
4250 let line = node.start_position().row + 1;
4251 let type_id = format!("class:{}:{}", path, name);
4252
4253 let signature = extract_typescript_signature(node, source_str);
4254 let line_count = node.end_position().row - node.start_position().row + 1;
4255
4256 nodes.push(CodeNode {
4257 id: type_id.clone(),
4258 kind: NodeKind::Class,
4259 name: name.clone(),
4260 file_path: path.to_string(),
4261 line: Some(line),
4262 decorators: vec!["type".to_string()],
4263 signature,
4264 docstring: None,
4265 line_count,
4266 is_test: false,
4267 });
4268
4269 edges.push(CodeEdge::defined_in(&type_id, file_id));
4270 class_id_map.insert(name, type_id);
4271 }
4272
4273 "export_statement" => {
4274 let mut cursor = node.walk();
4276 for child in node.children(&mut cursor) {
4277 match child.kind() {
4278 "class_declaration" | "class" | "abstract_class_declaration" |
4279 "interface_declaration" | "function_declaration" | "function" |
4280 "lexical_declaration" | "variable_declaration" | "enum_declaration" |
4281 "type_alias_declaration" => {
4282 extract_typescript_node(child, source, source_str, path, file_id, nodes, edges, class_id_map, imports);
4283 }
4284 _ => {}
4285 }
4286 }
4287 }
4288
4289 "expression_statement" => {
4290 let mut cursor = node.walk();
4292 for child in node.children(&mut cursor) {
4293 extract_typescript_node(child, source, source_str, path, file_id, nodes, edges, class_id_map, imports);
4294 }
4295 }
4296
4297 "module" | "internal_module" | "namespace" => {
4298 let name = node.child_by_field_name("name")
4300 .and_then(|n| n.utf8_text(source).ok())
4301 .unwrap_or("")
4302 .to_string();
4303
4304 if !name.is_empty() {
4305 let line = node.start_position().row + 1;
4306 let module_id = format!("class:{}:{}", path, name);
4307
4308 nodes.push(CodeNode {
4309 id: module_id.clone(),
4310 kind: NodeKind::Class,
4311 name: name.clone(),
4312 file_path: path.to_string(),
4313 line: Some(line),
4314 decorators: vec!["namespace".to_string()],
4315 signature: Some(format!("namespace {}", name)),
4316 docstring: None,
4317 line_count: node.end_position().row - node.start_position().row + 1,
4318 is_test: false,
4319 });
4320
4321 edges.push(CodeEdge::defined_in(&module_id, file_id));
4322 }
4323
4324 if let Some(body) = node.child_by_field_name("body") {
4326 let mut body_cursor = body.walk();
4327 for body_child in body.children(&mut body_cursor) {
4328 extract_typescript_node(body_child, source, source_str, path, file_id, nodes, edges, class_id_map, imports);
4329 }
4330 }
4331 }
4332
4333 _ => {}
4334 }
4335}
4336
4337fn extract_typescript_class(
4339 node: tree_sitter::Node,
4340 source: &[u8],
4341 source_str: &str,
4342 path: &str,
4343 file_id: &str,
4344 nodes: &mut Vec<CodeNode>,
4345 edges: &mut Vec<CodeEdge>,
4346 class_id_map: &mut HashMap<String, String>,
4347) {
4348 let name = node.child_by_field_name("name")
4349 .and_then(|n| n.utf8_text(source).ok())
4350 .unwrap_or("")
4351 .to_string();
4352 if name.is_empty() { return; }
4353
4354 let line = node.start_position().row + 1;
4355 let class_id = format!("class:{}:{}", path, name);
4356
4357 let signature = extract_typescript_signature(node, source_str);
4358 let docstring = extract_typescript_docstring(node, source_str);
4359 let line_count = node.end_position().row - node.start_position().row + 1;
4360 let decorators = extract_typescript_decorators(node, source);
4361
4362 nodes.push(CodeNode {
4363 id: class_id.clone(),
4364 kind: NodeKind::Class,
4365 name: name.clone(),
4366 file_path: path.to_string(),
4367 line: Some(line),
4368 decorators,
4369 signature,
4370 docstring,
4371 line_count,
4372 is_test: path.contains("/test") || name.contains("Test"),
4373 });
4374
4375 edges.push(CodeEdge::defined_in(&class_id, file_id));
4376 class_id_map.insert(name.clone(), class_id.clone());
4377
4378 fn find_extends_identifier(node: tree_sitter::Node, source: &[u8]) -> Option<String> {
4380 let mut cursor = node.walk();
4381 for child in node.children(&mut cursor) {
4382 match child.kind() {
4383 "identifier" | "type_identifier" => {
4384 return child.utf8_text(source).ok().map(|s| s.to_string());
4385 }
4386 "extends_clause" | "class_heritage" | "extends_type_clause" => {
4387 if let Some(name) = find_extends_identifier(child, source) {
4388 return Some(name);
4389 }
4390 }
4391 _ => {}
4392 }
4393 }
4394 None
4395 }
4396
4397 let mut cursor = node.walk();
4398 for child in node.children(&mut cursor) {
4399 if child.kind() == "class_heritage" || child.kind() == "extends_clause" {
4400 if let Some(parent_name) = find_extends_identifier(child, source) {
4401 if !parent_name.is_empty() {
4402 edges.push(CodeEdge {
4403 from: class_id.clone(),
4404 to: format!("class_ref:{}", parent_name),
4405 relation: EdgeRelation::Inherits,
4406 weight: 0.5,
4407 call_count: 1,
4408 in_error_path: false,
4409 confidence: 1.0,
4410 });
4411 }
4412 }
4413 }
4414 }
4415
4416 if let Some(body) = node.child_by_field_name("body") {
4418 let mut body_cursor = body.walk();
4419 for body_child in body.children(&mut body_cursor) {
4420 match body_child.kind() {
4421 "method_definition" | "public_field_definition" | "method_signature" => {
4422 extract_typescript_method(body_child, source, source_str, path, &class_id, nodes, edges);
4423 }
4424 _ => {}
4425 }
4426 }
4427 }
4428}
4429
4430fn extract_typescript_method(
4432 node: tree_sitter::Node,
4433 source: &[u8],
4434 source_str: &str,
4435 path: &str,
4436 class_id: &str,
4437 nodes: &mut Vec<CodeNode>,
4438 edges: &mut Vec<CodeEdge>,
4439) {
4440 let mut name = node.child_by_field_name("name")
4441 .and_then(|n| n.utf8_text(source).ok())
4442 .unwrap_or("")
4443 .to_string();
4444
4445 if name.is_empty() {
4447 let mut cursor = node.walk();
4448 for child in node.children(&mut cursor) {
4449 if child.kind() == "property_identifier" || child.kind() == "identifier" {
4450 if let Ok(text) = child.utf8_text(source) {
4451 name = text.to_string();
4452 break;
4453 }
4454 }
4455 }
4456 }
4457
4458 if name.is_empty() { return; }
4459
4460 let line = node.start_position().row + 1;
4461 let parent_name = class_id.rsplit(':').next().unwrap_or("");
4463 let method_id = if parent_name.is_empty() {
4464 format!("method:{}:{}", path, name)
4465 } else {
4466 format!("method:{}:{}.{}", path, parent_name, name)
4467 };
4468
4469 let signature = extract_typescript_signature(node, source_str);
4470 let docstring = extract_typescript_docstring(node, source_str);
4471 let line_count = node.end_position().row - node.start_position().row + 1;
4472 let decorators = extract_typescript_decorators(node, source);
4473
4474 nodes.push(CodeNode {
4475 id: method_id.clone(),
4476 kind: NodeKind::Function,
4477 name,
4478 file_path: path.to_string(),
4479 line: Some(line),
4480 decorators,
4481 signature,
4482 docstring,
4483 line_count,
4484 is_test: path.contains("/test") || path.contains(".test.") || path.contains(".spec."),
4485 });
4486
4487 edges.push(CodeEdge {
4488 from: method_id,
4489 to: class_id.to_string(),
4490 relation: EdgeRelation::DefinedIn,
4491 weight: 0.5,
4492 call_count: 1,
4493 in_error_path: false,
4494 confidence: 1.0,
4495 });
4496}
4497
4498fn extract_typescript_decorators(node: tree_sitter::Node, source: &[u8]) -> Vec<String> {
4500 let mut decorators = Vec::new();
4501
4502 if let Some(parent) = node.parent() {
4504 let mut cursor = parent.walk();
4505 for child in parent.children(&mut cursor) {
4506 if child.kind() == "decorator" {
4507 if let Ok(dec_text) = child.utf8_text(source) {
4508 let name = dec_text.trim_start_matches('@');
4509 let name = name.split('(').next().unwrap_or(name).trim();
4510 if !name.is_empty() {
4511 decorators.push(name.to_string());
4512 }
4513 }
4514 }
4515 if child.id() == node.id() {
4516 break;
4517 }
4518 }
4519 }
4520
4521 decorators
4522}
4523
4524fn extract_typescript_signature(node: tree_sitter::Node, source_str: &str) -> Option<String> {
4526 let start = node.start_byte();
4527 if start >= source_str.len() { return None; }
4528
4529 let sig_text = &source_str[start..];
4530 let sig_end = sig_text.find(" {")
4532 .or_else(|| sig_text.find("\n{"))
4533 .or_else(|| sig_text.find("{\n"))
4534 .unwrap_or(sig_text.len().min(200));
4535
4536 let sig = sig_text[..sig_end].trim();
4537 if sig.is_empty() { None } else { Some(sig.to_string()) }
4538}
4539
4540fn extract_typescript_docstring(node: tree_sitter::Node, source_str: &str) -> Option<String> {
4542 let start_line = node.start_position().row;
4543 if start_line == 0 { return None; }
4544
4545 let lines: Vec<&str> = source_str.lines().collect();
4546
4547 for i in (0..start_line).rev() {
4549 if i >= lines.len() { continue; }
4550 let line = lines[i].trim();
4551
4552 if line.ends_with("*/") {
4553 let mut doc_lines: Vec<&str> = Vec::new();
4555 for j in (0..=i).rev() {
4556 if j >= lines.len() { continue; }
4557 let doc_line = lines[j].trim();
4558 if doc_line.starts_with("/**") {
4559 let first = doc_line.trim_start_matches("/**").trim_start_matches('*').trim();
4560 if !first.is_empty() && !first.starts_with('@') {
4561 doc_lines.push(first);
4562 }
4563 break;
4564 } else if doc_line.starts_with('*') {
4565 let content = doc_line.trim_start_matches('*').trim();
4566 if !content.is_empty() && !content.starts_with('@') {
4567 doc_lines.push(content);
4568 }
4569 }
4570 }
4571
4572 if doc_lines.is_empty() {
4573 return None;
4574 }
4575
4576 doc_lines.reverse();
4577 let first_line = doc_lines.first().copied().unwrap_or("");
4578 let truncated = if first_line.len() > 100 {
4579 &first_line[..100]
4580 } else {
4581 first_line
4582 };
4583
4584 return if truncated.is_empty() { None } else { Some(truncated.to_string()) };
4585 } else if line.is_empty() || line.starts_with('@') || line.starts_with("//") {
4586 continue;
4587 } else {
4588 break;
4589 }
4590 }
4591
4592 None
4593}
4594
4595#[allow(dead_code)]
4599fn extract_rust_regex(path: &str, content: &str) -> (Vec<CodeNode>, Vec<CodeEdge>, HashSet<String>) {
4600 let mut nodes = Vec::new();
4601 let mut edges = Vec::new();
4602
4603 let file_id = format!("file:{}", path);
4604
4605 let re_use = Regex::new(r"(?m)^use\s+([\w:]+)").unwrap();
4606 let re_struct = Regex::new(r"(?m)^(?:pub\s+)?struct\s+(\w+)").unwrap();
4607 let re_enum = Regex::new(r"(?m)^(?:pub\s+)?enum\s+(\w+)").unwrap();
4608 let re_impl = Regex::new(r"(?m)^impl(?:<[^>]+>)?\s+(?:(\w+)\s+for\s+)?(\w+)").unwrap();
4609 let re_fn = Regex::new(r"(?m)^\s*(?:pub\s+)?(?:async\s+)?fn\s+(\w+)").unwrap();
4610
4611 for cap in re_use.captures_iter(content) {
4612 let module = cap[1].to_string();
4613 if !module.starts_with("std::") && !module.starts_with("core::") {
4614 edges.push(CodeEdge::new(
4615 &file_id,
4616 &format!("module_ref:{}", module),
4617 EdgeRelation::Imports,
4618 ));
4619 }
4620 }
4621
4622 for cap in re_struct.captures_iter(content) {
4623 let name = cap[1].to_string();
4624 let line = content[..cap.get(0).unwrap().start()].lines().count() + 1;
4625 let node = CodeNode::new_class(path, &name, line);
4626 edges.push(CodeEdge::defined_in(&node.id, &file_id));
4627 nodes.push(node);
4628 }
4629
4630 for cap in re_enum.captures_iter(content) {
4631 let name = cap[1].to_string();
4632 let line = content[..cap.get(0).unwrap().start()].lines().count() + 1;
4633 let node = CodeNode::new_class(path, &name, line);
4634 edges.push(CodeEdge::defined_in(&node.id, &file_id));
4635 nodes.push(node);
4636 }
4637
4638 for cap in re_impl.captures_iter(content) {
4639 if let Some(trait_match) = cap.get(1) {
4640 let type_name = &cap[2];
4641 let trait_name = trait_match.as_str();
4642 if let Some(type_node) = nodes.iter().find(|n| n.name == type_name) {
4643 edges.push(CodeEdge::new(
4644 &type_node.id,
4645 &format!("class_ref:{}", trait_name),
4646 EdgeRelation::Inherits,
4647 ));
4648 }
4649 }
4650 }
4651
4652 for cap in re_fn.captures_iter(content) {
4653 let name = cap[1].to_string();
4654 let line = content[..cap.get(0).unwrap().start()].lines().count() + 1;
4655 let node = CodeNode::new_function(path, &name, line, false);
4656 edges.push(CodeEdge::defined_in(&node.id, &file_id));
4657 nodes.push(node);
4658 }
4659
4660 (nodes, edges, HashSet::new())
4661}
4662
4663#[allow(dead_code)]
4665fn extract_typescript_regex(path: &str, content: &str) -> (Vec<CodeNode>, Vec<CodeEdge>, HashSet<String>) {
4666 let mut nodes = Vec::new();
4667 let mut edges = Vec::new();
4668
4669 let file_id = format!("file:{}", path);
4670
4671 let re_import = Regex::new(r#"(?m)^import\s+.*?\s+from\s+['"]([^'"]+)['"]"#).unwrap();
4672 let re_class = Regex::new(r"(?m)^(?:export\s+)?(?:abstract\s+)?class\s+(\w+)(?:\s+extends\s+(\w+))?").unwrap();
4673 let re_interface = Regex::new(r"(?m)^(?:export\s+)?interface\s+(\w+)(?:\s+extends\s+(\w+))?").unwrap();
4674 let re_function = Regex::new(r"(?m)^(?:export\s+)?(?:async\s+)?function\s+(\w+)").unwrap();
4675 let re_arrow = Regex::new(r"(?m)^(?:export\s+)?(?:const|let)\s+(\w+)\s*=\s*(?:async\s+)?\([^)]*\)\s*=>").unwrap();
4676
4677 for cap in re_import.captures_iter(content) {
4678 let module = cap[1].to_string();
4679 if module.starts_with('.') || module.starts_with("@/") {
4680 edges.push(CodeEdge::new(
4681 &file_id,
4682 &format!("module_ref:{}", module),
4683 EdgeRelation::Imports,
4684 ));
4685 }
4686 }
4687
4688 for cap in re_class.captures_iter(content) {
4689 let name = cap[1].to_string();
4690 let line = content[..cap.get(0).unwrap().start()].lines().count() + 1;
4691 let node = CodeNode::new_class(path, &name, line);
4692 edges.push(CodeEdge::defined_in(&node.id, &file_id));
4693
4694 if let Some(parent) = cap.get(2) {
4695 edges.push(CodeEdge::new(
4696 &node.id,
4697 &format!("class_ref:{}", parent.as_str()),
4698 EdgeRelation::Inherits,
4699 ));
4700 }
4701
4702 nodes.push(node);
4703 }
4704
4705 for cap in re_interface.captures_iter(content) {
4706 let name = cap[1].to_string();
4707 let line = content[..cap.get(0).unwrap().start()].lines().count() + 1;
4708 let node = CodeNode::new_class(path, &name, line);
4709 edges.push(CodeEdge::defined_in(&node.id, &file_id));
4710 nodes.push(node);
4711 }
4712
4713 for cap in re_function.captures_iter(content) {
4714 let name = cap[1].to_string();
4715 let line = content[..cap.get(0).unwrap().start()].lines().count() + 1;
4716 let node = CodeNode::new_function(path, &name, line, false);
4717 edges.push(CodeEdge::defined_in(&node.id, &file_id));
4718 nodes.push(node);
4719 }
4720
4721 for cap in re_arrow.captures_iter(content) {
4722 let name = cap[1].to_string();
4723 let line = content[..cap.get(0).unwrap().start()].lines().count() + 1;
4724 let node = CodeNode::new_function(path, &name, line, false);
4725 edges.push(CodeEdge::defined_in(&node.id, &file_id));
4726 nodes.push(node);
4727 }
4728
4729 (nodes, edges, HashSet::new())
4730}
4731
/// Reports whether `name` is one of a fixed set of Python names.
///
/// The set covers builtin functions and types, a handful of keywords and boolean operators
/// (`if`, `for`, `while`, `return`, `not`, `and`, `or`), and common builtin exception classes.
/// Matching is exact and case-sensitive.
fn is_python_builtin(name: &str) -> bool {
    // Keywords and operators.
    const KEYWORDS: &[&str] = &["if", "for", "while", "return", "not", "and", "or"];

    // Builtin functions and types.
    const BUILTINS: &[&str] = &[
        "print", "len", "range", "str", "int", "float", "list", "dict", "set", "tuple", "type",
        "isinstance", "issubclass", "super", "hasattr", "getattr", "setattr", "property",
        "staticmethod", "classmethod", "enumerate", "zip", "map", "filter", "sorted", "reversed",
        "any", "all", "min", "max", "sum", "abs", "bool", "repr", "hash", "id", "open", "format",
        "bytes", "bytearray", "memoryview", "object", "complex", "frozenset", "iter", "next",
        "callable", "delattr", "dir", "divmod", "eval", "exec", "globals", "hex", "input",
        "locals", "oct", "ord", "pow", "round", "slice", "vars", "chr", "bin", "breakpoint",
        "compile", "__import__",
    ];

    // Builtin exception classes.
    const EXCEPTIONS: &[&str] = &[
        "ValueError", "TypeError", "KeyError", "IndexError", "AttributeError", "RuntimeError",
        "Exception", "NotImplementedError", "StopIteration", "OSError", "IOError",
        "FileNotFoundError", "ImportError", "AssertionError", "NameError", "OverflowError",
        "ZeroDivisionError", "UnicodeError", "SyntaxError",
    ];

    KEYWORDS.contains(&name) || BUILTINS.contains(&name) || EXCEPTIONS.contains(&name)
}
4831
/// Reports whether the top-level package of a dotted module path is in a fixed list.
///
/// Only the segment before the first `.` is examined, so `"os.path"` is checked as `"os"`.
/// Despite the name, the list also includes packages that are not part of the Python standard
/// library (for example `pytest`, `numpy`, `pandas`, `setuptools`).
fn is_stdlib(module: &str) -> bool {
    let top_level = match module.split_once('.') {
        Some((head, _)) => head,
        None => module,
    };

    matches!(
        top_level,
        "os" | "sys" | "re" | "json" | "math" | "io" | "abc" | "collections" | "typing"
            | "unittest" | "pytest" | "copy" | "functools" | "itertools" | "pathlib" | "shutil"
            | "tempfile" | "logging" | "warnings" | "inspect" | "textwrap" | "string"
            | "datetime" | "time" | "hashlib" | "base64" | "pickle" | "csv" | "xml" | "html"
            | "http" | "urllib" | "socket" | "threading" | "multiprocessing" | "subprocess"
            | "contextlib" | "enum" | "dataclasses" | "struct" | "array" | "queue" | "heapq"
            | "bisect" | "decimal" | "fractions" | "random" | "statistics" | "operator" | "pdb"
            | "traceback" | "dis" | "ast" | "token" | "importlib" | "pkgutil" | "site"
            | "zipimport" | "numpy" | "scipy" | "matplotlib" | "pandas" | "setuptools" | "pip"
            | "wheel" | "pkg_resources" | "distutils"
    )
}
4848
#[cfg(test)]
mod tests {
    use super::*;

    /// Python extraction finds the class, its method, the top-level function, and an edge whose
    /// target mentions the base class.
    #[test]
    fn test_extract_python() {
        let content = r#"
import os
from pathlib import Path

class MyClass(BaseClass):
    def method(self):
        pass

def top_level():
    pass
"#;
        let mut parser = Parser::new();
        let language = tree_sitter_python::LANGUAGE;
        parser.set_language(&language.into()).unwrap();
        let mut class_map = HashMap::new();

        let (nodes, edges, _) = extract_python_tree_sitter("test.py", content, &mut parser, &mut class_map);
        let has_node = |name: &str| nodes.iter().any(|n| n.name == name);

        assert!(has_node("MyClass"), "Should find MyClass");
        assert!(has_node("method"), "Should find method");
        assert!(has_node("top_level"), "Should find top_level");
        assert!(edges.iter().any(|e| e.to.contains("BaseClass")), "Should have edge to BaseClass");
    }

    /// Rust extraction finds a struct, an impl method, a free function, a crate-local import
    /// edge, and records `impl Trait for Type` as an `Inherits` edge.
    #[test]
    fn test_extract_rust() {
        let content = r#"
use std::path::Path;
use crate::module;

pub struct MyStruct {
    field: i32,
}

impl MyTrait for MyStruct {
    fn method(&self) {}
}

pub fn top_level() {}
"#;
        let mut parser = Parser::new();
        let mut class_map = HashMap::new();
        let (nodes, edges, _) = extract_rust_tree_sitter("test.rs", content, &mut parser, &mut class_map);
        let has_node = |name: &str| nodes.iter().any(|n| n.name == name);

        assert!(has_node("MyStruct"), "Should find MyStruct");
        assert!(has_node("method"), "Should find method");
        assert!(has_node("top_level"), "Should find top_level");
        assert!(edges.iter().any(|e| e.to.contains("module")), "Should have module import edge");

        assert!(
            edges.iter().any(|e| e.relation == EdgeRelation::Inherits && e.to.contains("MyTrait")),
            "Should capture trait impl inheritance"
        );
    }

    /// Rust extraction covers structs, enums, traits, impl methods, nested-module functions,
    /// type aliases, free functions, and `#[test]` functions flagged as tests.
    #[test]
    fn test_extract_rust_comprehensive() {
        let content = r#"
use crate::foo::bar;

/// A documented struct
pub struct Person {
    name: String,
    age: u32,
}

/// A documented enum
pub enum Status {
    Active,
    Inactive,
}

/// A trait
pub trait Greeter {
    fn greet(&self) -> String;
}

impl Greeter for Person {
    fn greet(&self) -> String {
        format!("Hello, {}", self.name)
    }
}

impl Person {
    pub fn new(name: String) -> Self {
        Self { name, age: 0 }
    }

    pub fn birthday(&mut self) {
        self.age += 1;
    }
}

mod inner {
    pub fn nested_fn() {}
}

type MyAlias = Vec<String>;

pub fn standalone() {}

#[test]
fn test_something() {}
"#;
        let mut parser = Parser::new();
        let mut class_map = HashMap::new();
        let (nodes, edges, _) = extract_rust_tree_sitter("test.rs", content, &mut parser, &mut class_map);
        let has_node = |name: &str| nodes.iter().any(|n| n.name == name);

        assert!(has_node("Person"), "Should find Person struct");
        assert!(has_node("Status"), "Should find Status enum");

        assert!(has_node("Greeter"), "Should find Greeter trait");

        assert!(has_node("greet"), "Should find greet method");
        assert!(has_node("new"), "Should find new method");
        assert!(has_node("birthday"), "Should find birthday method");

        // Substring match: the extracted name only has to contain `nested_fn`.
        assert!(nodes.iter().any(|n| n.name.contains("nested_fn")), "Should find nested_fn");

        assert!(has_node("MyAlias"), "Should find type alias");

        assert!(has_node("standalone"), "Should find standalone fn");

        let test_node = nodes
            .iter()
            .find(|n| n.name == "test_something")
            .expect("Should find test function");
        assert!(test_node.is_test, "Test function should be marked as test");

        assert!(
            edges
                .iter()
                .any(|e| e.from.contains("greet") && e.relation == EdgeRelation::DefinedIn),
            "greet should have DefinedIn edge"
        );
    }

    /// TypeScript extraction finds a class, its method, a function, an arrow function, a
    /// relative import edge, and the `extends` relationship.
    #[test]
    fn test_extract_typescript() {
        let content = r#"
import { Component } from './component';

export class MyClass extends BaseClass {
    method(): void {}
}

export function topLevel(): void {}

export const arrowFn = () => {};
"#;
        let mut parser = Parser::new();
        let mut class_map = HashMap::new();
        let (nodes, edges, _) = extract_typescript_tree_sitter("test.ts", content, &mut parser, &mut class_map, "ts");
        let has_node = |name: &str| nodes.iter().any(|n| n.name == name);

        assert!(has_node("MyClass"), "Should find MyClass");
        assert!(has_node("topLevel"), "Should find topLevel");
        assert!(has_node("arrowFn"), "Should find arrowFn");
        assert!(edges.iter().any(|e| e.to.contains("component")), "Should have component import");

        assert!(has_node("method"), "Should find method inside class");

        assert!(
            edges.iter().any(|e| e.relation == EdgeRelation::Inherits && e.to.contains("BaseClass")),
            "Should capture class inheritance"
        );
    }

    /// TypeScript extraction covers decorated and default-exported classes, methods,
    /// interfaces, type aliases, enums, functions, async arrow functions, namespaces, and
    /// import edges.
    #[test]
    fn test_extract_typescript_comprehensive() {
        let content = r#"
import { Injectable } from '@angular/core';
import type { User } from './types';

/**
 * A service class
 */
@Injectable()
export class UserService {
    private users: User[] = [];

    /**
     * Get all users
     */
    getUsers(): User[] {
        return this.users;
    }

    addUser(user: User): void {
        this.users.push(user);
    }
}

export interface IRepository<T> {
    find(id: string): T | undefined;
    save(item: T): void;
}

export type UserId = string;

export enum UserRole {
    Admin = 'admin',
    User = 'user',
}

export function createUser(name: string): User {
    return { name };
}

export const fetchUser = async (id: string) => {
    return null;
};

export default class DefaultExport {}

namespace MyNamespace {
    export function innerFn() {}
}
"#;
        let mut parser = Parser::new();
        let mut class_map = HashMap::new();
        let (nodes, edges, _) = extract_typescript_tree_sitter("test.ts", content, &mut parser, &mut class_map, "ts");
        let has_node = |name: &str| nodes.iter().any(|n| n.name == name);

        assert!(has_node("UserService"), "Should find UserService class");
        assert!(has_node("DefaultExport"), "Should find default export class");

        assert!(has_node("getUsers"), "Should find getUsers method");
        assert!(has_node("addUser"), "Should find addUser method");

        assert!(has_node("IRepository"), "Should find interface");

        assert!(has_node("UserId"), "Should find type alias");

        assert!(has_node("UserRole"), "Should find enum");

        assert!(has_node("createUser"), "Should find function");

        assert!(has_node("fetchUser"), "Should find arrow function");

        assert!(has_node("MyNamespace"), "Should find namespace");

        assert!(edges.iter().any(|e| e.relation == EdgeRelation::Imports), "Should have import edges");
    }
}