1use std::collections::{BinaryHeap, HashMap, HashSet, VecDeque};
15use std::path::Path;
16use serde::{Deserialize, Serialize};
17use regex::Regex;
18use walkdir::WalkDir;
19use tree_sitter::Parser;
20
/// A graph of code entities (`nodes`) and the relations between them (`edges`).
///
/// Only `nodes` and `edges` are serialized. The three lookup tables are marked
/// `#[serde(skip)]`, so they are empty after deserialization until
/// `build_indexes` is called.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct CodeGraph {
    /// Every entity in the graph.
    pub nodes: Vec<CodeNode>,
    /// Every relation; `from` / `to` hold node ids.
    pub edges: Vec<CodeEdge>,
    /// Node id -> positions in `edges` whose `from` equals that id.
    #[serde(skip)]
    pub outgoing: HashMap<String, Vec<usize>>,
    /// Node id -> positions in `edges` whose `to` equals that id.
    #[serde(skip)]
    pub incoming: HashMap<String, Vec<usize>>,
    /// Node id -> position of that node in `nodes`.
    #[serde(skip)]
    pub node_index: HashMap<String, usize>,
}
38
/// A single entity in the code graph: a file, class, function/method or module.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CodeNode {
    /// Identifier used by edges and indexes. The constructors in this file build it as
    /// `file:<path>`, `class:<path>:<name>`, `func:<path>:<name>` or `method:<path>:<name>`.
    pub id: String,
    /// What sort of entity this node represents.
    pub kind: NodeKind,
    /// Short name: the last path component for files, the identifier otherwise.
    pub name: String,
    /// Path of the file that contains the entity.
    pub file_path: String,
    /// Line of the definition; `None` for file nodes.
    pub line: Option<usize>,
    /// Decorators attached to the definition. Omitted from serialized output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub decorators: Vec<String>,
    /// Signature text, if recorded. Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub signature: Option<String>,
    /// Docstring text, if recorded. Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub docstring: Option<String>,
    /// Number of lines in the entity; the constructors in this file initialize it to 0.
    #[serde(default)]
    pub line_count: usize,
    /// Whether name/path heuristics classified the entity as test code.
    #[serde(default)]
    pub is_test: bool,
}
58
59impl CodeNode {
60 pub fn new_file(path: &str) -> Self {
61 Self {
62 id: format!("file:{}", path),
63 kind: NodeKind::File,
64 name: path.rsplit('/').next().unwrap_or(path).to_string(),
65 file_path: path.to_string(),
66 line: None,
67 decorators: Vec::new(),
68 signature: None,
69 docstring: None,
70 line_count: 0,
71 is_test: path.contains("/test") || path.contains("_test."),
72 }
73 }
74
75 pub fn new_class(path: &str, name: &str, line: usize) -> Self {
76 Self {
77 id: format!("class:{}:{}", path, name),
78 kind: NodeKind::Class,
79 name: name.to_string(),
80 file_path: path.to_string(),
81 line: Some(line),
82 decorators: Vec::new(),
83 signature: None,
84 docstring: None,
85 line_count: 0,
86 is_test: name.starts_with("Test") || path.contains("/test"),
87 }
88 }
89
90 pub fn new_function(path: &str, name: &str, line: usize, is_method: bool) -> Self {
91 let prefix = if is_method { "method" } else { "func" };
92 Self {
93 id: format!("{}:{}:{}", prefix, path, name),
94 kind: NodeKind::Function,
95 name: name.to_string(),
96 file_path: path.to_string(),
97 line: Some(line),
98 decorators: Vec::new(),
99 signature: None,
100 docstring: None,
101 line_count: 0,
102 is_test: name.starts_with("test_") || name.starts_with("Test") || path.contains("/test"),
103 }
104 }
105}
106
/// The category of a [`CodeNode`].
///
/// Serialized in lowercase (`"file"`, `"class"`, `"function"`, `"module"`).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum NodeKind {
    /// A source file.
    File,
    /// A class definition.
    Class,
    /// A free function or a method.
    Function,
    /// A module.
    Module,
}
116
/// A directed relation between two nodes of the code graph.
///
/// All numeric and boolean fields fall back to their type's default (0 / `false`)
/// when they are missing from serialized input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CodeEdge {
    /// Id of the node the edge starts at.
    pub from: String,
    /// Id of the node the edge points to.
    pub to: String,
    /// Kind of relation the edge expresses.
    pub relation: EdgeRelation,
    /// Importance of the edge; recomputed by `compute_weight`.
    #[serde(default)]
    pub weight: f32,
    /// How many times the call was observed (used for `Calls` edges).
    #[serde(default)]
    pub call_count: u32,
    /// Whether the call occurs on an error-handling path; raises the weight of `Calls` edges.
    #[serde(default)]
    pub in_error_path: bool,
    /// Confidence that the edge target was resolved correctly; feeds into the weight
    /// of `Calls` edges.
    #[serde(default)]
    pub confidence: f32,
}
132
133impl CodeEdge {
134 pub fn new(from: &str, to: &str, relation: EdgeRelation) -> Self {
135 Self {
136 from: from.to_string(),
137 to: to.to_string(),
138 relation,
139 weight: 0.5,
140 call_count: 1,
141 in_error_path: false,
142 confidence: 1.0,
143 }
144 }
145
146 pub fn imports(from: &str, to: &str) -> Self {
147 Self::new(from, to, EdgeRelation::Imports)
148 }
149
150 pub fn calls(from: &str, to: &str) -> Self {
151 Self::new(from, to, EdgeRelation::Calls)
152 }
153
154 pub fn inherits(from: &str, to: &str) -> Self {
155 Self::new(from, to, EdgeRelation::Inherits)
156 }
157
158 pub fn defined_in(from: &str, to: &str) -> Self {
159 Self::new(from, to, EdgeRelation::DefinedIn)
160 }
161
162 pub fn compute_weight(&mut self) {
164 if self.relation == EdgeRelation::Calls {
165 let count_norm = (self.call_count as f32 / 10.0).min(1.0);
166 let error_factor = if self.in_error_path { 0.8 } else { 0.5 };
167 self.weight = 0.4 * count_norm + 0.3 * error_factor + 0.3 * self.confidence;
168 } else {
169 self.weight = 0.7; }
171 }
172}
173
/// The kind of relation a [`CodeEdge`] expresses.
///
/// Serialized in snake_case (`"imports"`, `"defined_in"`, `"tests_for"`, ...).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum EdgeRelation {
    /// The source imports the target.
    Imports,
    /// The source class inherits from the target class.
    Inherits,
    /// The source is defined inside the target (e.g. a method inside its class).
    DefinedIn,
    /// The source calls the target.
    Calls,
    /// The source (a test file) tests the target (a source file).
    TestsFor,
    /// The source overrides the target.
    Overrides,
}
191
192impl std::fmt::Display for EdgeRelation {
193 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
194 match self {
195 EdgeRelation::Imports => write!(f, "imports"),
196 EdgeRelation::Inherits => write!(f, "inherits"),
197 EdgeRelation::DefinedIn => write!(f, "defined_in"),
198 EdgeRelation::Calls => write!(f, "calls"),
199 EdgeRelation::TestsFor => write!(f, "tests_for"),
200 EdgeRelation::Overrides => write!(f, "overrides"),
201 }
202 }
203}
204
/// Result of an impact analysis: the nodes affected by a change, split into
/// non-test code and tests. The references borrow from the analysed graph.
#[derive(Debug)]
pub struct ImpactReport<'a> {
    /// Affected nodes that do not live in a test file.
    pub affected_source: Vec<&'a CodeNode>,
    /// Affected nodes that live in a test file, plus tests related to the change.
    pub affected_tests: Vec<&'a CodeNode>,
}
213
/// A path through the graph that starts at a symptom node.
#[derive(Debug, Clone)]
pub struct CausalChain {
    /// Id of the symptom node the chain was traced from.
    pub symptom_node_id: String,
    /// The nodes on the path, in traversal order, beginning with the symptom node.
    pub chain: Vec<ChainNode>,
}
220
/// One step of a [`CausalChain`]: a snapshot of a graph node plus the label of the
/// edge leading to the next step.
#[derive(Debug, Clone)]
pub struct ChainNode {
    /// Id of the graph node.
    pub node_id: String,
    /// Name of the graph node.
    pub node_name: String,
    /// File that contains the node.
    pub file_path: String,
    /// Line of the node's definition, if known.
    pub line: Option<usize>,
    /// Label of the edge to the following chain node; `None` for the last node.
    pub edge_to_next: Option<String>,
}
229
/// Source language of a file, as recognised from its extension.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Language {
    Rust,
    TypeScript,
    Python,
    Unknown,
}

impl Language {
    /// Classifies `path` by its (case-sensitive) file extension.
    ///
    /// JavaScript extensions (`js`, `jsx`) are reported as [`Language::TypeScript`].
    /// A missing, non-UTF-8 or unrecognised extension yields [`Language::Unknown`].
    pub fn from_path(path: &Path) -> Self {
        match path.extension().and_then(|raw| raw.to_str()) {
            Some("rs") => Self::Rust,
            Some("ts" | "tsx" | "js" | "jsx") => Self::TypeScript,
            Some("py") => Self::Python,
            _ => Self::Unknown,
        }
    }
}
252
253impl CodeGraph {
256 pub fn extract_cached(repo_dir: &Path, repo_name: &str, base_commit: &str) -> Self {
260 let cache_dir = repo_dir.parent().unwrap_or(repo_dir).join(".graph-cache");
261 let _ = std::fs::create_dir_all(&cache_dir);
262
263 let safe_repo = repo_name.replace('/', "__");
265 let short_commit = &base_commit[..base_commit.len().min(8)];
266 let cache_file = cache_dir.join(format!("{}__{}.json", safe_repo, short_commit));
267
268 if cache_file.exists() {
270 if let Ok(data) = std::fs::read_to_string(&cache_file) {
271 if let Ok(mut graph) = serde_json::from_str::<CodeGraph>(&data) {
272 graph.build_indexes();
273 tracing::info!(
274 "Loaded code graph from cache: {} ({} nodes, {} edges)",
275 cache_file.display(),
276 graph.nodes.len(),
277 graph.edges.len()
278 );
279 return graph;
280 }
281 }
282 let _ = std::fs::remove_file(&cache_file);
284 }
285
286 let graph = Self::extract_from_dir(repo_dir);
288
289 if let Ok(json) = serde_json::to_string(&graph) {
291 let _ = std::fs::write(&cache_file, json);
292 tracing::info!(
293 "Saved code graph to cache: {} ({} nodes, {} edges)",
294 cache_file.display(),
295 graph.nodes.len(),
296 graph.edges.len()
297 );
298 }
299
300 graph
301 }
302
303 pub fn extract_from_dir(dir: &Path) -> Self {
305 let mut nodes = Vec::new();
306 let mut edges = Vec::new();
307
308 let mut class_map: HashMap<String, String> = HashMap::new();
310 let mut func_map: HashMap<String, Vec<String>> = HashMap::new();
311 let mut module_map: HashMap<String, String> = HashMap::new();
312
313 let mut method_to_class: HashMap<String, String> = HashMap::new();
315 let mut class_methods: HashMap<String, Vec<String>> = HashMap::new();
316
317 let mut class_parents: HashMap<String, Vec<String>> = HashMap::new();
319
320 let mut file_imported_names: HashMap<String, HashSet<String>> = HashMap::new();
322
323 let mut file_entries: Vec<(String, String, Language)> = Vec::new();
325
326 for entry in WalkDir::new(dir)
327 .follow_links(false)
328 .max_depth(20)
329 .into_iter()
330 .filter_entry(|e| {
331 let name = e.file_name().to_str().unwrap_or("");
332 !name.starts_with('.')
333 && name != "node_modules"
334 && name != "__pycache__"
335 && name != "target"
336 && name != "build"
337 && name != "dist"
338 && name != ".git"
339 && name != ".eggs"
340 && name != ".tox"
341 })
342 {
343 let entry = match entry {
344 Ok(e) => e,
345 Err(_) => continue,
346 };
347
348 if !entry.file_type().is_file() {
349 continue;
350 }
351
352 let path = entry.path();
353 let lang = Language::from_path(path);
354 if lang == Language::Unknown {
355 continue;
356 }
357
358 let rel_path = path
359 .strip_prefix(dir)
360 .unwrap_or(path)
361 .to_string_lossy()
362 .to_string();
363
364 if rel_path == "setup.py" || rel_path == "conftest.py" || rel_path.contains("__pycache__") {
366 continue;
367 }
368
369 let content = match std::fs::read_to_string(path) {
370 Ok(c) => c,
371 Err(_) => continue,
372 };
373
374 let module_path = rel_path
376 .replace('/', ".")
377 .trim_end_matches(".py")
378 .trim_end_matches(".rs")
379 .trim_end_matches(".ts")
380 .trim_end_matches(".tsx")
381 .trim_end_matches(".js")
382 .trim_end_matches(".jsx")
383 .to_string();
384
385 let file_id = format!("file:{}", rel_path);
386 module_map.insert(module_path.clone(), file_id.clone());
387
388 let parts: Vec<&str> = module_path.split('.').collect();
390 for start in 1..parts.len() {
391 let partial = parts[start..].join(".");
392 module_map.entry(partial).or_insert_with(|| file_id.clone());
393 }
394
395 file_entries.push((rel_path, content, lang));
396 }
397
398 let mut parser = Parser::new();
400 let python_language = tree_sitter_python::LANGUAGE;
401 parser.set_language(&python_language.into()).ok();
402
403 for (rel_path, content, lang) in &file_entries {
404 let _file_id = format!("file:{}", rel_path);
405
406 let (file_nodes, file_edges, imports) = match lang {
407 Language::Python => {
408 extract_python_tree_sitter(
409 rel_path,
410 content,
411 &mut parser,
412 &mut class_map,
413 )
414 }
415 Language::Rust => {
416 extract_rust_tree_sitter(
417 rel_path,
418 content,
419 &mut parser,
420 &mut class_map,
421 )
422 }
423 Language::TypeScript => {
424 let ext = rel_path.rsplit('.').next().unwrap_or("ts");
425 extract_typescript_tree_sitter(
426 rel_path,
427 content,
428 &mut parser,
429 &mut class_map,
430 ext,
431 )
432 }
433 Language::Unknown => continue,
434 };
435
436 for node in &file_nodes {
438 if node.kind == NodeKind::Class {
439 class_map.insert(node.name.clone(), node.id.clone());
440 } else if node.kind == NodeKind::Function {
441 func_map
442 .entry(node.name.clone())
443 .or_default()
444 .push(node.id.clone());
445 }
446 }
447
448 for edge in &file_edges {
450 if edge.relation == EdgeRelation::DefinedIn {
451 if edge.from.starts_with("method:") && edge.to.starts_with("class:") {
452 method_to_class.insert(edge.from.clone(), edge.to.clone());
453 class_methods
454 .entry(edge.to.clone())
455 .or_default()
456 .push(edge.from.clone());
457 }
458 }
459 if edge.relation == EdgeRelation::Inherits {
460 if let Some(parent_id) = class_map.get(
461 edge.to.strip_prefix("class_ref:").unwrap_or(&edge.to),
462 ) {
463 class_parents
464 .entry(edge.from.clone())
465 .or_default()
466 .push(parent_id.clone());
467 }
468 }
469 }
470
471 if !imports.is_empty() {
473 file_imported_names.insert(rel_path.clone(), imports);
474 }
475
476 if !file_nodes.is_empty() {
478 nodes.push(CodeNode::new_file(rel_path));
479 }
480
481 nodes.extend(file_nodes);
482 edges.extend(file_edges);
483 }
484
485 let class_init_map: HashMap<String, Vec<(String, String)>> = {
487 let mut map: HashMap<String, Vec<(String, String)>> = HashMap::new();
488 for node in &nodes {
489 if node.kind == NodeKind::Function && node.name == "__init__" && !node.is_test {
490 if let Some(class_id) = method_to_class.get(&node.id) {
491 if let Some(class_name) = class_id.rsplit(':').next() {
492 map.entry(class_name.to_string())
493 .or_default()
494 .push((node.file_path.clone(), node.id.clone()));
495 }
496 }
497 }
498 }
499 map
500 };
501
502 let node_pkg_map: HashMap<String, String> = nodes
504 .iter()
505 .map(|n| {
506 let pkg = n.file_path.rsplitn(2, '/').nth(1).unwrap_or("").to_string();
507 (n.id.clone(), pkg)
508 })
509 .collect();
510
511 for (rel_path, content, lang) in &file_entries {
513 if *lang != Language::Python {
514 continue;
515 }
516
517 let file_func_ids: HashSet<String> = nodes
518 .iter()
519 .filter(|n| n.file_path == *rel_path && n.kind == NodeKind::Function)
520 .map(|n| n.id.clone())
521 .collect();
522
523 let package_dir = rel_path.rsplitn(2, '/').nth(1).unwrap_or("");
524
525 if let Some(tree) = parser.parse(content, None) {
526 let source = content.as_bytes();
527 let root = tree.root_node();
528
529 extract_calls_from_tree(
530 root,
531 source,
532 rel_path,
533 &func_map,
534 &method_to_class,
535 &class_parents,
536 &file_func_ids,
537 &file_imported_names,
538 package_dir,
539 &class_init_map,
540 &node_pkg_map,
541 &mut edges,
542 );
543 }
544
545 let is_test_file = rel_path.contains("/tests/") || rel_path.contains("/test_");
547 if is_test_file {
548 let file_id = format!("file:{}", rel_path);
549 let re_from_import = Regex::new(r"^from\s+([\w.]+)\s+import").unwrap();
550
551 for line in content.lines() {
552 if let Some(cap) = re_from_import.captures(line) {
553 let module = cap[1].to_string();
554 if let Some(source_file_id) = module_map.get(&module) {
555 edges.push(CodeEdge {
556 from: file_id.clone(),
557 to: source_file_id.clone(),
558 relation: EdgeRelation::TestsFor,
559 weight: 0.5,
560 call_count: 1,
561 in_error_path: false,
562 confidence: 1.0,
563 });
564 }
565 }
566 }
567 }
568 }
569
570 let mut resolved_edges = Vec::new();
572 for edge in edges {
573 if edge.to.starts_with("class_ref:") {
574 let class_name = &edge.to["class_ref:".len()..];
575 if let Some(class_id) = class_map.get(class_name) {
576 resolved_edges.push(CodeEdge {
577 from: edge.from,
578 to: class_id.clone(),
579 relation: edge.relation,
580 weight: edge.weight,
581 call_count: edge.call_count,
582 in_error_path: edge.in_error_path,
583 confidence: edge.confidence,
584 });
585 }
586 } else if edge.to.starts_with("module_ref:") {
587 let module = &edge.to["module_ref:".len()..];
588 if let Some(file_id) = module_map.get(module) {
589 resolved_edges.push(CodeEdge {
590 from: edge.from,
591 to: file_id.clone(),
592 relation: edge.relation,
593 weight: edge.weight,
594 call_count: edge.call_count,
595 in_error_path: edge.in_error_path,
596 confidence: edge.confidence,
597 });
598 }
599 } else if edge.to.starts_with("func_ref:") {
600 let func_name = &edge.to["func_ref:".len()..];
601 if let Some(func_ids) = func_map.get(func_name) {
602 if let Some(func_id) = func_ids.first() {
603 resolved_edges.push(CodeEdge {
604 from: edge.from,
605 to: func_id.clone(),
606 relation: edge.relation,
607 weight: edge.weight,
608 call_count: edge.call_count,
609 in_error_path: edge.in_error_path,
610 confidence: edge.confidence,
611 });
612 }
613 }
614 } else {
615 resolved_edges.push(edge);
616 }
617 }
618
619 let mut edge_map: HashMap<(String, String), CodeEdge> = HashMap::new();
621 let mut other_edges: Vec<CodeEdge> = Vec::new();
622
623 for edge in resolved_edges {
624 if edge.relation == EdgeRelation::Calls {
625 let key = (edge.from.clone(), edge.to.clone());
626 let entry = edge_map.entry(key).or_insert_with(|| {
627 let mut e = edge.clone();
628 e.call_count = 0;
629 e
630 });
631 entry.call_count += 1;
632 if edge.confidence > entry.confidence {
633 entry.confidence = edge.confidence;
634 }
635 if edge.in_error_path {
636 entry.in_error_path = true;
637 }
638 } else {
639 other_edges.push(edge);
640 }
641 }
642
643 let mut final_edges: Vec<CodeEdge> = edge_map.into_values().collect();
644 final_edges.extend(other_edges);
645
646 for edge in &mut final_edges {
648 edge.compute_weight();
649 }
650
651 add_override_edges(&nodes, &mut final_edges);
653
654 let mut graph = CodeGraph {
655 nodes,
656 edges: final_edges,
657 outgoing: HashMap::new(),
658 incoming: HashMap::new(),
659 node_index: HashMap::new(),
660 };
661 graph.build_indexes();
662 graph
663 }
664
665 pub fn build_indexes(&mut self) {
667 self.node_index.clear();
668 self.outgoing.clear();
669 self.incoming.clear();
670
671 for (i, node) in self.nodes.iter().enumerate() {
672 self.node_index.insert(node.id.clone(), i);
673 }
674
675 for (i, edge) in self.edges.iter().enumerate() {
676 self.outgoing.entry(edge.from.clone()).or_default().push(i);
677 self.incoming.entry(edge.to.clone()).or_default().push(i);
678 }
679 }
680
681 #[inline]
685 pub fn outgoing_edges(&self, node_id: &str) -> impl Iterator<Item = &CodeEdge> {
686 self.outgoing
687 .get(node_id)
688 .map(|indices| indices.as_slice())
689 .unwrap_or(&[])
690 .iter()
691 .map(move |&i| &self.edges[i])
692 }
693
694 #[inline]
696 pub fn incoming_edges(&self, node_id: &str) -> impl Iterator<Item = &CodeEdge> {
697 self.incoming
698 .get(node_id)
699 .map(|indices| indices.as_slice())
700 .unwrap_or(&[])
701 .iter()
702 .map(move |&i| &self.edges[i])
703 }
704
705 #[inline]
707 pub fn node_by_id(&self, node_id: &str) -> Option<&CodeNode> {
708 self.node_index.get(node_id).map(|&i| &self.nodes[i])
709 }
710
711 pub fn get_callers(&self, node_id: &str) -> Vec<&CodeNode> {
713 self.incoming_edges(node_id)
714 .filter(|e| e.relation == EdgeRelation::Calls)
715 .filter_map(|e| self.node_by_id(&e.from))
716 .collect()
717 }
718
719 pub fn get_callees(&self, node_id: &str) -> Vec<&CodeNode> {
721 self.outgoing_edges(node_id)
722 .filter(|e| e.relation == EdgeRelation::Calls)
723 .filter_map(|e| self.node_by_id(&e.to))
724 .collect()
725 }
726
727 pub fn get_dependencies(&self, node_id: &str) -> Vec<&CodeNode> {
729 self.outgoing_edges(node_id)
730 .filter_map(|e| self.node_by_id(&e.to))
731 .collect()
732 }
733
734 pub fn get_impact(&self, node_id: &str) -> Vec<&CodeNode> {
736 let mut impacted = Vec::new();
737 let mut visited = HashSet::new();
738 self.collect_dependents(node_id, &mut impacted, &mut visited);
739 impacted
740 }
741
742 fn collect_dependents<'a>(
743 &'a self,
744 node_id: &str,
745 result: &mut Vec<&'a CodeNode>,
746 visited: &mut HashSet<String>,
747 ) {
748 if !visited.insert(node_id.to_string()) {
749 return;
750 }
751
752 for edge in self.incoming_edges(node_id) {
753 if let Some(node) = self.node_by_id(&edge.from) {
754 result.push(node);
755 self.collect_dependents(&edge.from, result, visited);
756 }
757 }
758 }
759
760 pub fn find_relevant_nodes(&self, keywords: &[&str]) -> Vec<&CodeNode> {
762 let mut scored: Vec<(usize, &CodeNode)> = self
763 .nodes
764 .iter()
765 .map(|n| {
766 let score: usize = keywords
767 .iter()
768 .filter(|kw| {
769 let kw_lower = kw.to_lowercase();
770 let name_lower = n.name.to_lowercase();
771 let path_lower = n.file_path.to_lowercase();
772 name_lower.contains(&kw_lower)
773 || path_lower.contains(&kw_lower)
774 || (name_lower.len() >= 5
775 && kw_lower.contains(name_lower.trim_start_matches('_')))
776 })
777 .count();
778 (score, n)
779 })
780 .filter(|(score, _)| *score > 0)
781 .collect();
782
783 scored.sort_by(|a, b| b.0.cmp(&a.0));
784 let mut results: Vec<&CodeNode> = scored.into_iter().map(|(_, n)| n).collect();
785
786 let relevant_files: HashSet<String> = results.iter().map(|n| n.file_path.clone()).collect();
788
789 for node in &self.nodes {
790 if relevant_files.contains(&node.file_path) && !results.iter().any(|r| r.id == node.id) {
791 results.push(node);
792 }
793 }
794
795 let mut inheritance_additions: Vec<&CodeNode> = Vec::new();
797 let result_ids: HashSet<String> = results.iter().map(|n| n.id.clone()).collect();
798
799 for node in &results {
800 if node.kind == NodeKind::Class {
801 let chain = self.get_inheritance_chain(&node.id);
802 for ancestor_id in &chain {
803 if !result_ids.contains(ancestor_id) {
804 if let Some(ancestor) = self.node_by_id(ancestor_id) {
805 inheritance_additions.push(ancestor);
806 }
807 }
808 }
809 for edge in self.incoming_edges(&node.id) {
810 if edge.relation == EdgeRelation::Inherits && !result_ids.contains(&edge.from) {
811 if let Some(child) = self.node_by_id(&edge.from) {
812 inheritance_additions.push(child);
813 }
814 }
815 }
816 }
817 }
818
819 let mut extra_files: HashSet<String> = HashSet::new();
820 for node in &inheritance_additions {
821 if !results.iter().any(|r| r.id == node.id) {
822 extra_files.insert(node.file_path.clone());
823 results.push(node);
824 }
825 }
826 for node in &self.nodes {
827 if extra_files.contains(&node.file_path) && !results.iter().any(|r| r.id == node.id) {
828 results.push(node);
829 }
830 }
831
832 let mut import_additions: Vec<&CodeNode> = Vec::new();
835 let current_ids: HashSet<String> = results.iter().map(|n| n.id.clone()).collect();
836
837 for node in &results {
838 if node.kind == NodeKind::File {
839 for edge in self.outgoing_edges(&node.id) {
840 if edge.relation == EdgeRelation::Imports {
841 if !current_ids.contains(&edge.to) {
842 if let Some(imported) = self.node_by_id(&edge.to) {
843 import_additions.push(imported);
844 }
845 }
846 }
847 }
848 }
849 }
850
851 for node in &import_additions {
853 if node.kind == NodeKind::File {
854 let has_keyword_match = self
855 .nodes
856 .iter()
857 .filter(|n| n.file_path == node.file_path && n.kind != NodeKind::File)
858 .any(|n| {
859 let name_lower = n.name.to_lowercase();
860 keywords.iter().any(|kw| {
861 let kw_lower = kw.to_lowercase();
862 name_lower.contains(&kw_lower) || kw_lower.contains(&name_lower)
863 })
864 });
865 if has_keyword_match && !results.iter().any(|r| r.id == node.id) {
866 results.push(node);
867 for entity in &self.nodes {
869 if entity.file_path == node.file_path
870 && !results.iter().any(|r| r.id == entity.id)
871 {
872 results.push(entity);
873 }
874 }
875 }
876 }
877 }
878
879 results
880 }
881
882 pub fn impact_analysis(&self, changed_node_ids: &[&str]) -> ImpactReport<'_> {
884 let mut affected_nodes = Vec::new();
885 let mut affected_tests = Vec::new();
886 let mut seen = HashSet::new();
887
888 for node_id in changed_node_ids {
889 let impacted = self.get_impact(node_id);
890 for node in impacted {
891 if seen.insert(node.id.clone()) {
892 if node.file_path.contains("/tests/") || node.file_path.contains("/test_") {
893 affected_tests.push(node);
894 } else {
895 affected_nodes.push(node);
896 }
897 }
898 }
899 }
900
901 let related_tests = self.find_related_tests(changed_node_ids);
902 for test in related_tests {
903 if seen.insert(test.id.clone()) {
904 affected_tests.push(test);
905 }
906 }
907
908 ImpactReport {
909 affected_source: affected_nodes,
910 affected_tests,
911 }
912 }
913
914 pub fn find_related_tests(&self, source_node_ids: &[&str]) -> Vec<&CodeNode> {
916 let mut test_nodes = Vec::new();
917 let mut seen = HashSet::new();
918
919 let source_files: HashSet<String> = source_node_ids
920 .iter()
921 .filter_map(|id| self.node_by_id(id))
922 .map(|n| n.file_path.clone())
923 .collect();
924
925 let source_file_ids: HashSet<String> = source_files.iter().map(|f| format!("file:{}", f)).collect();
926
927 for source_fid in &source_file_ids {
929 for edge in self.incoming_edges(source_fid.as_str()) {
930 if edge.relation == EdgeRelation::TestsFor {
931 if let Some(test_node) = self.node_by_id(&edge.from) {
932 if seen.insert(test_node.id.clone()) {
933 test_nodes.push(test_node);
934 }
935 for node in &self.nodes {
936 if node.file_path == test_node.file_path
937 && node.kind != NodeKind::File
938 && seen.insert(node.id.clone())
939 {
940 test_nodes.push(node);
941 }
942 }
943 }
944 }
945 }
946 }
947
948 for source_id in source_node_ids.iter() {
950 for edge in self.incoming_edges(source_id) {
951 if edge.relation == EdgeRelation::Calls {
952 if let Some(caller) = self.node_by_id(&edge.from) {
953 if caller.file_path.contains("/tests/") || caller.file_path.contains("/test_") {
954 if seen.insert(caller.id.clone()) {
955 test_nodes.push(caller);
956 }
957 }
958 }
959 }
960 }
961 }
962
963 test_nodes
964 }
965
966 pub fn format_impact_for_llm(&self, changed_node_ids: &[&str], repo_dir: &Path) -> String {
968 let report = self.impact_analysis(changed_node_ids);
969 let mut result = String::new();
970
971 if !report.affected_source.is_empty() {
972 result.push_str("**⚠️ Impact Analysis — Code affected by your change:**\n");
973 for node in &report.affected_source {
974 let prefix = match node.kind {
975 NodeKind::File => "📄",
976 NodeKind::Class => "🔷",
977 NodeKind::Function => "🔹",
978 NodeKind::Module => "📦",
979 };
980 result.push_str(&format!("{} {} (`{}`)\n", prefix, node.name, node.file_path));
981 }
982 result.push('\n');
983 }
984
985 if !report.affected_tests.is_empty() {
986 result.push_str("**🧪 Tests that exercise the code you're changing:**\n");
987 result.push_str("DO NOT break these tests! Make minimal changes.\n\n");
988
989 let mut test_files: HashSet<String> = HashSet::new();
990 for node in &report.affected_tests {
991 test_files.insert(node.file_path.clone());
992 }
993
994 for test_file in &test_files {
995 result.push_str(&format!("📋 `{}`\n", test_file));
996 let funcs: Vec<&str> = report
997 .affected_tests
998 .iter()
999 .filter(|n| n.file_path == *test_file && n.kind == NodeKind::Function)
1000 .map(|n| n.name.as_str())
1001 .collect();
1002 if !funcs.is_empty() {
1003 for func in funcs.iter().take(10) {
1004 result.push_str(&format!(" - {}\n", func));
1005 }
1006 if funcs.len() > 10 {
1007 result.push_str(&format!(" ... and {} more\n", funcs.len() - 10));
1008 }
1009 }
1010 }
1011 result.push('\n');
1012
1013 let test_nodes_refs: Vec<&CodeNode> = report
1014 .affected_tests
1015 .iter()
1016 .filter(|n| n.kind == NodeKind::Function)
1017 .take(10)
1018 .copied()
1019 .collect();
1020
1021 if !test_nodes_refs.is_empty() {
1022 let test_snippets = self.extract_snippets(&test_nodes_refs, repo_dir, 30);
1023 if !test_snippets.is_empty() {
1024 result.push_str("**Key test code (DO NOT break these):**\n```python\n");
1025 for (node_id, snippet) in test_snippets.iter().take(5) {
1026 let name = self.node_name(node_id);
1027 result.push_str(&format!("# --- {} ---\n{}\n\n", name, snippet));
1028 }
1029 result.push_str("```\n");
1030 }
1031 }
1032 }
1033
1034 result
1035 }
1036
1037 pub fn trace_causal_chains_from_symptoms(
1039 &self,
1040 symptom_node_ids: &[&str],
1041 max_depth: usize,
1042 max_chains: usize,
1043 ) -> Vec<CausalChain> {
1044 #[derive(Clone)]
1045 struct WeightedPath {
1046 node_id: String,
1047 accumulated_weight: f32,
1048 chain: Vec<ChainNode>,
1049 }
1050
1051 impl PartialEq for WeightedPath {
1052 fn eq(&self, other: &Self) -> bool {
1053 self.accumulated_weight
1054 .total_cmp(&other.accumulated_weight)
1055 == std::cmp::Ordering::Equal
1056 }
1057 }
1058 impl Eq for WeightedPath {}
1059 impl PartialOrd for WeightedPath {
1060 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
1061 Some(self.cmp(other))
1062 }
1063 }
1064 impl Ord for WeightedPath {
1065 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
1066 self.accumulated_weight.total_cmp(&other.accumulated_weight)
1067 }
1068 }
1069
1070 let mut all_chains: Vec<CausalChain> = Vec::new();
1071
1072 for symptom_id in symptom_node_ids {
1073 let symptom_node = match self.node_by_id(symptom_id) {
1074 Some(n) => n,
1075 None => continue,
1076 };
1077
1078 {
1080 let mut heap: BinaryHeap<WeightedPath> = BinaryHeap::new();
1081 let mut visited = HashSet::new();
1082 visited.insert(symptom_id.to_string());
1083
1084 let start_chain_node = ChainNode {
1085 node_id: symptom_id.to_string(),
1086 node_name: symptom_node.name.clone(),
1087 file_path: symptom_node.file_path.clone(),
1088 line: symptom_node.line,
1089 edge_to_next: None,
1090 };
1091 heap.push(WeightedPath {
1092 node_id: symptom_id.to_string(),
1093 accumulated_weight: 1.0,
1094 chain: vec![start_chain_node],
1095 });
1096
1097 while let Some(current) = heap.pop() {
1098 if current.chain.len() > max_depth {
1099 continue;
1100 }
1101
1102 for edge in self.outgoing_edges(¤t.node_id) {
1103 let (target_id, edge_label) = match edge.relation {
1104 EdgeRelation::Calls => (&edge.to, "calls"),
1105 EdgeRelation::Inherits => (&edge.to, "inherits"),
1106 EdgeRelation::Imports => (&edge.to, "imports"),
1107 EdgeRelation::Overrides => (&edge.to, "overrides"),
1108 EdgeRelation::TestsFor => (&edge.to, "tests"),
1109 _ => continue,
1110 };
1111 if visited.contains(target_id) {
1112 continue;
1113 }
1114 if let Some(target_node) = self.node_by_id(target_id) {
1115 visited.insert(target_node.id.clone());
1116 let new_weight = current.accumulated_weight * edge.weight;
1117
1118 let mut new_chain = current.chain.clone();
1119 if let Some(last) = new_chain.last_mut() {
1120 last.edge_to_next = Some(edge_label.to_string());
1121 }
1122 new_chain.push(ChainNode {
1123 node_id: target_node.id.clone(),
1124 node_name: target_node.name.clone(),
1125 file_path: target_node.file_path.clone(),
1126 line: target_node.line,
1127 edge_to_next: None,
1128 });
1129
1130 if new_chain.len() >= 2 {
1131 all_chains.push(CausalChain {
1132 symptom_node_id: symptom_id.to_string(),
1133 chain: new_chain.clone(),
1134 });
1135 }
1136
1137 if new_chain.len() < max_depth {
1138 heap.push(WeightedPath {
1139 node_id: target_node.id.clone(),
1140 accumulated_weight: new_weight,
1141 chain: new_chain,
1142 });
1143 }
1144 }
1145 }
1146 }
1147 }
1148
1149 {
1151 let mut heap: BinaryHeap<WeightedPath> = BinaryHeap::new();
1152 let mut visited = HashSet::new();
1153 visited.insert(symptom_id.to_string());
1154
1155 let start_chain_node = ChainNode {
1156 node_id: symptom_id.to_string(),
1157 node_name: symptom_node.name.clone(),
1158 file_path: symptom_node.file_path.clone(),
1159 line: symptom_node.line,
1160 edge_to_next: None,
1161 };
1162 heap.push(WeightedPath {
1163 node_id: symptom_id.to_string(),
1164 accumulated_weight: 1.0,
1165 chain: vec![start_chain_node],
1166 });
1167
1168 while let Some(current) = heap.pop() {
1169 if current.chain.len() > max_depth {
1170 continue;
1171 }
1172
1173 for edge in self.incoming_edges(¤t.node_id) {
1174 if edge.relation != EdgeRelation::Calls
1175 && edge.relation != EdgeRelation::Imports
1176 && edge.relation != EdgeRelation::Overrides
1177 {
1178 continue;
1179 }
1180 if visited.contains(&edge.from) {
1181 continue;
1182 }
1183 if let Some(caller) = self.node_by_id(&edge.from) {
1184 if caller.file_path.contains("/tests/")
1185 || caller.file_path.contains("/test_")
1186 {
1187 continue;
1188 }
1189 visited.insert(caller.id.clone());
1190 let new_weight = current.accumulated_weight * edge.weight;
1191
1192 let edge_label = match edge.relation {
1193 EdgeRelation::Imports => "imported_by",
1194 EdgeRelation::Overrides => "overridden_by",
1195 _ => "called_by",
1196 };
1197 let mut new_chain = current.chain.clone();
1198 if let Some(last) = new_chain.last_mut() {
1199 last.edge_to_next = Some(edge_label.to_string());
1200 }
1201 new_chain.push(ChainNode {
1202 node_id: caller.id.clone(),
1203 node_name: caller.name.clone(),
1204 file_path: caller.file_path.clone(),
1205 line: caller.line,
1206 edge_to_next: None,
1207 });
1208
1209 if new_chain.len() >= 2 {
1210 all_chains.push(CausalChain {
1211 symptom_node_id: symptom_id.to_string(),
1212 chain: new_chain.clone(),
1213 });
1214 }
1215
1216 if new_chain.len() < max_depth {
1217 heap.push(WeightedPath {
1218 node_id: caller.id.clone(),
1219 accumulated_weight: new_weight,
1220 chain: new_chain,
1221 });
1222 }
1223 }
1224 }
1225 }
1226 }
1227 }
1228
1229 all_chains.sort_by(|a, b| {
1231 let len_cmp = a.chain.len().cmp(&b.chain.len());
1232 if len_cmp != std::cmp::Ordering::Equal {
1233 return len_cmp;
1234 }
1235 let a_source = a
1236 .chain
1237 .iter()
1238 .filter(|n| !n.file_path.contains("/tests/") && !n.file_path.contains("/test_"))
1239 .count();
1240 let b_source = b
1241 .chain
1242 .iter()
1243 .filter(|n| !n.file_path.contains("/tests/") && !n.file_path.contains("/test_"))
1244 .count();
1245 b_source.cmp(&a_source)
1246 });
1247
1248 let mut deduped: Vec<CausalChain> = Vec::new();
1249 for chain in &all_chains {
1250 let is_prefix = deduped.iter().any(|existing| {
1251 existing.chain.len() > chain.chain.len()
1252 && chain
1253 .chain
1254 .iter()
1255 .zip(existing.chain.iter())
1256 .all(|(a, b)| a.node_id == b.node_id)
1257 });
1258 if is_prefix {
1259 continue;
1260 }
1261 deduped.retain(|existing| {
1262 !(existing.chain.len() < chain.chain.len()
1263 && existing
1264 .chain
1265 .iter()
1266 .zip(chain.chain.iter())
1267 .all(|(a, b)| a.node_id == b.node_id))
1268 });
1269 deduped.push(chain.clone());
1270 }
1271
1272 deduped.truncate(max_chains);
1273 deduped
1274 }
1275
1276 pub fn trace_causal_chains(
1278 &self,
1279 changed_node_ids: &[&str],
1280 failed_p2p_tests: &[String],
1281 failed_f2p_tests: &[String],
1282 ) -> String {
1283 if failed_p2p_tests.is_empty() && failed_f2p_tests.is_empty() {
1284 return String::new();
1285 }
1286
1287 let mut result = String::new();
1288
1289 if !failed_p2p_tests.is_empty() {
1290 result.push_str("## 🚨 CAUSAL ANALYSIS — Why Your Fix Broke Existing Tests\n\n");
1291 result.push_str(
1292 "These tests PASSED before your change and now FAIL. You MUST fix these regressions.\n\n",
1293 );
1294
1295 for test_name in failed_p2p_tests {
1296 let short_name = test_name.split("::").last().unwrap_or(test_name);
1297 result.push_str(&format!("### ❌ REGRESSION: `{}`\n", short_name));
1298
1299 let test_node = self.nodes.iter().find(|n| {
1300 n.name == short_name
1301 || n.name.ends_with(short_name)
1302 || (n.file_path.contains("/test") && n.name == short_name)
1303 });
1304
1305 if let Some(test) = test_node {
1306 let chains = self.find_paths_to_test(changed_node_ids, &test.id);
1307
1308 if !chains.is_empty() {
1309 result.push_str("**Causal chain(s):**\n");
1310 for chain in chains.iter().take(3) {
1311 let chain_str: Vec<String> = chain
1312 .iter()
1313 .map(|id| {
1314 self.nodes
1315 .iter()
1316 .find(|n| n.id == *id)
1317 .map(|n| format!("`{}` ({})", n.name, n.file_path))
1318 .unwrap_or_else(|| id.to_string())
1319 })
1320 .collect();
1321 result.push_str(&format!(" 🔗 {}\n", chain_str.join(" → ")));
1322 }
1323 result.push_str("\n**What this means:** Your change propagated through the dependency chain above and broke this test.\n");
1324 result.push_str("**How to fix:** Make your change more surgical — ensure the modified function's behavior is backward-compatible for the callers in this chain.\n\n");
1325 } else {
1326 let changed_files: HashSet<String> = changed_node_ids
1328 .iter()
1329 .filter_map(|id| self.node_by_id(id))
1330 .map(|n| n.file_path.clone())
1331 .collect();
1332
1333 if changed_files
1334 .iter()
1335 .any(|f| test.file_path.contains(f.as_str()))
1336 || self.shares_import(&test.id, changed_node_ids)
1337 {
1338 result.push_str("**Connection:** Indirect — test imports or uses a module you changed.\n");
1339 result.push_str("**How to fix:** Check that your change doesn't alter the public API or default behavior of the module.\n\n");
1340 } else {
1341 result.push_str("**Connection:** Could not trace via graph (may be via dynamic dispatch, monkey-patching, or shared global state).\n");
1342 result.push_str("**How to fix:** Read the test's assertion error carefully — it will tell you what behavior changed.\n\n");
1343 }
1344 }
1345 } else {
1346 result.push_str(
1347 "**Note:** Test not found in code graph. Read the error output to understand what broke.\n\n",
1348 );
1349 }
1350 }
1351
1352 result.push_str("### 🎯 Overall Regression Fix Strategy\n");
1353 result.push_str(
1354 "1. **Don't change your approach** — your bug fix logic is likely correct\n",
1355 );
1356 result.push_str("2. **Narrow the scope** — guard your change with a condition so it only applies to the bug case\n");
1357 result.push_str("3. **Add backward compatibility** — if you changed a return type/value, ensure callers still get what they expect\n");
1358 result.push_str("4. **Check default parameters** — if you changed defaults, existing callers rely on the old defaults\n\n");
1359 }
1360
1361 if !failed_f2p_tests.is_empty() {
1362 result.push_str("## ⚠️ Original Bug Not Fixed\n");
1363 result.push_str("These tests still fail — your fix is incomplete or incorrect:\n");
1364 for test_name in failed_f2p_tests {
1365 let short_name = test_name.split("::").last().unwrap_or(test_name);
1366 result.push_str(&format!("- `{}`\n", short_name));
1367 }
1368 result.push('\n');
1369 }
1370
1371 result
1372 }
1373
1374 fn find_paths_to_test(&self, changed_node_ids: &[&str], test_node_id: &str) -> Vec<Vec<String>> {
1375 let mut paths = Vec::new();
1376
1377 for changed_id in changed_node_ids {
1378 if let Some(path) = self.bfs_path(test_node_id, changed_id, 5) {
1379 let mut p = path;
1380 p.reverse();
1381 paths.push(p);
1382 }
1383 }
1384
1385 paths
1386 }
1387
1388 pub fn bfs_path(&self, from: &str, to: &str, max_depth: usize) -> Option<Vec<String>> {
1390 let mut queue: VecDeque<(String, Vec<String>)> = VecDeque::new();
1391 let mut visited = HashSet::new();
1392
1393 queue.push_back((from.to_string(), vec![from.to_string()]));
1394 visited.insert(from.to_string());
1395
1396 while let Some((current, path)) = queue.pop_front() {
1397 if path.len() > max_depth {
1398 continue;
1399 }
1400
1401 for edge in self.outgoing_edges(¤t) {
1402 if edge.to == to {
1403 let mut final_path = path.clone();
1404 final_path.push(edge.to.clone());
1405 return Some(final_path);
1406 }
1407 if !visited.contains(&edge.to) {
1408 visited.insert(edge.to.clone());
1409 let mut new_path = path.clone();
1410 new_path.push(edge.to.clone());
1411 queue.push_back((edge.to.clone(), new_path));
1412 }
1413 }
1414 }
1415 None
1416 }
1417
1418 pub fn get_node_summary(&self, node_id: &str, repo_dir: &Path) -> String {
1420 let node = match self.node_by_id(node_id) {
1421 Some(n) => n,
1422 None => return format!("[unknown node: {}]", node_id),
1423 };
1424
1425 let mut result = format!(
1426 "{} ({}:{})",
1427 node.name,
1428 node.file_path,
1429 node.line.map(|l| l.to_string()).unwrap_or_else(|| "?".to_string()),
1430 );
1431
1432 let full_path = repo_dir.join(&node.file_path);
1433 if let Ok(content) = std::fs::read_to_string(&full_path) {
1434 let lines: Vec<&str> = content.lines().collect();
1435 if let Some(start_line) = node.line {
1436 if start_line > 0 && start_line <= lines.len() {
1437 let start_idx = start_line - 1;
1438 let end_idx = (start_idx + 15).min(lines.len());
1439 let preview: String = lines[start_idx..end_idx]
1440 .iter()
1441 .map(|l| *l)
1442 .collect::<Vec<_>>()
1443 .join("\n");
1444 result.push('\n');
1445 result.push_str(&preview);
1446 }
1447 }
1448 }
1449
1450 result
1451 }
1452
1453 pub fn extract_snippets(
1455 &self,
1456 nodes: &[&CodeNode],
1457 repo_dir: &Path,
1458 max_lines: usize,
1459 ) -> HashMap<String, String> {
1460 let mut snippets = HashMap::new();
1461 let mut file_cache: HashMap<String, Vec<String>> = HashMap::new();
1462
1463 for node in nodes {
1464 if node.kind == NodeKind::File {
1465 continue;
1466 }
1467
1468 let file_path = repo_dir.join(&node.file_path);
1469 let lines = file_cache.entry(node.file_path.clone()).or_insert_with(|| {
1470 std::fs::read_to_string(&file_path)
1471 .unwrap_or_default()
1472 .lines()
1473 .map(|l| l.to_string())
1474 .collect()
1475 });
1476
1477 if let Some(start_line) = node.line {
1478 if start_line == 0 || start_line > lines.len() {
1479 continue;
1480 }
1481 let start_idx = start_line - 1;
1482
1483 let base_indent = lines[start_idx]
1484 .chars()
1485 .take_while(|c| c.is_whitespace())
1486 .count();
1487
1488 let mut end_idx = start_idx + 1;
1489 while end_idx < lines.len() && end_idx < start_idx + max_lines {
1490 let line = &lines[end_idx];
1491 if line.trim().is_empty() {
1492 end_idx += 1;
1493 continue;
1494 }
1495 let indent = line.chars().take_while(|c| c.is_whitespace()).count();
1496 if indent <= base_indent && !line.trim().is_empty() {
1497 break;
1498 }
1499 end_idx += 1;
1500 }
1501
1502 let snippet: String = lines[start_idx..end_idx.min(lines.len())]
1503 .iter()
1504 .map(|l| l.as_str())
1505 .collect::<Vec<_>>()
1506 .join("\n");
1507
1508 if !snippet.trim().is_empty() {
1509 snippets.insert(node.id.clone(), snippet);
1510 }
1511 }
1512 }
1513
1514 snippets
1515 }
1516
1517 pub fn format_for_llm(&self, keywords: &[&str], max_chars: usize) -> String {
1519 let relevant = self.find_relevant_nodes(keywords);
1520
1521 if relevant.is_empty() {
1522 return self.format_file_summary(max_chars);
1523 }
1524
1525 let mut result = String::from("**Code structure (relevant to issue):**\n");
1526
1527 result.push_str("\nRelevant files/classes/functions:\n");
1528 let relevant_ids: HashSet<&str> = relevant.iter().map(|n| n.id.as_str()).collect();
1529
1530 for node in relevant.iter().take(20) {
1531 let prefix = match node.kind {
1532 NodeKind::File => "📄",
1533 NodeKind::Class => "🔷",
1534 NodeKind::Function => "🔹",
1535 NodeKind::Module => "📦",
1536 };
1537 let line_info = node.line.map(|l| format!(" (line {})", l)).unwrap_or_default();
1538 result.push_str(&format!(
1539 "{} {} — `{}`{}\n",
1540 prefix, node.name, node.file_path, line_info
1541 ));
1542
1543 if result.len() > max_chars / 2 {
1544 break;
1545 }
1546 }
1547
1548 let relevant_edges: Vec<&CodeEdge> = self
1549 .edges
1550 .iter()
1551 .filter(|e| {
1552 relevant_ids.contains(e.from.as_str()) || relevant_ids.contains(e.to.as_str())
1553 })
1554 .filter(|e| e.relation != EdgeRelation::DefinedIn)
1555 .collect();
1556
1557 if !relevant_edges.is_empty() {
1558 result.push_str("\nRelationships:\n");
1559 for edge in relevant_edges.iter().take(15) {
1560 let from_name = self.node_name(&edge.from);
1561 let to_name = self.node_name(&edge.to);
1562 result.push_str(&format!(
1563 " {} --[{}]--> {}\n",
1564 from_name, edge.relation, to_name
1565 ));
1566
1567 if result.len() > max_chars {
1568 break;
1569 }
1570 }
1571 }
1572
1573 let relevant_classes: Vec<&&CodeNode> = relevant
1574 .iter()
1575 .filter(|n| n.kind == NodeKind::Class)
1576 .collect();
1577
1578 if !relevant_classes.is_empty() {
1579 result.push_str("\nInheritance:\n");
1580 for cls in relevant_classes.iter().take(5) {
1581 let chain = self.get_inheritance_chain(&cls.id);
1582 if chain.len() > 1 {
1583 let names: Vec<String> =
1584 chain.iter().map(|id| self.node_name(id)).collect();
1585 result.push_str(&format!(" {} \n", names.join(" → ")));
1586 }
1587 }
1588 }
1589
1590 let file_count = self.nodes.iter().filter(|n| n.kind == NodeKind::File).count();
1591 let class_count = self.nodes.iter().filter(|n| n.kind == NodeKind::Class).count();
1592 let import_count = self
1593 .edges
1594 .iter()
1595 .filter(|e| e.relation == EdgeRelation::Imports)
1596 .count();
1597 let inherit_count = self
1598 .edges
1599 .iter()
1600 .filter(|e| e.relation == EdgeRelation::Inherits)
1601 .count();
1602
1603 result.push_str(&format!(
1604 "\nGraph: {} files, {} classes, {} imports, {} inheritance edges\n",
1605 file_count, class_count, import_count, inherit_count
1606 ));
1607
1608 if result.len() > max_chars {
1609 result.truncate(max_chars);
1610 result.push_str("\n...[truncated]\n");
1611 }
1612
1613 result
1614 }
1615
1616 fn format_file_summary(&self, max_chars: usize) -> String {
1617 let mut result = String::from("**Repository files:**\n");
1618
1619 let files: Vec<&CodeNode> = self
1620 .nodes
1621 .iter()
1622 .filter(|n| n.kind == NodeKind::File)
1623 .collect();
1624
1625 for file in &files {
1626 let classes: Vec<String> = self
1627 .nodes
1628 .iter()
1629 .filter(|n| n.kind == NodeKind::Class && n.file_path == file.file_path)
1630 .map(|n| n.name.clone())
1631 .collect();
1632
1633 let mut line = format!("- `{}`", file.file_path);
1634 if !classes.is_empty() {
1635 line.push_str(&format!(" — {}", classes.join(", ")));
1636 }
1637 line.push('\n');
1638
1639 if result.len() + line.len() > max_chars {
1640 result.push_str(&format!("... and {} more files\n", files.len()));
1641 break;
1642 }
1643 result.push_str(&line);
1644 }
1645
1646 result
1647 }
1648
1649 fn node_name(&self, id: &str) -> String {
1650 self.nodes
1651 .iter()
1652 .find(|n| n.id == id)
1653 .map(|n| n.name.clone())
1654 .unwrap_or_else(|| id.to_string())
1655 }
1656
1657 fn get_inheritance_chain(&self, class_id: &str) -> Vec<String> {
1658 let mut chain = vec![class_id.to_string()];
1659 let mut current = class_id.to_string();
1660
1661 for _ in 0..10 {
1662 let parent = self
1663 .edges
1664 .iter()
1665 .find(|e| e.from == current && e.relation == EdgeRelation::Inherits);
1666 match parent {
1667 Some(edge) => {
1668 chain.push(edge.to.clone());
1669 current = edge.to.clone();
1670 }
1671 None => break,
1672 }
1673 }
1674
1675 chain
1676 }
1677
1678 fn shares_import(&self, test_node_id: &str, changed_node_ids: &[&str]) -> bool {
1681 let test_imports: HashSet<String> = self
1682 .edges
1683 .iter()
1684 .filter(|e| e.from == test_node_id && e.relation == EdgeRelation::Imports)
1685 .map(|e| e.to.clone())
1686 .collect();
1687
1688 let changed_files: HashSet<String> = changed_node_ids
1689 .iter()
1690 .filter_map(|id| self.node_by_id(id))
1691 .flat_map(|n| {
1692 let file_id = format!("file:{}", n.file_path);
1693 vec![n.id.clone(), file_id]
1694 })
1695 .collect();
1696
1697 test_imports.intersection(&changed_files).next().is_some()
1698 }
1699
1700 pub fn grep_for_identifiers(&self, repo_dir: &Path, identifiers: &[&str]) -> Vec<CodeNode> {
1702 let mut found_nodes = Vec::new();
1703 let existing_names: HashSet<String> = self.nodes.iter().map(|n| n.name.clone()).collect();
1704
1705 for ident in identifiers {
1706 if existing_names.contains(*ident) {
1707 continue;
1708 }
1709
1710 let patterns = [
1711 format!("class {}[:(]", ident),
1712 format!("def {}[(]", ident),
1713 format!("class {}\\b", ident),
1714 ];
1715
1716 for pattern in &patterns {
1717 if let Ok(output) = std::process::Command::new("grep")
1718 .args(["-rn", pattern, "--include=*.py", "-l"])
1719 .current_dir(repo_dir)
1720 .output()
1721 {
1722 let stdout = String::from_utf8_lossy(&output.stdout);
1723 for file_path in stdout.lines().take(3) {
1724 let file_path = file_path.trim();
1725 if file_path.is_empty()
1726 || file_path.contains("/tests/")
1727 || file_path.contains("/test_")
1728 {
1729 continue;
1730 }
1731
1732 if let Ok(line_output) = std::process::Command::new("grep")
1733 .args(["-n", pattern, file_path])
1734 .current_dir(repo_dir)
1735 .output()
1736 {
1737 let line_stdout = String::from_utf8_lossy(&line_output.stdout);
1738 if let Some(first_line) = line_stdout.lines().next() {
1739 let line_num: usize = first_line
1740 .split(':')
1741 .next()
1742 .unwrap_or("0")
1743 .parse()
1744 .unwrap_or(0);
1745
1746 let is_class = first_line.contains("class ");
1747 found_nodes.push(CodeNode {
1748 id: format!("grep:{}:{}", file_path, ident),
1749 kind: if is_class {
1750 NodeKind::Class
1751 } else {
1752 NodeKind::Function
1753 },
1754 name: ident.to_string(),
1755 file_path: file_path.to_string(),
1756 line: if line_num > 0 { Some(line_num) } else { None },
1757 decorators: Vec::new(),
1758 signature: None,
1759 docstring: None,
1760 line_count: 0,
1761 is_test: false,
1762 });
1763 break;
1764 }
1765 }
1766 }
1767 }
1768 if found_nodes.iter().any(|n| n.name == *ident) {
1769 break;
1770 }
1771 }
1772 }
1773
1774 found_nodes
1775 }
1776
1777 pub fn extract_keywords(problem_statement: &str) -> Vec<&str> {
1779 let mut keywords = Vec::new();
1780
1781 for word in
1782 problem_statement.split(|c: char| !c.is_alphanumeric() && c != '_' && c != '.')
1783 {
1784 let trimmed = word.trim();
1785 if trimmed.len() < 3 {
1786 continue;
1787 }
1788 let lower = trimmed.to_lowercase();
1789 if [
1790 "the", "and", "for", "that", "this", "with", "from", "not", "but", "are", "was",
1791 "has", "have", "can", "should", "would", "when", "what", "how", "does", "bug",
1792 "fix", "issue", "error", "problem", "description",
1793 ]
1794 .contains(&lower.as_str())
1795 {
1796 continue;
1797 }
1798 if trimmed.contains('_')
1799 || trimmed.contains('.')
1800 || trimmed.chars().any(|c| c.is_uppercase())
1801 || trimmed.ends_with(".py")
1802 {
1803 keywords.push(trimmed);
1804 }
1805 }
1806
1807 keywords.dedup();
1808 keywords.truncate(20);
1809 keywords
1810 }
1811
1812 pub fn has_node(&self, file_path: &str, name: &str) -> bool {
1814 let needle = file_path.strip_prefix("./").unwrap_or(file_path);
1815 self.nodes.iter().any(|n| {
1816 let hay = n.file_path.strip_prefix("./").unwrap_or(&n.file_path);
1817 hay == needle && n.name == name
1818 })
1819 }
1820
1821 pub fn find_node(&self, file_path: &str, name: &str) -> Option<&CodeNode> {
1823 let needle = file_path.strip_prefix("./").unwrap_or(file_path);
1824 self.nodes.iter().find(|n| {
1825 let hay = n.file_path.strip_prefix("./").unwrap_or(&n.file_path);
1826 hay == needle && n.name == name
1827 })
1828 }
1829
1830 pub fn add_file_nodes(
1832 &mut self,
1833 repo_dir: &Path,
1834 file_path: &Path,
1835 target_names: Option<&[String]>,
1836 ) -> anyhow::Result<()> {
1837 use anyhow::Context;
1838
1839 let full_path = repo_dir.join(file_path);
1840 if !full_path.exists() {
1841 anyhow::bail!("File not found: {:?}", full_path);
1842 }
1843
1844 let source = std::fs::read_to_string(&full_path)
1845 .context(format!("Failed to read {:?}", full_path))?;
1846
1847 let mut parser = Parser::new();
1848 let language = tree_sitter_python::LANGUAGE;
1849 parser
1850 .set_language(&language.into())
1851 .context("Failed to set Python language")?;
1852
1853 let tree = parser
1854 .parse(&source, None)
1855 .context("Failed to parse Python file")?;
1856
1857 let file_path_str = file_path.to_string_lossy().to_string();
1858
1859 let root = tree.root_node();
1860
1861 fn extract_from_node(
1862 node: tree_sitter::Node,
1863 source: &str,
1864 file_path: &str,
1865 nodes: &mut Vec<CodeNode>,
1866 target_names: Option<&[String]>,
1867 ) {
1868 if node.kind() == "function_definition" {
1869 if let Some(name_node) = node.child_by_field_name("name") {
1870 let name = &source[name_node.byte_range()];
1871 let matched =
1872 target_names.map_or(true, |targets| targets.iter().any(|t| t == name));
1873 if matched {
1874 let line = name_node.start_position().row + 1;
1875 let id = format!("func:{}:{}", file_path, name);
1876 nodes.push(CodeNode {
1877 id,
1878 kind: NodeKind::Function,
1879 name: name.to_string(),
1880 file_path: file_path.to_string(),
1881 line: Some(line),
1882 decorators: vec![],
1883 signature: None,
1884 docstring: None,
1885 line_count: 0,
1886 is_test: false,
1887 });
1888 }
1889 }
1890 } else if node.kind() == "class_definition" {
1891 if let Some(name_node) = node.child_by_field_name("name") {
1892 let name = &source[name_node.byte_range()];
1893 let matched =
1894 target_names.map_or(true, |targets| targets.iter().any(|t| t == name));
1895 if matched {
1896 let line = name_node.start_position().row + 1;
1897 let id = format!("class:{}:{}", file_path, name);
1898 nodes.push(CodeNode {
1899 id,
1900 kind: NodeKind::Class,
1901 name: name.to_string(),
1902 file_path: file_path.to_string(),
1903 line: Some(line),
1904 decorators: vec![],
1905 signature: None,
1906 docstring: None,
1907 line_count: 0,
1908 is_test: false,
1909 });
1910 }
1911 }
1912 }
1913
1914 for child in node.children(&mut node.walk()) {
1915 extract_from_node(child, source, file_path, nodes, target_names);
1916 }
1917 }
1918
1919 extract_from_node(root, &source, &file_path_str, &mut self.nodes, target_names);
1920 self.build_indexes();
1921
1922 Ok(())
1923 }
1924
1925 pub fn get_schema(&self) -> String {
1927 let node_kinds: HashSet<&str> = self.nodes.iter().map(|n| match n.kind {
1928 NodeKind::File => "File",
1929 NodeKind::Class => "Class",
1930 NodeKind::Function => "Function",
1931 NodeKind::Module => "Module",
1932 }).collect();
1933
1934 let edge_relations: HashSet<&str> = self.edges.iter().map(|e| match e.relation {
1935 EdgeRelation::Imports => "imports",
1936 EdgeRelation::Inherits => "inherits",
1937 EdgeRelation::DefinedIn => "defined_in",
1938 EdgeRelation::Calls => "calls",
1939 EdgeRelation::TestsFor => "tests_for",
1940 EdgeRelation::Overrides => "overrides",
1941 }).collect();
1942
1943 format!(
1944 "Schema:\n Node kinds: {:?}\n Edge relations: {:?}\n Total nodes: {}\n Total edges: {}",
1945 node_kinds,
1946 edge_relations,
1947 self.nodes.len(),
1948 self.edges.len()
1949 )
1950 }
1951
1952 pub fn get_file_summary(&self, file_path: &str) -> String {
1954 let file_nodes: Vec<&CodeNode> = self.nodes.iter()
1955 .filter(|n| n.file_path == file_path)
1956 .collect();
1957
1958 if file_nodes.is_empty() {
1959 return format!("No nodes found for file: {}", file_path);
1960 }
1961
1962 let classes: Vec<&str> = file_nodes.iter()
1963 .filter(|n| n.kind == NodeKind::Class)
1964 .map(|n| n.name.as_str())
1965 .collect();
1966
1967 let functions: Vec<&str> = file_nodes.iter()
1968 .filter(|n| n.kind == NodeKind::Function)
1969 .map(|n| n.name.as_str())
1970 .collect();
1971
1972 format!(
1973 "File: {}\n Classes ({}): {}\n Functions ({}): {}",
1974 file_path,
1975 classes.len(),
1976 classes.join(", "),
1977 functions.len(),
1978 functions.join(", ")
1979 )
1980 }
1981
1982 pub fn analyze_test_failures(
1987 &self,
1988 changed_node_ids: &[&str],
1989 failed_test_names: &[String],
1990 _repo_dir: &Path,
1991 ) -> String {
1992 let mut analysis = String::new();
1993 analysis.push_str("## 🔍 Graph-based Failure Analysis\n\n");
1994
1995 let changed_names: Vec<String> = changed_node_ids.iter()
1997 .filter_map(|id| self.node_by_id(id))
1998 .map(|n| n.name.clone())
1999 .collect();
2000
2001 let changed_files: HashSet<String> = changed_node_ids.iter()
2002 .filter_map(|id| self.node_by_id(id))
2003 .map(|n| n.file_path.clone())
2004 .collect();
2005
2006 for test_name in failed_test_names {
2008 let short_name = test_name.split("::").last().unwrap_or(test_name);
2011
2012 let test_node = self.nodes.iter().find(|n| {
2014 n.name == short_name
2015 || n.name.ends_with(short_name)
2016 || (n.file_path.contains("/test") && n.name == short_name)
2017 });
2018
2019 analysis.push_str(&format!("### ❌ {}\n", short_name));
2020
2021 if let Some(test) = test_node {
2022 let callees = self.get_callees(&test.id);
2024 let mut found_connection = false;
2025
2026 for callee in &callees {
2027 if changed_node_ids.contains(&callee.id.as_str())
2028 || changed_names.contains(&callee.name)
2029 {
2030 analysis.push_str(&format!(
2031 "**Direct call chain:** `{}` → `{}` (YOU CHANGED THIS)\n",
2032 short_name, callee.name
2033 ));
2034 found_connection = true;
2035
2036 let other_callers = self.get_callers(&callee.id);
2038 let other_caller_names: Vec<&str> = other_callers.iter()
2039 .filter(|c| c.id != test.id)
2040 .map(|c| c.name.as_str())
2041 .take(5)
2042 .collect();
2043 if !other_caller_names.is_empty() {
2044 analysis.push_str(&format!(
2045 "**Other callers of `{}`:** {}\n",
2046 callee.name,
2047 other_caller_names.join(", ")
2048 ));
2049 }
2050 }
2051 }
2052
2053 if !found_connection {
2055 for callee in &callees {
2056 let sub_callees = self.get_callees(&callee.id);
2057 for sub in &sub_callees {
2058 if changed_node_ids.contains(&sub.id.as_str())
2059 || changed_names.contains(&sub.name)
2060 {
2061 analysis.push_str(&format!(
2062 "**Indirect chain:** `{}` → `{}` → `{}` (YOU CHANGED THIS)\n",
2063 short_name, callee.name, sub.name
2064 ));
2065 found_connection = true;
2066 break;
2067 }
2068 }
2069 if found_connection { break; }
2070 }
2071 }
2072
2073 if !found_connection {
2075 let test_file = &test.file_path;
2076 let test_file_id = format!("file:{}", test_file);
2077
2078 for edge in self.outgoing_edges(&test_file_id) {
2079 if edge.relation == EdgeRelation::TestsFor {
2080 if let Some(target) = self.node_by_id(&edge.to) {
2081 if changed_files.contains(&target.file_path) {
2082 analysis.push_str(&format!(
2083 "**File-level connection:** test file `{}` tests `{}` which you modified\n",
2084 test_file, target.file_path
2085 ));
2086 found_connection = true;
2087 break;
2088 }
2089 }
2090 }
2091 }
2092 }
2093
2094 if !found_connection {
2095 analysis.push_str("**Connection:** Could not trace via graph (may be indirect import)\n");
2096 }
2097 } else {
2098 analysis.push_str("**Note:** Test not found in code graph\n");
2099 }
2100 analysis.push('\n');
2101 }
2102
2103 if !changed_names.is_empty() {
2105 analysis.push_str("### Summary\n");
2106 analysis.push_str(&format!("**You changed:** {}\n", changed_names.join(", ")));
2107
2108 let total_callers: usize = changed_node_ids.iter()
2109 .map(|id| self.get_callers(id).len())
2110 .sum();
2111 analysis.push_str(&format!(
2112 "**Total callers of changed code:** {}\n",
2113 total_callers
2114 ));
2115 analysis.push_str("**Repair strategy:** Keep the fix but make it backward-compatible with all callers.\n");
2116 }
2117
2118 analysis
2119 }
2120
2121 pub fn find_symptom_nodes(&self, problem_statement: &str, test_names: &str) -> Vec<&CodeNode> {
2127 let mut result: Vec<&CodeNode> = Vec::new();
2128 let mut seen = HashSet::new();
2129
2130 let test_list: Vec<String> = serde_json::from_str(test_names)
2132 .unwrap_or_else(|_| {
2133 test_names.lines()
2134 .map(|s| s.trim().to_string())
2135 .filter(|s| !s.is_empty())
2136 .collect()
2137 });
2138
2139 for test_id in &test_list {
2140 let short_name = if test_id.contains("::") {
2144 test_id.split("::").last().unwrap_or(test_id)
2145 } else if test_id.contains(" (") {
2146 test_id.split(" (").next().unwrap_or(test_id).trim()
2147 } else {
2148 test_id.as_str()
2149 };
2150
2151 for node in &self.nodes {
2153 if node.kind == NodeKind::Function
2154 && (node.name == short_name || node.name.ends_with(short_name))
2155 && (node.file_path.contains("/tests/")
2156 || node.file_path.contains("/test_")
2157 || node.name.starts_with("test_"))
2158 {
2159 if seen.insert(node.id.clone()) {
2160 result.push(node);
2161 }
2162 }
2163 }
2164 }
2165
2166 for line in problem_statement.lines() {
2168 let trimmed = line.trim();
2169
2170 if trimmed.contains(", in ") {
2172 if let Some(func_part) = trimmed.rsplit(", in ").next() {
2173 let func_name = func_part.trim().trim_start_matches('<').trim_end_matches('>');
2174 if func_name.len() >= 3 && func_name.chars().all(|c| c.is_alphanumeric() || c == '_') {
2175 for node in &self.nodes {
2176 if node.name == func_name && node.kind == NodeKind::Function {
2177 if seen.insert(node.id.clone()) {
2178 result.push(node);
2179 }
2180 }
2181 }
2182 }
2183 }
2184 }
2185
2186 for quote in &['\'', '"', '`'] {
2188 let parts: Vec<&str> = trimmed.split(*quote).collect();
2189 for i in (1..parts.len()).step_by(2) {
2190 let word = parts[i].trim();
2191 if word.len() >= 3
2192 && word.len() <= 60
2193 && word.chars().all(|c| c.is_alphanumeric() || c == '_')
2194 {
2195 for node in &self.nodes {
2196 if node.name == word && (node.kind == NodeKind::Function || node.kind == NodeKind::Class) {
2197 if seen.insert(node.id.clone()) {
2198 result.push(node);
2199 }
2200 }
2201 }
2202 }
2203 }
2204 }
2205 }
2206
2207 for word in problem_statement.split(|c: char| c.is_whitespace() || c == ',' || c == '(' || c == ')' || c == '\'' || c == '"' || c == '`') {
2209 let word = word.trim_matches(|c: char| c == '.' || c == ':' || c == ';');
2210 if word.len() < 4 { continue; }
2211 let has_upper = word.chars().filter(|c| c.is_uppercase()).count() >= 2;
2212 let has_lower = word.chars().any(|c| c.is_lowercase());
2213 let is_ident = word.chars().all(|c| c.is_alphanumeric() || c == '_');
2214 if has_upper && has_lower && is_ident {
2215 for node in &self.nodes {
2216 if node.name == word && node.kind == NodeKind::Class {
2217 if seen.insert(node.id.clone()) {
2218 result.push(node);
2219 }
2220 }
2221 }
2222 }
2223 }
2224
2225 if result.is_empty() {
2227 for test_id in &test_list {
2228 let short_name = if test_id.contains("::") {
2229 test_id.split("::").last().unwrap_or(test_id)
2230 } else if test_id.contains(" (") {
2231 test_id.split(" (").next().unwrap_or(test_id).trim()
2232 } else {
2233 test_id.as_str()
2234 };
2235
2236 let kws: Vec<&str> = short_name.split('_')
2238 .filter(|w| w.len() >= 3 && *w != "test" && *w != "tests")
2239 .collect();
2240 if kws.is_empty() { continue; }
2241
2242 for node in &self.nodes {
2244 if node.file_path.contains("/tests/") || node.file_path.contains("/test_") {
2245 continue;
2246 }
2247 let name_lower = node.name.to_lowercase();
2248 let match_count = kws.iter()
2249 .filter(|kw| name_lower.contains(&kw.to_lowercase()))
2250 .count();
2251 if match_count >= 2 || (match_count >= 1 && kws.len() == 1) {
2252 if seen.insert(node.id.clone()) {
2253 result.push(node);
2254 }
2255 }
2256 }
2257
2258 if test_id.contains(" (") {
2261 let class_part = test_id
2262 .split(" (")
2263 .nth(1)
2264 .unwrap_or("")
2265 .trim_end_matches(')');
2266 let class_name = class_part.rsplit('.').next().unwrap_or("");
2267 if !class_name.is_empty() {
2268 for node in &self.nodes {
2269 if node.kind == NodeKind::Class && node.name == class_name {
2270 let file_id = format!("file:{}", node.file_path);
2271 for edge in self.outgoing_edges(&file_id) {
2272 if edge.relation == EdgeRelation::TestsFor {
2273 if let Some(target) = self.node_by_id(&edge.to) {
2274 if target.kind != NodeKind::File {
2275 if seen.insert(target.id.clone()) {
2276 result.push(target);
2277 }
2278 }
2279 }
2280 for src_node in &self.nodes {
2281 if format!("file:{}", src_node.file_path) == edge.to
2282 && src_node.kind != NodeKind::File
2283 {
2284 if seen.insert(src_node.id.clone()) {
2285 result.push(src_node);
2286 }
2287 }
2288 }
2289 }
2290 }
2291 }
2292 }
2293 }
2294 }
2295 }
2296 }
2297
2298 result
2299 }
2300
2301 pub fn build_unified_graph(
2304 &self,
2305 relevant_nodes: &[&CodeNode],
2306 snippets: &HashMap<String, String>,
2307 issue_id: &str,
2308 issue_description: &str,
2309 ) -> UnifiedGraphResult {
2310 let relevant_ids: HashSet<&str> = relevant_nodes.iter()
2311 .map(|n| n.id.as_str())
2312 .collect();
2313
2314 let mut nodes: Vec<UnifiedNode> = Vec::new();
2316 for code_node in relevant_nodes {
2317 let node_id = code_node.name.replace(|c: char| !c.is_alphanumeric() && c != '_', "_");
2318
2319 let (node_type, layer) = match code_node.kind {
2320 NodeKind::File => ("File".to_string(), "infrastructure"),
2321 NodeKind::Class => ("Component".to_string(), "domain"),
2322 NodeKind::Function | NodeKind::Module => ("Component".to_string(), "application"),
2323 };
2324
2325 let snippet = snippets.get(&code_node.id).cloned();
2326
2327 nodes.push(UnifiedNode {
2328 id: node_id,
2329 node_type,
2330 layer: layer.to_string(),
2331 description: format!("{} in {}", code_node.name, code_node.file_path),
2332 path: Some(code_node.file_path.clone()),
2333 line: code_node.line,
2334 code: snippet,
2335 });
2336 }
2337
2338 let mut edges: Vec<UnifiedEdge> = Vec::new();
2340 let mut seen_keys: HashSet<(String, String, String)> = HashSet::new();
2341
2342 for rel_id in &relevant_ids {
2343 for edge in self.outgoing_edges(rel_id) {
2344 if let (Some(from), Some(to)) = (self.node_by_id(&edge.from), self.node_by_id(&edge.to)) {
2345 let from_id = from.name.replace(|c: char| !c.is_alphanumeric() && c != '_', "_");
2346 let to_id = to.name.replace(|c: char| !c.is_alphanumeric() && c != '_', "_");
2347 let rel = edge.relation.to_string();
2348 let key = (from_id.clone(), to_id.clone(), rel.clone());
2349
2350 if nodes.iter().any(|n| n.id == from_id)
2351 && nodes.iter().any(|n| n.id == to_id)
2352 && seen_keys.insert(key)
2353 {
2354 edges.push(UnifiedEdge {
2355 from: from_id,
2356 to: to_id,
2357 relation: rel,
2358 });
2359 }
2360 }
2361 }
2362 }
2363
2364 let description = if issue_description.len() > 100 {
2365 let mut end = 100;
2366 while end > 0 && !issue_description.is_char_boundary(end) { end -= 1; }
2367 format!("{}...", &issue_description[..end])
2368 } else {
2369 issue_description.to_string()
2370 };
2371
2372 UnifiedGraphResult {
2373 issue_id: issue_id.to_string(),
2374 description,
2375 nodes,
2376 edges,
2377 }
2378 }
2379}
2380
/// Serializable result of `CodeGraph::build_unified_graph`: the issue it was built
/// for plus the simplified nodes and edges.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UnifiedGraphResult {
    /// Identifier of the issue, copied from the caller.
    pub issue_id: String,
    /// Issue description; `build_unified_graph` shortens long text and appends `...`.
    pub description: String,
    /// Simplified nodes, one per relevant code node.
    pub nodes: Vec<UnifiedNode>,
    /// Relations between entries of `nodes`.
    pub edges: Vec<UnifiedEdge>,
}
2389
/// A node of the unified graph as emitted by `CodeGraph::build_unified_graph`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UnifiedNode {
    /// Node name with characters other than alphanumerics and `_` replaced by `_`.
    pub id: String,
    /// `"File"` for file nodes, `"Component"` for everything else.
    pub node_type: String,
    /// `"infrastructure"` (files), `"domain"` (classes) or `"application"`
    /// (functions and modules).
    pub layer: String,
    /// Human-readable text of the form `<name> in <file path>`.
    pub description: String,
    /// File the node lives in.
    pub path: Option<String>,
    /// Line of the definition, copied from the code node when it has one.
    pub line: Option<usize>,
    /// Source snippet for the node, when one was supplied.
    pub code: Option<String>,
}
2401
2402#[derive(Debug, Clone, Serialize, Deserialize)]
2404pub struct UnifiedEdge {
2405 pub from: String,
2406 pub to: String,
2407 pub relation: String,
2408}
2409
2410fn collect_decorators(node: tree_sitter::Node, source: &[u8]) -> Vec<String> {
2413 let mut decorators = Vec::new();
2414 let mut cursor = node.walk();
2415 for child in node.children(&mut cursor) {
2416 if child.kind() == "decorator" {
2417 let dec_text = child.utf8_text(source).unwrap_or("").trim().to_string();
2418 let name = dec_text.trim_start_matches('@');
2419 let name = name.split('(').next().unwrap_or(name).trim();
2420 if !name.is_empty() {
2421 decorators.push(name.to_string());
2422 }
2423 }
2424 }
2425 decorators
2426}
2427
2428fn extract_docstring(node: tree_sitter::Node, source: &str) -> Option<String> {
2429 let body = node.child_by_field_name("body")?;
2430 let mut cursor = body.walk();
2431 for child in body.children(&mut cursor) {
2432 if child.kind() == "comment" {
2433 continue;
2434 }
2435 if child.kind() == "expression_statement" {
2436 if let Some(str_node) = child.child(0) {
2437 if str_node.kind() == "string" || str_node.kind() == "concatenated_string" {
2438 if str_node.start_byte() < source.len() && str_node.end_byte() <= source.len() {
2439 let doc_text = &source[str_node.start_byte()..str_node.end_byte()];
2440 let doc_clean = doc_text
2441 .trim_start_matches("\"\"\"")
2442 .trim_end_matches("\"\"\"")
2443 .trim_start_matches("'''")
2444 .trim_end_matches("'''")
2445 .trim_start_matches('"')
2446 .trim_end_matches('"')
2447 .trim_start_matches('\'')
2448 .trim_end_matches('\'')
2449 .trim();
2450 let first_line = doc_clean.lines().find(|l| !l.trim().is_empty()).unwrap_or("");
2451 if first_line.is_empty() {
2452 return None;
2453 }
2454 let truncated = if first_line.len() > 100 {
2455 let mut end = 100;
2456 while end > 0 && !first_line.is_char_boundary(end) {
2457 end -= 1;
2458 }
2459 &first_line[..end]
2460 } else {
2461 first_line
2462 };
2463 return Some(truncated.to_string());
2464 }
2465 }
2466 }
2467 }
2468 break;
2469 }
2470 None
2471}
2472
2473fn is_in_error_path(node: &tree_sitter::Node, source: &[u8]) -> bool {
2474 let source_str = std::str::from_utf8(source).unwrap_or("");
2475 let mut current = node.parent();
2476 let mut levels = 0;
2477 while let Some(parent) = current {
2478 levels += 1;
2479 if levels > 10 {
2480 break;
2481 }
2482 match parent.kind() {
2483 "except_clause" | "raise_statement" => return true,
2484 "try_statement" => return true,
2485 "if_statement" => {
2486 if let Some(cond) = parent.child_by_field_name("condition") {
2487 if cond.start_byte() < source_str.len() && cond.end_byte() <= source_str.len() {
2488 let cond_text = &source_str[cond.start_byte()..cond.end_byte()];
2489 let lower = cond_text.to_lowercase();
2490 if lower.contains("error")
2491 || lower.contains("exception")
2492 || lower.contains("err")
2493 || lower.contains("fail")
2494 || lower.contains("none")
2495 {
2496 return true;
2497 }
2498 }
2499 }
2500 }
2501 _ => {}
2502 }
2503 current = parent.parent();
2504 }
2505 false
2506}
2507
2508fn extract_python_tree_sitter(
2510 path: &str,
2511 content: &str,
2512 parser: &mut Parser,
2513 class_id_map: &mut HashMap<String, String>,
2514) -> (Vec<CodeNode>, Vec<CodeEdge>, HashSet<String>) {
2515 let mut nodes = Vec::new();
2516 let mut edges = Vec::new();
2517 let mut imports = HashSet::new();
2518
2519 let tree = match parser.parse(content, None) {
2520 Some(t) => t,
2521 None => return (nodes, edges, imports),
2522 };
2523
2524 let file_id = format!("file:{}", path);
2525 let source = content.as_bytes();
2526 let root = tree.root_node();
2527
2528 let text = |node: tree_sitter::Node| -> String {
2529 node.utf8_text(source).unwrap_or("").to_string()
2530 };
2531
2532 let mut cursor = root.walk();
2533 for child in root.children(&mut cursor) {
2534 match child.kind() {
2535 "class_definition" => {
2536 extract_class_node(
2537 child,
2538 source,
2539 content,
2540 path,
2541 &file_id,
2542 &[],
2543 &mut nodes,
2544 &mut edges,
2545 class_id_map,
2546 );
2547 }
2548 "function_definition" => {
2549 extract_function_node(child, source, content, path, &file_id, &[], &mut nodes, &mut edges);
2550 }
2551 "decorated_definition" => {
2552 let decorators = collect_decorators(child, source);
2553 let mut inner_cursor = child.walk();
2554 for inner in child.children(&mut inner_cursor) {
2555 match inner.kind() {
2556 "class_definition" => {
2557 extract_class_node(
2558 inner,
2559 source,
2560 content,
2561 path,
2562 &file_id,
2563 &decorators,
2564 &mut nodes,
2565 &mut edges,
2566 class_id_map,
2567 );
2568 }
2569 "function_definition" => {
2570 extract_function_node(
2571 inner, source, content, path, &file_id, &decorators, &mut nodes, &mut edges,
2572 );
2573 }
2574 _ => {}
2575 }
2576 }
2577 }
2578 "import_statement" => {
2579 let import_text = text(child);
2580 let re_import = Regex::new(r"import\s+([\w.]+)").unwrap();
2581 if let Some(cap) = re_import.captures(&import_text) {
2582 let module = cap[1].to_string();
2583 if !is_stdlib(&module) {
2584 edges.push(CodeEdge {
2585 from: file_id.clone(),
2586 to: format!("module_ref:{}", module),
2587 relation: EdgeRelation::Imports,
2588 weight: 0.5,
2589 call_count: 1,
2590 in_error_path: false,
2591 confidence: 1.0,
2592 });
2593 }
2594 }
2595 }
2596 "import_from_statement" => {
2597 let mut mod_cursor = child.walk();
2598 for mod_child in child.children(&mut mod_cursor) {
2599 if mod_child.kind() == "dotted_name" {
2600 let module = text(mod_child);
2601 if !is_stdlib(&module) {
2602 edges.push(CodeEdge {
2603 from: file_id.clone(),
2604 to: format!("module_ref:{}", module),
2605 relation: EdgeRelation::Imports,
2606 weight: 0.5,
2607 call_count: 1,
2608 in_error_path: false,
2609 confidence: 1.0,
2610 });
2611 }
2612 break;
2613 }
2614 if mod_child.kind() == "relative_import" {
2615 let rel_import_text = text(mod_child);
2616 let trimmed = rel_import_text.trim_start_matches('.');
2617 if !trimmed.is_empty() && !is_stdlib(trimmed) {
2618 edges.push(CodeEdge {
2619 from: file_id.clone(),
2620 to: format!("module_ref:{}", trimmed),
2621 relation: EdgeRelation::Imports,
2622 weight: 0.5,
2623 call_count: 1,
2624 in_error_path: false,
2625 confidence: 1.0,
2626 });
2627 }
2628 break;
2629 }
2630 }
2631
2632 let import_text = child.utf8_text(source).unwrap_or("");
2634 if let Some(after_import) = import_text.split(" import ").nth(1) {
2635 for name in after_import.split(',') {
2636 let clean = name.trim().split(" as ").next().unwrap_or("").trim();
2637 if !clean.is_empty() && clean != "*" && clean != "(" && clean != ")" {
2638 imports.insert(clean.to_string());
2639 }
2640 }
2641 }
2642 }
2643 _ => {}
2644 }
2645 }
2646
2647 (nodes, edges, imports)
2648}
2649
2650fn extract_class_node(
2651 node: tree_sitter::Node,
2652 source: &[u8],
2653 source_str: &str,
2654 path: &str,
2655 file_id: &str,
2656 decorators: &[String],
2657 nodes: &mut Vec<CodeNode>,
2658 edges: &mut Vec<CodeEdge>,
2659 class_id_map: &mut HashMap<String, String>,
2660) {
2661 let class_name = node
2662 .child_by_field_name("name")
2663 .and_then(|n| n.utf8_text(source).ok())
2664 .unwrap_or("")
2665 .to_string();
2666
2667 if class_name.is_empty() {
2668 return;
2669 }
2670
2671 let line_num = node.start_position().row + 1;
2672 let class_id = format!("class:{}:{}", path, class_name);
2673
2674 let class_sig = {
2675 let sig_text = &source_str[node.start_byte()..];
2676 let sig_end = sig_text
2677 .find(":\n")
2678 .or_else(|| sig_text.find(":\r"))
2679 .unwrap_or(sig_text.len().min(200));
2680 Some(sig_text[..sig_end].trim().to_string())
2681 };
2682
2683 let class_docstring = extract_docstring(node, source_str);
2684 let class_line_count = node.end_position().row - node.start_position().row + 1;
2685 let class_is_test =
2686 path.contains("/tests/") || path.contains("/test_") || class_name.starts_with("Test");
2687
2688 nodes.push(CodeNode {
2689 id: class_id.clone(),
2690 kind: NodeKind::Class,
2691 name: class_name.clone(),
2692 file_path: path.to_string(),
2693 line: Some(line_num),
2694 decorators: decorators.to_vec(),
2695 signature: class_sig,
2696 docstring: class_docstring,
2697 line_count: class_line_count,
2698 is_test: class_is_test,
2699 });
2700
2701 edges.push(CodeEdge {
2702 from: class_id.clone(),
2703 to: file_id.to_string(),
2704 relation: EdgeRelation::DefinedIn,
2705 weight: 0.5,
2706 call_count: 1,
2707 in_error_path: false,
2708 confidence: 1.0,
2709 });
2710
2711 class_id_map.insert(class_name.clone(), class_id.clone());
2712
2713 if let Some(superclasses) = node.child_by_field_name("superclasses") {
2715 let mut sc_cursor = superclasses.walk();
2716 for sc_child in superclasses.children(&mut sc_cursor) {
2717 let kind = sc_child.kind();
2718 if kind == "identifier" || kind == "attribute" {
2719 let parent_text = sc_child.utf8_text(source).unwrap_or("");
2720 let parent_name = parent_text.split('.').last().unwrap_or("").trim();
2721 if !parent_name.is_empty() && parent_name != "object" {
2722 edges.push(CodeEdge {
2723 from: class_id.clone(),
2724 to: format!("class_ref:{}", parent_name),
2725 relation: EdgeRelation::Inherits,
2726 weight: 0.5,
2727 call_count: 1,
2728 in_error_path: false,
2729 confidence: 1.0,
2730 });
2731 }
2732 }
2733 }
2734 }
2735
2736 if let Some(body) = node.child_by_field_name("body") {
2738 let mut body_cursor = body.walk();
2739 for body_child in body.children(&mut body_cursor) {
2740 match body_child.kind() {
2741 "function_definition" => {
2742 extract_method_node(body_child, source, source_str, path, &class_id, &[], nodes, edges);
2743 }
2744 "decorated_definition" => {
2745 let method_decorators = collect_decorators(body_child, source);
2746 let mut inner_cursor = body_child.walk();
2747 for inner in body_child.children(&mut inner_cursor) {
2748 if inner.kind() == "function_definition" {
2749 extract_method_node(
2750 inner,
2751 source,
2752 source_str,
2753 path,
2754 &class_id,
2755 &method_decorators,
2756 nodes,
2757 edges,
2758 );
2759 }
2760 }
2761 }
2762 _ => {}
2763 }
2764 }
2765 }
2766}
2767
2768fn extract_method_node(
2769 node: tree_sitter::Node,
2770 source: &[u8],
2771 source_str: &str,
2772 path: &str,
2773 class_id: &str,
2774 decorators: &[String],
2775 nodes: &mut Vec<CodeNode>,
2776 edges: &mut Vec<CodeEdge>,
2777) {
2778 let func_name = node
2779 .child_by_field_name("name")
2780 .and_then(|n| n.utf8_text(source).ok())
2781 .unwrap_or("")
2782 .to_string();
2783
2784 if func_name.is_empty() {
2785 return;
2786 }
2787
2788 let line_num = node.start_position().row + 1;
2789 let method_id = format!("method:{}:{}", path, func_name);
2790
2791 let signature = {
2792 let sig_text = &source_str[node.start_byte()..];
2793 let sig_end = sig_text
2794 .find(":\n")
2795 .or_else(|| sig_text.find(":\r"))
2796 .unwrap_or(sig_text.len().min(200));
2797 Some(sig_text[..sig_end].trim().to_string())
2798 };
2799 let docstring = extract_docstring(node, source_str);
2800 let line_count = node.end_position().row - node.start_position().row + 1;
2801 let is_test = path.contains("/tests/")
2802 || path.contains("/test_")
2803 || func_name.starts_with("test_")
2804 || func_name.starts_with("Test");
2805
2806 nodes.push(CodeNode {
2807 id: method_id.clone(),
2808 kind: NodeKind::Function,
2809 name: func_name,
2810 file_path: path.to_string(),
2811 line: Some(line_num),
2812 decorators: decorators.to_vec(),
2813 signature,
2814 docstring,
2815 line_count,
2816 is_test,
2817 });
2818
2819 edges.push(CodeEdge {
2820 from: method_id,
2821 to: class_id.to_string(),
2822 relation: EdgeRelation::DefinedIn,
2823 weight: 0.5,
2824 call_count: 1,
2825 in_error_path: false,
2826 confidence: 1.0,
2827 });
2828}
2829
2830fn extract_function_node(
2831 node: tree_sitter::Node,
2832 source: &[u8],
2833 source_str: &str,
2834 path: &str,
2835 file_id: &str,
2836 decorators: &[String],
2837 nodes: &mut Vec<CodeNode>,
2838 edges: &mut Vec<CodeEdge>,
2839) {
2840 let func_name = node
2841 .child_by_field_name("name")
2842 .and_then(|n| n.utf8_text(source).ok())
2843 .unwrap_or("")
2844 .to_string();
2845
2846 if func_name.is_empty() {
2847 return;
2848 }
2849
2850 let line_num = node.start_position().row + 1;
2851 let func_id = format!("func:{}:{}", path, func_name);
2852
2853 let signature = {
2854 let sig_text = &source_str[node.start_byte()..];
2855 let sig_end = sig_text
2856 .find(":\n")
2857 .or_else(|| sig_text.find(":\r"))
2858 .unwrap_or(sig_text.len().min(200));
2859 Some(sig_text[..sig_end].trim().to_string())
2860 };
2861 let docstring = extract_docstring(node, source_str);
2862 let line_count = node.end_position().row - node.start_position().row + 1;
2863 let is_test = path.contains("/tests/")
2864 || path.contains("/test_")
2865 || func_name.starts_with("test_")
2866 || func_name.starts_with("Test");
2867
2868 nodes.push(CodeNode {
2869 id: func_id.clone(),
2870 kind: NodeKind::Function,
2871 name: func_name,
2872 file_path: path.to_string(),
2873 line: Some(line_num),
2874 decorators: decorators.to_vec(),
2875 signature,
2876 docstring,
2877 line_count,
2878 is_test,
2879 });
2880
2881 edges.push(CodeEdge {
2882 from: func_id,
2883 to: file_id.to_string(),
2884 relation: EdgeRelation::DefinedIn,
2885 weight: 0.5,
2886 call_count: 1,
2887 in_error_path: false,
2888 confidence: 1.0,
2889 });
2890}
2891
2892fn extract_calls_from_tree(
2894 root: tree_sitter::Node,
2895 source: &[u8],
2896 rel_path: &str,
2897 func_name_map: &HashMap<String, Vec<String>>,
2898 method_to_class: &HashMap<String, String>,
2899 class_parents: &HashMap<String, Vec<String>>,
2900 file_func_ids: &HashSet<String>,
2901 file_imported_names: &HashMap<String, HashSet<String>>,
2902 package_dir: &str,
2903 class_init_map: &HashMap<String, Vec<(String, String)>>,
2904 node_pkg_map: &HashMap<String, String>,
2905 edges: &mut Vec<CodeEdge>,
2906) {
2907 let mut scope_map: Vec<(usize, usize, String, Option<String>)> = Vec::new();
2909 build_scope_map(root, source, rel_path, &mut scope_map);
2910
2911 let mut stack = vec![root];
2913 while let Some(node) = stack.pop() {
2914 if node.kind() == "string"
2915 || node.kind() == "comment"
2916 || node.kind() == "string_content"
2917 || node.kind() == "concatenated_string"
2918 {
2919 continue;
2920 }
2921
2922 if node.kind() == "call" {
2923 let call_line = node.start_position().row + 1;
2924 let error_path = is_in_error_path(&node, source);
2925
2926 let scope = scope_map
2927 .iter()
2928 .filter(|(start, end, _, _)| call_line >= *start && call_line <= *end)
2929 .max_by_key(|(start, _, _, _)| *start);
2930
2931 if let Some((_start, _end, caller_id, caller_class)) = scope {
2932 if let Some(function_node) = node.child_by_field_name("function") {
2933 let edges_before = edges.len();
2934 match function_node.kind() {
2935 "identifier" => {
2936 let callee_name = function_node.utf8_text(source).unwrap_or("");
2937 if !callee_name.is_empty() && !is_python_builtin(callee_name) {
2938 resolve_and_add_call_edge(
2939 caller_id,
2940 callee_name,
2941 func_name_map,
2942 file_func_ids,
2943 file_imported_names,
2944 rel_path,
2945 package_dir,
2946 class_init_map,
2947 node_pkg_map,
2948 false,
2949 edges,
2950 );
2951 }
2952 }
2953 "attribute" => {
2954 let obj_node = function_node.child_by_field_name("object");
2955 let attr_node = function_node.child_by_field_name("attribute");
2956
2957 if let (Some(obj), Some(attr)) = (obj_node, attr_node) {
2958 let obj_text = obj.utf8_text(source).unwrap_or("");
2959 let method_name = attr.utf8_text(source).unwrap_or("");
2960
2961 if (obj_text == "self" || obj_text == "cls") && !method_name.is_empty() {
2962 resolve_self_method_call(
2963 caller_id,
2964 method_name,
2965 caller_class.as_deref(),
2966 func_name_map,
2967 method_to_class,
2968 class_parents,
2969 file_func_ids,
2970 edges,
2971 );
2972 } else if !method_name.is_empty() && !is_python_builtin(method_name) {
2973 resolve_and_add_call_edge(
2974 caller_id,
2975 method_name,
2976 func_name_map,
2977 file_func_ids,
2978 file_imported_names,
2979 rel_path,
2980 package_dir,
2981 class_init_map,
2982 node_pkg_map,
2983 true,
2984 edges,
2985 );
2986 }
2987 }
2988 }
2989 _ => {}
2990 }
2991 if error_path {
2992 for edge in edges[edges_before..].iter_mut() {
2993 edge.in_error_path = true;
2994 }
2995 }
2996 }
2997 }
2998 }
2999
3000 let child_count = node.child_count();
3001 for i in (0..child_count).rev() {
3002 if let Some(child) = node.child(i) {
3003 stack.push(child);
3004 }
3005 }
3006 }
3007}
3008
3009fn build_scope_map(
3010 node: tree_sitter::Node,
3011 source: &[u8],
3012 rel_path: &str,
3013 scope_map: &mut Vec<(usize, usize, String, Option<String>)>,
3014) {
3015 let mut stack: Vec<(tree_sitter::Node, Option<String>)> = vec![(node, None)];
3016
3017 while let Some((current, class_ctx)) = stack.pop() {
3018 match current.kind() {
3019 "class_definition" => {
3020 let class_name = current
3021 .child_by_field_name("name")
3022 .and_then(|n| n.utf8_text(source).ok())
3023 .unwrap_or("");
3024 let class_id = if !class_name.is_empty() {
3025 Some(format!("class:{}:{}", rel_path, class_name))
3026 } else {
3027 class_ctx.clone()
3028 };
3029
3030 let child_count = current.child_count();
3031 for i in (0..child_count).rev() {
3032 if let Some(child) = current.child(i) {
3033 stack.push((child, class_id.clone()));
3034 }
3035 }
3036 }
3037 "function_definition" => {
3038 let func_name = current
3039 .child_by_field_name("name")
3040 .and_then(|n| n.utf8_text(source).ok())
3041 .unwrap_or("");
3042
3043 if !func_name.is_empty() {
3044 let start_line = current.start_position().row + 1;
3045 let end_line = current.end_position().row + 1;
3046
3047 let func_id = if class_ctx.is_some() {
3048 format!("method:{}:{}", rel_path, func_name)
3049 } else {
3050 format!("func:{}:{}", rel_path, func_name)
3051 };
3052
3053 scope_map.push((start_line, end_line, func_id, class_ctx.clone()));
3054 }
3055
3056 let child_count = current.child_count();
3057 for i in (0..child_count).rev() {
3058 if let Some(child) = current.child(i) {
3059 stack.push((child, class_ctx.clone()));
3060 }
3061 }
3062 }
3063 "decorated_definition" => {
3064 let child_count = current.child_count();
3065 for i in (0..child_count).rev() {
3066 if let Some(child) = current.child(i) {
3067 stack.push((child, class_ctx.clone()));
3068 }
3069 }
3070 }
3071 _ => {
3072 let child_count = current.child_count();
3073 for i in (0..child_count).rev() {
3074 if let Some(child) = current.child(i) {
3075 stack.push((child, class_ctx.clone()));
3076 }
3077 }
3078 }
3079 }
3080 }
3081}
3082
/// Reports whether `name` is one of the method names treated as too common to
/// resolve by name alone.
///
/// Despite the function's name the table holds two groups: frequently
/// implemented dunder methods, and a handful of generic verbs such as `get`
/// or `save`. The comparison is exact and case-sensitive.
fn is_common_dunder(name: &str) -> bool {
    const COMMON_NAMES: &[&str] = &[
        // Dunder methods.
        "__init__",
        "__str__",
        "__repr__",
        "__eq__",
        "__ne__",
        "__hash__",
        "__len__",
        "__iter__",
        "__next__",
        "__getitem__",
        "__setitem__",
        "__delitem__",
        "__contains__",
        "__call__",
        "__enter__",
        "__exit__",
        "__get__",
        "__set__",
        "__delete__",
        "__getattr__",
        "__setattr__",
        "__bool__",
        "__lt__",
        "__le__",
        "__gt__",
        "__ge__",
        "__add__",
        "__sub__",
        "__mul__",
        "__new__",
        "__del__",
        "__format__",
        // Generic method names.
        "get",
        "set",
        "update",
        "delete",
        "save",
        "clean",
        "run",
        "setup",
        "teardown",
    ];

    COMMON_NAMES.contains(&name)
}
3129
3130fn resolve_and_add_call_edge(
3131 caller_id: &str,
3132 callee_name: &str,
3133 func_name_map: &HashMap<String, Vec<String>>,
3134 file_func_ids: &HashSet<String>,
3135 file_imported_names: &HashMap<String, HashSet<String>>,
3136 rel_path: &str,
3137 package_dir: &str,
3138 class_init_map: &HashMap<String, Vec<(String, String)>>,
3139 node_pkg_map: &HashMap<String, String>,
3140 is_attribute_call: bool,
3141 edges: &mut Vec<CodeEdge>,
3142) {
3143 if let Some(callee_ids) = func_name_map.get(callee_name) {
3144 let same_file: Vec<&String> = callee_ids
3145 .iter()
3146 .filter(|id| file_func_ids.contains(*id))
3147 .collect();
3148 let imported: Vec<&String> = callee_ids
3149 .iter()
3150 .filter(|_id| {
3151 file_imported_names
3152 .get(rel_path)
3153 .map(|names| names.contains(callee_name))
3154 .unwrap_or(false)
3155 })
3156 .collect();
3157 let same_pkg: Vec<&String> = callee_ids
3158 .iter()
3159 .filter(|id| {
3160 node_pkg_map
3161 .get(id.as_str())
3162 .map(|pkg| pkg == package_dir)
3163 .unwrap_or(false)
3164 })
3165 .collect();
3166
3167 let global_limit = if is_attribute_call && !is_common_dunder(callee_name) {
3168 20
3169 } else {
3170 3
3171 };
3172
3173 let confidence = if !same_file.is_empty() {
3174 0.8_f32
3175 } else if !imported.is_empty() {
3176 0.8
3177 } else if !same_pkg.is_empty() {
3178 0.7
3179 } else if is_attribute_call {
3180 0.3
3181 } else {
3182 0.5
3183 };
3184
3185 let weight = if !same_file.is_empty() || !imported.is_empty() || !same_pkg.is_empty() {
3186 0.5
3187 } else if is_attribute_call {
3188 0.8
3189 } else {
3190 0.5
3191 };
3192
3193 let targets = if !same_file.is_empty() {
3194 same_file
3195 } else if !imported.is_empty() {
3196 imported
3197 } else if !same_pkg.is_empty() {
3198 same_pkg
3199 } else if callee_ids.len() <= global_limit {
3200 callee_ids.iter().collect()
3201 } else {
3202 vec![]
3203 };
3204
3205 for callee_id in targets {
3206 if callee_id != caller_id {
3207 edges.push(CodeEdge {
3208 from: caller_id.to_string(),
3209 to: callee_id.clone(),
3210 relation: EdgeRelation::Calls,
3211 weight,
3212 call_count: 1,
3213 in_error_path: false,
3214 confidence,
3215 });
3216 }
3217 }
3218 } else if callee_name
3219 .chars()
3220 .next()
3221 .map(|c| c.is_uppercase())
3222 .unwrap_or(false)
3223 {
3224 if let Some(init_entries) = class_init_map.get(callee_name) {
3226 let same_file: Vec<&str> = init_entries
3227 .iter()
3228 .filter(|(fp, _)| fp == rel_path)
3229 .map(|(_, id)| id.as_str())
3230 .collect();
3231 let is_imported = file_imported_names
3232 .get(rel_path)
3233 .map(|names| names.contains(callee_name))
3234 .unwrap_or(false);
3235 let imported: Vec<&str> = if is_imported {
3236 init_entries.iter().map(|(_, id)| id.as_str()).collect()
3237 } else {
3238 vec![]
3239 };
3240 let same_pkg: Vec<&str> = init_entries
3241 .iter()
3242 .filter(|(fp, _)| fp.rsplitn(2, '/').nth(1).unwrap_or("") == package_dir)
3243 .map(|(_, id)| id.as_str())
3244 .collect();
3245
3246 let (targets, confidence): (Vec<&str>, f32) = if !same_file.is_empty() {
3247 (same_file, 0.8)
3248 } else if !imported.is_empty() {
3249 (imported, 0.7)
3250 } else if !same_pkg.is_empty() {
3251 (same_pkg, 0.6)
3252 } else if init_entries.len() <= 3 {
3253 (init_entries.iter().map(|(_, id)| id.as_str()).collect(), 0.5)
3254 } else {
3255 (vec![], 0.0)
3256 };
3257
3258 for init_id in targets {
3259 if init_id != caller_id {
3260 edges.push(CodeEdge {
3261 from: caller_id.to_string(),
3262 to: init_id.to_string(),
3263 relation: EdgeRelation::Calls,
3264 weight: 0.5,
3265 call_count: 1,
3266 in_error_path: false,
3267 confidence,
3268 });
3269 }
3270 }
3271 }
3272 }
3273}
3274
3275fn resolve_self_method_call(
3276 caller_id: &str,
3277 method_name: &str,
3278 caller_class: Option<&str>,
3279 func_name_map: &HashMap<String, Vec<String>>,
3280 method_to_class: &HashMap<String, String>,
3281 class_parents: &HashMap<String, Vec<String>>,
3282 file_func_ids: &HashSet<String>,
3283 edges: &mut Vec<CodeEdge>,
3284) {
3285 if let Some(callee_ids) = func_name_map.get(method_name) {
3286 if let Some(class_id) = caller_class {
3287 let mut valid_classes = vec![class_id.to_string()];
3288 if let Some(parents) = class_parents.get(class_id) {
3289 valid_classes.extend(parents.iter().cloned());
3290 }
3291
3292 let scoped: Vec<&String> = callee_ids
3293 .iter()
3294 .filter(|id| {
3295 method_to_class
3296 .get(*id)
3297 .map(|cls| valid_classes.contains(cls))
3298 .unwrap_or(false)
3299 })
3300 .collect();
3301
3302 let targets = if !scoped.is_empty() {
3303 scoped
3304 } else if callee_ids.len() <= 3 {
3305 callee_ids.iter().collect()
3306 } else {
3307 callee_ids
3308 .iter()
3309 .filter(|id| file_func_ids.contains(*id))
3310 .collect()
3311 };
3312
3313 for callee_id in targets {
3314 if callee_id != caller_id {
3315 edges.push(CodeEdge {
3316 from: caller_id.to_string(),
3317 to: callee_id.clone(),
3318 relation: EdgeRelation::Calls,
3319 weight: 0.5,
3320 call_count: 1,
3321 in_error_path: false,
3322 confidence: 0.9,
3323 });
3324 }
3325 }
3326 } else {
3327 for callee_id in callee_ids {
3328 if callee_id != caller_id && file_func_ids.contains(callee_id) {
3329 edges.push(CodeEdge {
3330 from: caller_id.to_string(),
3331 to: callee_id.clone(),
3332 relation: EdgeRelation::Calls,
3333 weight: 0.5,
3334 call_count: 1,
3335 in_error_path: false,
3336 confidence: 0.6,
3337 });
3338 }
3339 }
3340 }
3341 }
3342}
3343
3344fn add_override_edges(nodes: &[CodeNode], edges: &mut Vec<CodeEdge>) {
3345 let mut class_methods: HashMap<String, Vec<(String, String)>> = HashMap::new();
3346 for edge in edges.iter() {
3347 if edge.relation == EdgeRelation::DefinedIn && edge.to.starts_with("class:") {
3348 if let Some(method) = nodes.iter().find(|n| n.id == edge.from && n.kind == NodeKind::Function) {
3349 class_methods
3350 .entry(edge.to.clone())
3351 .or_default()
3352 .push((method.name.clone(), method.id.clone()));
3353 }
3354 }
3355 }
3356
3357 let inherits_pairs: Vec<(String, String)> = edges
3358 .iter()
3359 .filter(|e| e.relation == EdgeRelation::Inherits)
3360 .map(|e| (e.from.clone(), e.to.clone()))
3361 .collect();
3362
3363 let mut new_edges = Vec::new();
3364 for (sub_class_id, base_class_id) in &inherits_pairs {
3365 let sub_methods = match class_methods.get(sub_class_id) {
3366 Some(m) => m,
3367 None => continue,
3368 };
3369 let base_methods = match class_methods.get(base_class_id) {
3370 Some(m) => m,
3371 None => continue,
3372 };
3373
3374 for (sub_name, sub_id) in sub_methods {
3375 for (base_name, base_id) in base_methods {
3376 if sub_name == base_name && sub_id != base_id {
3377 new_edges.push(CodeEdge {
3378 from: base_id.clone(),
3379 to: sub_id.clone(),
3380 relation: EdgeRelation::Overrides,
3381 weight: 0.4,
3382 call_count: 1,
3383 in_error_path: false,
3384 confidence: 0.6,
3385 });
3386 }
3387 }
3388 }
3389 }
3390
3391 edges.extend(new_edges);
3392}
3393
3394fn extract_rust_tree_sitter(
3401 path: &str,
3402 content: &str,
3403 parser: &mut Parser,
3404 class_id_map: &mut HashMap<String, String>,
3405) -> (Vec<CodeNode>, Vec<CodeEdge>, HashSet<String>) {
3406 let mut nodes = Vec::new();
3407 let mut edges = Vec::new();
3408 let mut imports = HashSet::new();
3409
3410 if parser.set_language(&tree_sitter_rust::LANGUAGE.into()).is_err() {
3412 return (nodes, edges, imports);
3413 }
3414
3415 let tree = match parser.parse(content, None) {
3416 Some(t) => t,
3417 None => return (nodes, edges, imports),
3418 };
3419
3420 let file_id = format!("file:{}", path);
3421 let source = content.as_bytes();
3422 let root = tree.root_node();
3423
3424 let mut impl_target_map: HashMap<String, String> = HashMap::new();
3426
3427 let mut cursor = root.walk();
3428 for child in root.children(&mut cursor) {
3429 extract_rust_node(
3430 child,
3431 source,
3432 content,
3433 path,
3434 &file_id,
3435 &mut nodes,
3436 &mut edges,
3437 class_id_map,
3438 &mut impl_target_map,
3439 &mut imports,
3440 "", );
3442 }
3443
3444 (nodes, edges, imports)
3445}
3446
3447fn extract_rust_node(
3449 node: tree_sitter::Node,
3450 source: &[u8],
3451 source_str: &str,
3452 path: &str,
3453 file_id: &str,
3454 nodes: &mut Vec<CodeNode>,
3455 edges: &mut Vec<CodeEdge>,
3456 class_id_map: &mut HashMap<String, String>,
3457 impl_target_map: &mut HashMap<String, String>,
3458 imports: &mut HashSet<String>,
3459 module_prefix: &str,
3460) {
3461 let text = |n: tree_sitter::Node| -> String {
3462 n.utf8_text(source).unwrap_or("").to_string()
3463 };
3464
3465 match node.kind() {
3466 "use_declaration" => {
3467 let use_text = text(node);
3469 if let Some(path_part) = use_text.strip_prefix("use ") {
3471 let clean_path = path_part.trim_end_matches(';').trim();
3472 if !clean_path.starts_with("std::") && !clean_path.starts_with("core::") && !clean_path.starts_with("alloc::") {
3474 let module = if clean_path.contains('{') {
3476 clean_path.split("::").next().unwrap_or(clean_path).to_string()
3477 } else {
3478 clean_path.split("::").take(2).collect::<Vec<_>>().join("::")
3479 };
3480 if !module.is_empty() {
3481 edges.push(CodeEdge {
3482 from: file_id.to_string(),
3483 to: format!("module_ref:{}", module),
3484 relation: EdgeRelation::Imports,
3485 weight: 0.5,
3486 call_count: 1,
3487 in_error_path: false,
3488 confidence: 1.0,
3489 });
3490 imports.insert(module);
3491 }
3492 }
3493 }
3494 }
3495
3496 "struct_item" => {
3497 let name = node.child_by_field_name("name")
3498 .and_then(|n| n.utf8_text(source).ok())
3499 .unwrap_or("")
3500 .to_string();
3501 if name.is_empty() { return; }
3502
3503 let full_name = if module_prefix.is_empty() { name.clone() } else { format!("{}::{}", module_prefix, name) };
3504 let line = node.start_position().row + 1;
3505 let class_id = format!("class:{}:{}", path, full_name);
3506
3507 let signature = extract_rust_signature(node, source_str);
3508 let docstring = extract_rust_docstring(node, source_str);
3509 let line_count = node.end_position().row - node.start_position().row + 1;
3510
3511 nodes.push(CodeNode {
3512 id: class_id.clone(),
3513 kind: NodeKind::Class,
3514 name: full_name.clone(),
3515 file_path: path.to_string(),
3516 line: Some(line),
3517 decorators: extract_rust_attributes(node, source),
3518 signature,
3519 docstring,
3520 line_count,
3521 is_test: path.contains("/tests/") || full_name.contains("Test"),
3522 });
3523
3524 edges.push(CodeEdge::defined_in(&class_id, file_id));
3525 class_id_map.insert(name.clone(), class_id);
3526 }
3527
3528 "enum_item" => {
3529 let name = node.child_by_field_name("name")
3530 .and_then(|n| n.utf8_text(source).ok())
3531 .unwrap_or("")
3532 .to_string();
3533 if name.is_empty() { return; }
3534
3535 let full_name = if module_prefix.is_empty() { name.clone() } else { format!("{}::{}", module_prefix, name) };
3536 let line = node.start_position().row + 1;
3537 let class_id = format!("class:{}:{}", path, full_name);
3538
3539 let signature = extract_rust_signature(node, source_str);
3540 let docstring = extract_rust_docstring(node, source_str);
3541 let line_count = node.end_position().row - node.start_position().row + 1;
3542
3543 nodes.push(CodeNode {
3544 id: class_id.clone(),
3545 kind: NodeKind::Class,
3546 name: full_name.clone(),
3547 file_path: path.to_string(),
3548 line: Some(line),
3549 decorators: extract_rust_attributes(node, source),
3550 signature,
3551 docstring,
3552 line_count,
3553 is_test: path.contains("/tests/") || full_name.contains("Test"),
3554 });
3555
3556 edges.push(CodeEdge::defined_in(&class_id, file_id));
3557 class_id_map.insert(name.clone(), class_id);
3558 }
3559
3560 "trait_item" => {
3561 let name = node.child_by_field_name("name")
3562 .and_then(|n| n.utf8_text(source).ok())
3563 .unwrap_or("")
3564 .to_string();
3565 if name.is_empty() { return; }
3566
3567 let full_name = if module_prefix.is_empty() { name.clone() } else { format!("{}::{}", module_prefix, name) };
3568 let line = node.start_position().row + 1;
3569 let trait_id = format!("class:{}:{}", path, full_name);
3570
3571 let signature = extract_rust_signature(node, source_str);
3572 let docstring = extract_rust_docstring(node, source_str);
3573 let line_count = node.end_position().row - node.start_position().row + 1;
3574
3575 nodes.push(CodeNode {
3576 id: trait_id.clone(),
3577 kind: NodeKind::Class,
3578 name: full_name.clone(),
3579 file_path: path.to_string(),
3580 line: Some(line),
3581 decorators: extract_rust_attributes(node, source),
3582 signature,
3583 docstring,
3584 line_count,
3585 is_test: path.contains("/tests/") || full_name.contains("Test"),
3586 });
3587
3588 edges.push(CodeEdge::defined_in(&trait_id, file_id));
3589 class_id_map.insert(name.clone(), trait_id.clone());
3590
3591 if let Some(body) = node.child_by_field_name("body") {
3593 let mut body_cursor = body.walk();
3594 for body_child in body.children(&mut body_cursor) {
3595 if body_child.kind() == "function_item" || body_child.kind() == "function_signature_item" {
3596 extract_rust_method(body_child, source, source_str, path, &trait_id, nodes, edges);
3597 }
3598 }
3599 }
3600 }
3601
3602 "impl_item" => {
3603 let mut trait_name: Option<String> = None;
3605 let mut type_name: Option<String> = None;
3606
3607 let mut cursor = node.walk();
3609 for child in node.children(&mut cursor) {
3610 match child.kind() {
3611 "type_identifier" | "generic_type" => {
3612 let name = if child.kind() == "generic_type" {
3614 child.child_by_field_name("type")
3616 .and_then(|n| n.utf8_text(source).ok())
3617 .unwrap_or("")
3618 .to_string()
3619 } else {
3620 text(child)
3621 };
3622
3623 if type_name.is_none() {
3624 type_name = Some(name);
3625 } else if trait_name.is_none() {
3626 trait_name = type_name.take();
3628 type_name = Some(name);
3629 }
3630 }
3631 _ => {}
3632 }
3633 }
3634
3635 let type_name = match type_name {
3636 Some(n) => n,
3637 None => return,
3638 };
3639
3640 let type_id = class_id_map.get(&type_name)
3642 .cloned()
3643 .unwrap_or_else(|| format!("class:{}:{}", path, type_name));
3644
3645 if let Some(ref trait_n) = trait_name {
3647 edges.push(CodeEdge {
3648 from: type_id.clone(),
3649 to: format!("class_ref:{}", trait_n),
3650 relation: EdgeRelation::Inherits,
3651 weight: 0.5,
3652 call_count: 1,
3653 in_error_path: false,
3654 confidence: 1.0,
3655 });
3656 }
3657
3658 if let Some(body) = node.child_by_field_name("body") {
3660 let mut body_cursor = body.walk();
3661 for body_child in body.children(&mut body_cursor) {
3662 if body_child.kind() == "function_item" {
3663 extract_rust_method(body_child, source, source_str, path, &type_id, nodes, edges);
3664 }
3665 }
3666 }
3667 }
3668
3669 "function_item" => {
3670 let name = node.child_by_field_name("name")
3672 .and_then(|n| n.utf8_text(source).ok())
3673 .unwrap_or("")
3674 .to_string();
3675 if name.is_empty() { return; }
3676
3677 let full_name = if module_prefix.is_empty() { name.clone() } else { format!("{}::{}", module_prefix, name) };
3678 let line = node.start_position().row + 1;
3679 let func_id = format!("func:{}:{}", path, full_name);
3680
3681 let signature = extract_rust_signature(node, source_str);
3682 let docstring = extract_rust_docstring(node, source_str);
3683 let line_count = node.end_position().row - node.start_position().row + 1;
3684 let is_test = path.contains("/tests/") || full_name.starts_with("test_") ||
3685 extract_rust_attributes(node, source).iter().any(|a| a.contains("test"));
3686
3687 nodes.push(CodeNode {
3688 id: func_id.clone(),
3689 kind: NodeKind::Function,
3690 name: full_name,
3691 file_path: path.to_string(),
3692 line: Some(line),
3693 decorators: extract_rust_attributes(node, source),
3694 signature,
3695 docstring,
3696 line_count,
3697 is_test,
3698 });
3699
3700 edges.push(CodeEdge::defined_in(&func_id, file_id));
3701 }
3702
3703 "mod_item" => {
3704 let name = node.child_by_field_name("name")
3705 .and_then(|n| n.utf8_text(source).ok())
3706 .unwrap_or("")
3707 .to_string();
3708 if name.is_empty() { return; }
3709
3710 let new_prefix = if module_prefix.is_empty() { name.clone() } else { format!("{}::{}", module_prefix, name) };
3711
3712 if let Some(body) = node.child_by_field_name("body") {
3714 let mut body_cursor = body.walk();
3715 for body_child in body.children(&mut body_cursor) {
3716 extract_rust_node(
3717 body_child,
3718 source,
3719 source_str,
3720 path,
3721 file_id,
3722 nodes,
3723 edges,
3724 class_id_map,
3725 impl_target_map,
3726 imports,
3727 &new_prefix,
3728 );
3729 }
3730 }
3731 }
3732
3733 "type_item" => {
3734 let name = node.child_by_field_name("name")
3736 .and_then(|n| n.utf8_text(source).ok())
3737 .unwrap_or("")
3738 .to_string();
3739 if name.is_empty() { return; }
3740
3741 let full_name = if module_prefix.is_empty() { name.clone() } else { format!("{}::{}", module_prefix, name) };
3742 let line = node.start_position().row + 1;
3743 let type_id = format!("class:{}:{}", path, full_name);
3744
3745 let signature = extract_rust_signature(node, source_str);
3746 let line_count = node.end_position().row - node.start_position().row + 1;
3747
3748 nodes.push(CodeNode {
3749 id: type_id.clone(),
3750 kind: NodeKind::Class,
3751 name: full_name.clone(),
3752 file_path: path.to_string(),
3753 line: Some(line),
3754 decorators: extract_rust_attributes(node, source),
3755 signature,
3756 docstring: None,
3757 line_count,
3758 is_test: false,
3759 });
3760
3761 edges.push(CodeEdge::defined_in(&type_id, file_id));
3762 class_id_map.insert(name, type_id);
3763 }
3764
3765 "const_item" | "static_item" => {
3766 let name = node.child_by_field_name("name")
3768 .and_then(|n| n.utf8_text(source).ok())
3769 .unwrap_or("")
3770 .to_string();
3771 if name.is_empty() || name.starts_with('_') { return; }
3772
3773 let full_name = if module_prefix.is_empty() { name.clone() } else { format!("{}::{}", module_prefix, name) };
3774 let line = node.start_position().row + 1;
3775 let const_id = format!("const:{}:{}", path, full_name);
3776
3777 let signature = extract_rust_signature(node, source_str);
3778
3779 nodes.push(CodeNode {
3780 id: const_id.clone(),
3781 kind: NodeKind::Class, name: full_name,
3783 file_path: path.to_string(),
3784 line: Some(line),
3785 decorators: extract_rust_attributes(node, source),
3786 signature,
3787 docstring: None,
3788 line_count: 1,
3789 is_test: false,
3790 });
3791
3792 edges.push(CodeEdge::defined_in(&const_id, file_id));
3793 }
3794
3795 "macro_definition" => {
3796 let name = node.child_by_field_name("name")
3798 .and_then(|n| n.utf8_text(source).ok())
3799 .unwrap_or("")
3800 .to_string();
3801 if name.is_empty() { return; }
3802
3803 let full_name = if module_prefix.is_empty() { name.clone() } else { format!("{}::{}", module_prefix, name) };
3804 let line = node.start_position().row + 1;
3805 let macro_id = format!("macro:{}:{}", path, full_name);
3806
3807 let line_count = node.end_position().row - node.start_position().row + 1;
3808
3809 nodes.push(CodeNode {
3810 id: macro_id.clone(),
3811 kind: NodeKind::Function, name: format!("{}!", full_name),
3813 file_path: path.to_string(),
3814 line: Some(line),
3815 decorators: vec!["macro".to_string()],
3816 signature: Some(format!("macro_rules! {}", name)),
3817 docstring: extract_rust_docstring(node, source_str),
3818 line_count,
3819 is_test: false,
3820 });
3821
3822 edges.push(CodeEdge::defined_in(¯o_id, file_id));
3823 }
3824
3825 _ => {}
3826 }
3827}
3828
3829fn extract_rust_method(
3831 node: tree_sitter::Node,
3832 source: &[u8],
3833 source_str: &str,
3834 path: &str,
3835 parent_id: &str,
3836 nodes: &mut Vec<CodeNode>,
3837 edges: &mut Vec<CodeEdge>,
3838) {
3839 let name = node.child_by_field_name("name")
3840 .and_then(|n| n.utf8_text(source).ok())
3841 .unwrap_or("")
3842 .to_string();
3843 if name.is_empty() { return; }
3844
3845 let line = node.start_position().row + 1;
3846 let method_id = format!("method:{}:{}", path, name);
3847
3848 let signature = extract_rust_signature(node, source_str);
3849 let docstring = extract_rust_docstring(node, source_str);
3850 let line_count = node.end_position().row - node.start_position().row + 1;
3851 let attrs = extract_rust_attributes(node, source);
3852 let is_test = path.contains("/tests/") || name.starts_with("test_") ||
3853 attrs.iter().any(|a| a.contains("test"));
3854
3855 nodes.push(CodeNode {
3856 id: method_id.clone(),
3857 kind: NodeKind::Function,
3858 name,
3859 file_path: path.to_string(),
3860 line: Some(line),
3861 decorators: attrs,
3862 signature,
3863 docstring,
3864 line_count,
3865 is_test,
3866 });
3867
3868 edges.push(CodeEdge {
3869 from: method_id,
3870 to: parent_id.to_string(),
3871 relation: EdgeRelation::DefinedIn,
3872 weight: 0.5,
3873 call_count: 1,
3874 in_error_path: false,
3875 confidence: 1.0,
3876 });
3877}
3878
3879fn extract_rust_attributes(node: tree_sitter::Node, source: &[u8]) -> Vec<String> {
3881 let mut attrs = Vec::new();
3882 if let Some(parent) = node.parent() {
3884 let mut cursor = parent.walk();
3885 let mut prev_was_attr = false;
3886 for child in parent.children(&mut cursor) {
3887 if child.kind() == "attribute_item" {
3888 if let Ok(attr_text) = child.utf8_text(source) {
3889 let clean = attr_text.trim_start_matches("#[").trim_end_matches(']');
3890 attrs.push(clean.to_string());
3891 }
3892 prev_was_attr = true;
3893 } else if child.id() == node.id() && prev_was_attr {
3894 break;
3895 } else {
3896 if prev_was_attr && child.kind() != "line_comment" {
3898 attrs.clear();
3899 }
3900 prev_was_attr = false;
3901 }
3902 }
3903 }
3904
3905 let mut cursor = node.walk();
3907 for child in node.children(&mut cursor) {
3908 if child.kind() == "attribute_item" {
3909 if let Ok(attr_text) = child.utf8_text(source) {
3910 let clean = attr_text.trim_start_matches("#[").trim_end_matches(']');
3911 attrs.push(clean.to_string());
3912 }
3913 }
3914 }
3915
3916 attrs
3917}
3918
3919fn extract_rust_signature(node: tree_sitter::Node, source_str: &str) -> Option<String> {
3921 let start = node.start_byte();
3922 if start >= source_str.len() { return None; }
3923
3924 let sig_text = &source_str[start..];
3925 let sig_end = sig_text.find(" {")
3927 .or_else(|| sig_text.find("\n{"))
3928 .or_else(|| sig_text.find(";\n"))
3929 .or_else(|| sig_text.find(';'))
3930 .unwrap_or(sig_text.len().min(200));
3931
3932 let sig = sig_text[..sig_end].trim();
3933 if sig.is_empty() { None } else { Some(sig.to_string()) }
3934}
3935
3936fn extract_rust_docstring(node: tree_sitter::Node, source_str: &str) -> Option<String> {
3938 let start_line = node.start_position().row;
3940 if start_line == 0 { return None; }
3941
3942 let lines: Vec<&str> = source_str.lines().collect();
3943 let mut doc_lines: Vec<&str> = Vec::new();
3944
3945 for i in (0..start_line).rev() {
3947 if i >= lines.len() { continue; }
3948 let line = lines[i].trim();
3949 if line.starts_with("///") {
3950 doc_lines.push(line.trim_start_matches("///").trim());
3951 } else if line.starts_with("//!") {
3952 doc_lines.push(line.trim_start_matches("//!").trim());
3953 } else if line.is_empty() || line.starts_with("#[") {
3954 continue;
3956 } else {
3957 break;
3958 }
3959 }
3960
3961 if doc_lines.is_empty() {
3962 return None;
3963 }
3964
3965 doc_lines.reverse();
3966 let first_line = doc_lines.first().copied().unwrap_or("");
3967 let truncated = if first_line.len() > 100 {
3968 &first_line[..100]
3969 } else {
3970 first_line
3971 };
3972
3973 if truncated.is_empty() { None } else { Some(truncated.to_string()) }
3974}
3975
3976fn extract_typescript_tree_sitter(
3981 path: &str,
3982 content: &str,
3983 parser: &mut Parser,
3984 class_id_map: &mut HashMap<String, String>,
3985 extension: &str,
3986) -> (Vec<CodeNode>, Vec<CodeEdge>, HashSet<String>) {
3987 let mut nodes = Vec::new();
3988 let mut edges = Vec::new();
3989 let mut imports = HashSet::new();
3990
3991 let lang_result = match extension {
3993 "tsx" => parser.set_language(&tree_sitter_typescript::LANGUAGE_TSX.into()),
3994 "ts" => parser.set_language(&tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into()),
3995 "jsx" => parser.set_language(&tree_sitter_javascript::LANGUAGE.into()),
3996 _ => parser.set_language(&tree_sitter_javascript::LANGUAGE.into()), };
3998
3999 if lang_result.is_err() {
4000 return (nodes, edges, imports);
4001 }
4002
4003 let tree = match parser.parse(content, None) {
4004 Some(t) => t,
4005 None => return (nodes, edges, imports),
4006 };
4007
4008 let file_id = format!("file:{}", path);
4009 let source = content.as_bytes();
4010 let root = tree.root_node();
4011
4012 let mut cursor = root.walk();
4013 for child in root.children(&mut cursor) {
4014 extract_typescript_node(
4015 child,
4016 source,
4017 content,
4018 path,
4019 &file_id,
4020 &mut nodes,
4021 &mut edges,
4022 class_id_map,
4023 &mut imports,
4024 );
4025 }
4026
4027 (nodes, edges, imports)
4028}
4029
4030fn extract_typescript_node(
4032 node: tree_sitter::Node,
4033 source: &[u8],
4034 source_str: &str,
4035 path: &str,
4036 file_id: &str,
4037 nodes: &mut Vec<CodeNode>,
4038 edges: &mut Vec<CodeEdge>,
4039 class_id_map: &mut HashMap<String, String>,
4040 imports: &mut HashSet<String>,
4041) {
4042 let text = |n: tree_sitter::Node| -> String {
4043 n.utf8_text(source).unwrap_or("").to_string()
4044 };
4045
4046 match node.kind() {
4047 "import_statement" => {
4048 let import_text = text(node);
4050 if let Some(from_idx) = import_text.rfind(" from ") {
4051 let module_part = import_text[from_idx + 6..].trim();
4052 let module = module_part.trim_matches(|c| c == '\'' || c == '"' || c == ';');
4053 if module.starts_with('.') || module.starts_with("@/") {
4054 edges.push(CodeEdge {
4055 from: file_id.to_string(),
4056 to: format!("module_ref:{}", module),
4057 relation: EdgeRelation::Imports,
4058 weight: 0.5,
4059 call_count: 1,
4060 in_error_path: false,
4061 confidence: 1.0,
4062 });
4063 }
4064 imports.insert(module.to_string());
4065
4066 if let Some(start) = import_text.find('{') {
4068 if let Some(end) = import_text.find('}') {
4069 let names_part = &import_text[start+1..end];
4070 for name in names_part.split(',') {
4071 let clean = name.trim().split(" as ").next().unwrap_or("").trim();
4072 if !clean.is_empty() {
4073 imports.insert(clean.to_string());
4074 }
4075 }
4076 }
4077 }
4078 }
4079 }
4080
4081 "class_declaration" | "class" => {
4082 extract_typescript_class(node, source, source_str, path, file_id, nodes, edges, class_id_map);
4083 }
4084
4085 "abstract_class_declaration" => {
4086 extract_typescript_class(node, source, source_str, path, file_id, nodes, edges, class_id_map);
4087 }
4088
4089 "interface_declaration" => {
4090 let name = node.child_by_field_name("name")
4091 .and_then(|n| n.utf8_text(source).ok())
4092 .unwrap_or("")
4093 .to_string();
4094 if name.is_empty() { return; }
4095
4096 let line = node.start_position().row + 1;
4097 let interface_id = format!("class:{}:{}", path, name);
4098
4099 let signature = extract_typescript_signature(node, source_str);
4100 let line_count = node.end_position().row - node.start_position().row + 1;
4101
4102 nodes.push(CodeNode {
4103 id: interface_id.clone(),
4104 kind: NodeKind::Class,
4105 name: name.clone(),
4106 file_path: path.to_string(),
4107 line: Some(line),
4108 decorators: vec!["interface".to_string()],
4109 signature,
4110 docstring: extract_typescript_docstring(node, source_str),
4111 line_count,
4112 is_test: path.contains("/test") || name.contains("Test"),
4113 });
4114
4115 edges.push(CodeEdge::defined_in(&interface_id, file_id));
4116 class_id_map.insert(name, interface_id);
4117 }
4118
4119 "function_declaration" | "function" => {
4120 let name = node.child_by_field_name("name")
4121 .and_then(|n| n.utf8_text(source).ok())
4122 .unwrap_or("")
4123 .to_string();
4124 if name.is_empty() { return; }
4125
4126 let line = node.start_position().row + 1;
4127 let func_id = format!("func:{}:{}", path, name);
4128
4129 let signature = extract_typescript_signature(node, source_str);
4130 let docstring = extract_typescript_docstring(node, source_str);
4131 let line_count = node.end_position().row - node.start_position().row + 1;
4132 let decorators = extract_typescript_decorators(node, source);
4133
4134 nodes.push(CodeNode {
4135 id: func_id.clone(),
4136 kind: NodeKind::Function,
4137 name,
4138 file_path: path.to_string(),
4139 line: Some(line),
4140 decorators,
4141 signature,
4142 docstring,
4143 line_count,
4144 is_test: path.contains("/test") || path.contains(".test.") || path.contains(".spec."),
4145 });
4146
4147 edges.push(CodeEdge::defined_in(&func_id, file_id));
4148 }
4149
4150 "lexical_declaration" | "variable_declaration" => {
4151 let mut cursor = node.walk();
4153 for child in node.children(&mut cursor) {
4154 if child.kind() == "variable_declarator" {
4155 let name = child.child_by_field_name("name")
4156 .and_then(|n| n.utf8_text(source).ok())
4157 .unwrap_or("")
4158 .to_string();
4159
4160 if let Some(value) = child.child_by_field_name("value") {
4161 if value.kind() == "arrow_function" || value.kind() == "function" {
4162 if name.is_empty() { continue; }
4163
4164 let line = node.start_position().row + 1;
4165 let func_id = format!("func:{}:{}", path, name);
4166
4167 let signature = extract_typescript_signature(node, source_str);
4168 let line_count = node.end_position().row - node.start_position().row + 1;
4169
4170 nodes.push(CodeNode {
4171 id: func_id.clone(),
4172 kind: NodeKind::Function,
4173 name,
4174 file_path: path.to_string(),
4175 line: Some(line),
4176 decorators: Vec::new(),
4177 signature,
4178 docstring: extract_typescript_docstring(node, source_str),
4179 line_count,
4180 is_test: path.contains("/test") || path.contains(".test.") || path.contains(".spec."),
4181 });
4182
4183 edges.push(CodeEdge::defined_in(&func_id, file_id));
4184 }
4185 }
4186 }
4187 }
4188 }
4189
4190 "enum_declaration" => {
4191 let name = node.child_by_field_name("name")
4192 .and_then(|n| n.utf8_text(source).ok())
4193 .unwrap_or("")
4194 .to_string();
4195 if name.is_empty() { return; }
4196
4197 let line = node.start_position().row + 1;
4198 let enum_id = format!("class:{}:{}", path, name);
4199
4200 let signature = extract_typescript_signature(node, source_str);
4201 let line_count = node.end_position().row - node.start_position().row + 1;
4202
4203 nodes.push(CodeNode {
4204 id: enum_id.clone(),
4205 kind: NodeKind::Class,
4206 name: name.clone(),
4207 file_path: path.to_string(),
4208 line: Some(line),
4209 decorators: vec!["enum".to_string()],
4210 signature,
4211 docstring: extract_typescript_docstring(node, source_str),
4212 line_count,
4213 is_test: false,
4214 });
4215
4216 edges.push(CodeEdge::defined_in(&enum_id, file_id));
4217 class_id_map.insert(name, enum_id);
4218 }
4219
4220 "type_alias_declaration" => {
4221 let name = node.child_by_field_name("name")
4222 .and_then(|n| n.utf8_text(source).ok())
4223 .unwrap_or("")
4224 .to_string();
4225 if name.is_empty() { return; }
4226
4227 let line = node.start_position().row + 1;
4228 let type_id = format!("class:{}:{}", path, name);
4229
4230 let signature = extract_typescript_signature(node, source_str);
4231 let line_count = node.end_position().row - node.start_position().row + 1;
4232
4233 nodes.push(CodeNode {
4234 id: type_id.clone(),
4235 kind: NodeKind::Class,
4236 name: name.clone(),
4237 file_path: path.to_string(),
4238 line: Some(line),
4239 decorators: vec!["type".to_string()],
4240 signature,
4241 docstring: None,
4242 line_count,
4243 is_test: false,
4244 });
4245
4246 edges.push(CodeEdge::defined_in(&type_id, file_id));
4247 class_id_map.insert(name, type_id);
4248 }
4249
4250 "export_statement" => {
4251 let mut cursor = node.walk();
4253 for child in node.children(&mut cursor) {
4254 match child.kind() {
4255 "class_declaration" | "class" | "abstract_class_declaration" |
4256 "interface_declaration" | "function_declaration" | "function" |
4257 "lexical_declaration" | "variable_declaration" | "enum_declaration" |
4258 "type_alias_declaration" => {
4259 extract_typescript_node(child, source, source_str, path, file_id, nodes, edges, class_id_map, imports);
4260 }
4261 _ => {}
4262 }
4263 }
4264 }
4265
4266 "expression_statement" => {
4267 let mut cursor = node.walk();
4269 for child in node.children(&mut cursor) {
4270 extract_typescript_node(child, source, source_str, path, file_id, nodes, edges, class_id_map, imports);
4271 }
4272 }
4273
4274 "module" | "internal_module" | "namespace" => {
4275 let name = node.child_by_field_name("name")
4277 .and_then(|n| n.utf8_text(source).ok())
4278 .unwrap_or("")
4279 .to_string();
4280
4281 if !name.is_empty() {
4282 let line = node.start_position().row + 1;
4283 let module_id = format!("class:{}:{}", path, name);
4284
4285 nodes.push(CodeNode {
4286 id: module_id.clone(),
4287 kind: NodeKind::Class,
4288 name: name.clone(),
4289 file_path: path.to_string(),
4290 line: Some(line),
4291 decorators: vec!["namespace".to_string()],
4292 signature: Some(format!("namespace {}", name)),
4293 docstring: None,
4294 line_count: node.end_position().row - node.start_position().row + 1,
4295 is_test: false,
4296 });
4297
4298 edges.push(CodeEdge::defined_in(&module_id, file_id));
4299 }
4300
4301 if let Some(body) = node.child_by_field_name("body") {
4303 let mut body_cursor = body.walk();
4304 for body_child in body.children(&mut body_cursor) {
4305 extract_typescript_node(body_child, source, source_str, path, file_id, nodes, edges, class_id_map, imports);
4306 }
4307 }
4308 }
4309
4310 _ => {}
4311 }
4312}
4313
4314fn extract_typescript_class(
4316 node: tree_sitter::Node,
4317 source: &[u8],
4318 source_str: &str,
4319 path: &str,
4320 file_id: &str,
4321 nodes: &mut Vec<CodeNode>,
4322 edges: &mut Vec<CodeEdge>,
4323 class_id_map: &mut HashMap<String, String>,
4324) {
4325 let name = node.child_by_field_name("name")
4326 .and_then(|n| n.utf8_text(source).ok())
4327 .unwrap_or("")
4328 .to_string();
4329 if name.is_empty() { return; }
4330
4331 let line = node.start_position().row + 1;
4332 let class_id = format!("class:{}:{}", path, name);
4333
4334 let signature = extract_typescript_signature(node, source_str);
4335 let docstring = extract_typescript_docstring(node, source_str);
4336 let line_count = node.end_position().row - node.start_position().row + 1;
4337 let decorators = extract_typescript_decorators(node, source);
4338
4339 nodes.push(CodeNode {
4340 id: class_id.clone(),
4341 kind: NodeKind::Class,
4342 name: name.clone(),
4343 file_path: path.to_string(),
4344 line: Some(line),
4345 decorators,
4346 signature,
4347 docstring,
4348 line_count,
4349 is_test: path.contains("/test") || name.contains("Test"),
4350 });
4351
4352 edges.push(CodeEdge::defined_in(&class_id, file_id));
4353 class_id_map.insert(name.clone(), class_id.clone());
4354
4355 fn find_extends_identifier(node: tree_sitter::Node, source: &[u8]) -> Option<String> {
4357 let mut cursor = node.walk();
4358 for child in node.children(&mut cursor) {
4359 match child.kind() {
4360 "identifier" | "type_identifier" => {
4361 return child.utf8_text(source).ok().map(|s| s.to_string());
4362 }
4363 "extends_clause" | "class_heritage" | "extends_type_clause" => {
4364 if let Some(name) = find_extends_identifier(child, source) {
4365 return Some(name);
4366 }
4367 }
4368 _ => {}
4369 }
4370 }
4371 None
4372 }
4373
4374 let mut cursor = node.walk();
4375 for child in node.children(&mut cursor) {
4376 if child.kind() == "class_heritage" || child.kind() == "extends_clause" {
4377 if let Some(parent_name) = find_extends_identifier(child, source) {
4378 if !parent_name.is_empty() {
4379 edges.push(CodeEdge {
4380 from: class_id.clone(),
4381 to: format!("class_ref:{}", parent_name),
4382 relation: EdgeRelation::Inherits,
4383 weight: 0.5,
4384 call_count: 1,
4385 in_error_path: false,
4386 confidence: 1.0,
4387 });
4388 }
4389 }
4390 }
4391 }
4392
4393 if let Some(body) = node.child_by_field_name("body") {
4395 let mut body_cursor = body.walk();
4396 for body_child in body.children(&mut body_cursor) {
4397 match body_child.kind() {
4398 "method_definition" | "public_field_definition" | "method_signature" => {
4399 extract_typescript_method(body_child, source, source_str, path, &class_id, nodes, edges);
4400 }
4401 _ => {}
4402 }
4403 }
4404 }
4405}
4406
4407fn extract_typescript_method(
4409 node: tree_sitter::Node,
4410 source: &[u8],
4411 source_str: &str,
4412 path: &str,
4413 class_id: &str,
4414 nodes: &mut Vec<CodeNode>,
4415 edges: &mut Vec<CodeEdge>,
4416) {
4417 let mut name = node.child_by_field_name("name")
4418 .and_then(|n| n.utf8_text(source).ok())
4419 .unwrap_or("")
4420 .to_string();
4421
4422 if name.is_empty() {
4424 let mut cursor = node.walk();
4425 for child in node.children(&mut cursor) {
4426 if child.kind() == "property_identifier" || child.kind() == "identifier" {
4427 if let Ok(text) = child.utf8_text(source) {
4428 name = text.to_string();
4429 break;
4430 }
4431 }
4432 }
4433 }
4434
4435 if name.is_empty() { return; }
4436
4437 let line = node.start_position().row + 1;
4438 let method_id = format!("method:{}:{}", path, name);
4439
4440 let signature = extract_typescript_signature(node, source_str);
4441 let docstring = extract_typescript_docstring(node, source_str);
4442 let line_count = node.end_position().row - node.start_position().row + 1;
4443 let decorators = extract_typescript_decorators(node, source);
4444
4445 nodes.push(CodeNode {
4446 id: method_id.clone(),
4447 kind: NodeKind::Function,
4448 name,
4449 file_path: path.to_string(),
4450 line: Some(line),
4451 decorators,
4452 signature,
4453 docstring,
4454 line_count,
4455 is_test: path.contains("/test") || path.contains(".test.") || path.contains(".spec."),
4456 });
4457
4458 edges.push(CodeEdge {
4459 from: method_id,
4460 to: class_id.to_string(),
4461 relation: EdgeRelation::DefinedIn,
4462 weight: 0.5,
4463 call_count: 1,
4464 in_error_path: false,
4465 confidence: 1.0,
4466 });
4467}
4468
4469fn extract_typescript_decorators(node: tree_sitter::Node, source: &[u8]) -> Vec<String> {
4471 let mut decorators = Vec::new();
4472
4473 if let Some(parent) = node.parent() {
4475 let mut cursor = parent.walk();
4476 for child in parent.children(&mut cursor) {
4477 if child.kind() == "decorator" {
4478 if let Ok(dec_text) = child.utf8_text(source) {
4479 let name = dec_text.trim_start_matches('@');
4480 let name = name.split('(').next().unwrap_or(name).trim();
4481 if !name.is_empty() {
4482 decorators.push(name.to_string());
4483 }
4484 }
4485 }
4486 if child.id() == node.id() {
4487 break;
4488 }
4489 }
4490 }
4491
4492 decorators
4493}
4494
4495fn extract_typescript_signature(node: tree_sitter::Node, source_str: &str) -> Option<String> {
4497 let start = node.start_byte();
4498 if start >= source_str.len() { return None; }
4499
4500 let sig_text = &source_str[start..];
4501 let sig_end = sig_text.find(" {")
4503 .or_else(|| sig_text.find("\n{"))
4504 .or_else(|| sig_text.find("{\n"))
4505 .unwrap_or(sig_text.len().min(200));
4506
4507 let sig = sig_text[..sig_end].trim();
4508 if sig.is_empty() { None } else { Some(sig.to_string()) }
4509}
4510
4511fn extract_typescript_docstring(node: tree_sitter::Node, source_str: &str) -> Option<String> {
4513 let start_line = node.start_position().row;
4514 if start_line == 0 { return None; }
4515
4516 let lines: Vec<&str> = source_str.lines().collect();
4517
4518 for i in (0..start_line).rev() {
4520 if i >= lines.len() { continue; }
4521 let line = lines[i].trim();
4522
4523 if line.ends_with("*/") {
4524 let mut doc_lines: Vec<&str> = Vec::new();
4526 for j in (0..=i).rev() {
4527 if j >= lines.len() { continue; }
4528 let doc_line = lines[j].trim();
4529 if doc_line.starts_with("/**") {
4530 let first = doc_line.trim_start_matches("/**").trim_start_matches('*').trim();
4531 if !first.is_empty() && !first.starts_with('@') {
4532 doc_lines.push(first);
4533 }
4534 break;
4535 } else if doc_line.starts_with('*') {
4536 let content = doc_line.trim_start_matches('*').trim();
4537 if !content.is_empty() && !content.starts_with('@') {
4538 doc_lines.push(content);
4539 }
4540 }
4541 }
4542
4543 if doc_lines.is_empty() {
4544 return None;
4545 }
4546
4547 doc_lines.reverse();
4548 let first_line = doc_lines.first().copied().unwrap_or("");
4549 let truncated = if first_line.len() > 100 {
4550 &first_line[..100]
4551 } else {
4552 first_line
4553 };
4554
4555 return if truncated.is_empty() { None } else { Some(truncated.to_string()) };
4556 } else if line.is_empty() || line.starts_with('@') || line.starts_with("//") {
4557 continue;
4558 } else {
4559 break;
4560 }
4561 }
4562
4563 None
4564}
4565
4566#[allow(dead_code)]
4570fn extract_rust_regex(path: &str, content: &str) -> (Vec<CodeNode>, Vec<CodeEdge>, HashSet<String>) {
4571 let mut nodes = Vec::new();
4572 let mut edges = Vec::new();
4573
4574 let file_id = format!("file:{}", path);
4575
4576 let re_use = Regex::new(r"(?m)^use\s+([\w:]+)").unwrap();
4577 let re_struct = Regex::new(r"(?m)^(?:pub\s+)?struct\s+(\w+)").unwrap();
4578 let re_enum = Regex::new(r"(?m)^(?:pub\s+)?enum\s+(\w+)").unwrap();
4579 let re_impl = Regex::new(r"(?m)^impl(?:<[^>]+>)?\s+(?:(\w+)\s+for\s+)?(\w+)").unwrap();
4580 let re_fn = Regex::new(r"(?m)^\s*(?:pub\s+)?(?:async\s+)?fn\s+(\w+)").unwrap();
4581
4582 for cap in re_use.captures_iter(content) {
4583 let module = cap[1].to_string();
4584 if !module.starts_with("std::") && !module.starts_with("core::") {
4585 edges.push(CodeEdge::new(
4586 &file_id,
4587 &format!("module_ref:{}", module),
4588 EdgeRelation::Imports,
4589 ));
4590 }
4591 }
4592
4593 for cap in re_struct.captures_iter(content) {
4594 let name = cap[1].to_string();
4595 let line = content[..cap.get(0).unwrap().start()].lines().count() + 1;
4596 let node = CodeNode::new_class(path, &name, line);
4597 edges.push(CodeEdge::defined_in(&node.id, &file_id));
4598 nodes.push(node);
4599 }
4600
4601 for cap in re_enum.captures_iter(content) {
4602 let name = cap[1].to_string();
4603 let line = content[..cap.get(0).unwrap().start()].lines().count() + 1;
4604 let node = CodeNode::new_class(path, &name, line);
4605 edges.push(CodeEdge::defined_in(&node.id, &file_id));
4606 nodes.push(node);
4607 }
4608
4609 for cap in re_impl.captures_iter(content) {
4610 if let Some(trait_match) = cap.get(1) {
4611 let type_name = &cap[2];
4612 let trait_name = trait_match.as_str();
4613 if let Some(type_node) = nodes.iter().find(|n| n.name == type_name) {
4614 edges.push(CodeEdge::new(
4615 &type_node.id,
4616 &format!("class_ref:{}", trait_name),
4617 EdgeRelation::Inherits,
4618 ));
4619 }
4620 }
4621 }
4622
4623 for cap in re_fn.captures_iter(content) {
4624 let name = cap[1].to_string();
4625 let line = content[..cap.get(0).unwrap().start()].lines().count() + 1;
4626 let node = CodeNode::new_function(path, &name, line, false);
4627 edges.push(CodeEdge::defined_in(&node.id, &file_id));
4628 nodes.push(node);
4629 }
4630
4631 (nodes, edges, HashSet::new())
4632}
4633
4634#[allow(dead_code)]
4636fn extract_typescript_regex(path: &str, content: &str) -> (Vec<CodeNode>, Vec<CodeEdge>, HashSet<String>) {
4637 let mut nodes = Vec::new();
4638 let mut edges = Vec::new();
4639
4640 let file_id = format!("file:{}", path);
4641
4642 let re_import = Regex::new(r#"(?m)^import\s+.*?\s+from\s+['"]([^'"]+)['"]"#).unwrap();
4643 let re_class = Regex::new(r"(?m)^(?:export\s+)?(?:abstract\s+)?class\s+(\w+)(?:\s+extends\s+(\w+))?").unwrap();
4644 let re_interface = Regex::new(r"(?m)^(?:export\s+)?interface\s+(\w+)(?:\s+extends\s+(\w+))?").unwrap();
4645 let re_function = Regex::new(r"(?m)^(?:export\s+)?(?:async\s+)?function\s+(\w+)").unwrap();
4646 let re_arrow = Regex::new(r"(?m)^(?:export\s+)?(?:const|let)\s+(\w+)\s*=\s*(?:async\s+)?\([^)]*\)\s*=>").unwrap();
4647
4648 for cap in re_import.captures_iter(content) {
4649 let module = cap[1].to_string();
4650 if module.starts_with('.') || module.starts_with("@/") {
4651 edges.push(CodeEdge::new(
4652 &file_id,
4653 &format!("module_ref:{}", module),
4654 EdgeRelation::Imports,
4655 ));
4656 }
4657 }
4658
4659 for cap in re_class.captures_iter(content) {
4660 let name = cap[1].to_string();
4661 let line = content[..cap.get(0).unwrap().start()].lines().count() + 1;
4662 let node = CodeNode::new_class(path, &name, line);
4663 edges.push(CodeEdge::defined_in(&node.id, &file_id));
4664
4665 if let Some(parent) = cap.get(2) {
4666 edges.push(CodeEdge::new(
4667 &node.id,
4668 &format!("class_ref:{}", parent.as_str()),
4669 EdgeRelation::Inherits,
4670 ));
4671 }
4672
4673 nodes.push(node);
4674 }
4675
4676 for cap in re_interface.captures_iter(content) {
4677 let name = cap[1].to_string();
4678 let line = content[..cap.get(0).unwrap().start()].lines().count() + 1;
4679 let node = CodeNode::new_class(path, &name, line);
4680 edges.push(CodeEdge::defined_in(&node.id, &file_id));
4681 nodes.push(node);
4682 }
4683
4684 for cap in re_function.captures_iter(content) {
4685 let name = cap[1].to_string();
4686 let line = content[..cap.get(0).unwrap().start()].lines().count() + 1;
4687 let node = CodeNode::new_function(path, &name, line, false);
4688 edges.push(CodeEdge::defined_in(&node.id, &file_id));
4689 nodes.push(node);
4690 }
4691
4692 for cap in re_arrow.captures_iter(content) {
4693 let name = cap[1].to_string();
4694 let line = content[..cap.get(0).unwrap().start()].lines().count() + 1;
4695 let node = CodeNode::new_function(path, &name, line, false);
4696 edges.push(CodeEdge::defined_in(&node.id, &file_id));
4697 nodes.push(node);
4698 }
4699
4700 (nodes, edges, HashSet::new())
4701}
4702
/// Reports whether `name` is one of the Python names this extractor treats as built in.
///
/// The lookup covers three groups: statement/operator keywords, builtin functions and
/// types, and commonly used exception classes. Matching is exact and case-sensitive.
fn is_python_builtin(name: &str) -> bool {
    // Keywords that can look like a call when followed by `(`.
    const KEYWORDS: &[&str] = &["if", "for", "while", "return", "not", "and", "or"];

    // Builtin functions and types.
    const BUILTINS: &[&str] = &[
        "print",
        "len",
        "range",
        "str",
        "int",
        "float",
        "list",
        "dict",
        "set",
        "tuple",
        "type",
        "isinstance",
        "issubclass",
        "super",
        "hasattr",
        "getattr",
        "setattr",
        "property",
        "staticmethod",
        "classmethod",
        "enumerate",
        "zip",
        "map",
        "filter",
        "sorted",
        "reversed",
        "any",
        "all",
        "min",
        "max",
        "sum",
        "abs",
        "bool",
        "repr",
        "hash",
        "id",
        "open",
        "format",
        "bytes",
        "bytearray",
        "memoryview",
        "object",
        "complex",
        "frozenset",
        "iter",
        "next",
        "callable",
        "delattr",
        "dir",
        "divmod",
        "eval",
        "exec",
        "globals",
        "hex",
        "input",
        "locals",
        "oct",
        "ord",
        "pow",
        "round",
        "slice",
        "vars",
        "chr",
        "bin",
        "breakpoint",
        "compile",
        "__import__",
    ];

    // Exception classes.
    const EXCEPTIONS: &[&str] = &[
        "ValueError",
        "TypeError",
        "KeyError",
        "IndexError",
        "AttributeError",
        "RuntimeError",
        "Exception",
        "NotImplementedError",
        "StopIteration",
        "OSError",
        "IOError",
        "FileNotFoundError",
        "ImportError",
        "AssertionError",
        "NameError",
        "OverflowError",
        "ZeroDivisionError",
        "UnicodeError",
        "SyntaxError",
    ];

    KEYWORDS.contains(&name) || BUILTINS.contains(&name) || EXCEPTIONS.contains(&name)
}
4802
/// Reports whether the top-level package of a dotted Python module path is on the
/// built-in list of well-known modules.
///
/// Only the text before the first `.` is compared, so `os.path` is judged by `os`.
/// Despite the name, the list also contains widely used third-party packages such as
/// `numpy`, `pandas` and `pytest`. An empty leading segment (e.g. `.pkg`) never matches.
fn is_stdlib(module: &str) -> bool {
    const KNOWN_TOP_LEVEL: &[&str] = &[
        "os",
        "sys",
        "re",
        "json",
        "math",
        "io",
        "abc",
        "collections",
        "typing",
        "unittest",
        "pytest",
        "copy",
        "functools",
        "itertools",
        "pathlib",
        "shutil",
        "tempfile",
        "logging",
        "warnings",
        "inspect",
        "textwrap",
        "string",
        "datetime",
        "time",
        "hashlib",
        "base64",
        "pickle",
        "csv",
        "xml",
        "html",
        "http",
        "urllib",
        "socket",
        "threading",
        "multiprocessing",
        "subprocess",
        "contextlib",
        "enum",
        "dataclasses",
        "struct",
        "array",
        "queue",
        "heapq",
        "bisect",
        "decimal",
        "fractions",
        "random",
        "statistics",
        "operator",
        "pdb",
        "traceback",
        "dis",
        "ast",
        "token",
        "importlib",
        "pkgutil",
        "site",
        "zipimport",
        "numpy",
        "scipy",
        "matplotlib",
        "pandas",
        "setuptools",
        "pip",
        "wheel",
        "pkg_resources",
        "distutils",
    ];

    // Everything up to the first dot, or the whole string when there is no dot.
    let top_level = match module.find('.') {
        Some(dot) => &module[..dot],
        None => module,
    };

    KNOWN_TOP_LEVEL.iter().any(|known| *known == top_level)
}
4819
/// Extraction tests: each one feeds a small source snippet to a tree-sitter extractor
/// and checks the nodes and edges that come back.
#[cfg(test)]
mod tests {
    use super::*;

    /// A Python class with a base, one method and one top-level function.
    #[test]
    fn test_extract_python() {
        let source = r#"
import os
from pathlib import Path

class MyClass(BaseClass):
    def method(self):
        pass

def top_level():
    pass
"#;
        let mut class_map = HashMap::new();
        let mut parser = Parser::new();
        parser
            .set_language(&tree_sitter_python::LANGUAGE.into())
            .unwrap();

        let (nodes, edges, _) =
            extract_python_tree_sitter("test.py", source, &mut parser, &mut class_map);

        let has_node = |name: &str| nodes.iter().any(|n| n.name == name);

        assert!(has_node("MyClass"));
        assert!(has_node("method"));
        assert!(has_node("top_level"));
        // The base class shows up as the target of some edge.
        assert!(edges.iter().any(|e| e.to.contains("BaseClass")));
    }

    /// A Rust struct, a trait impl with one method, a free function and a crate import.
    #[test]
    fn test_extract_rust() {
        let source = r#"
use std::path::Path;
use crate::module;

pub struct MyStruct {
    field: i32,
}

impl MyTrait for MyStruct {
    fn method(&self) {}
}

pub fn top_level() {}
"#;
        let mut class_map = HashMap::new();
        let mut parser = Parser::new();
        let (nodes, edges, _) =
            extract_rust_tree_sitter("test.rs", source, &mut parser, &mut class_map);

        let has_node = |name: &str| nodes.iter().any(|n| n.name == name);

        assert!(has_node("MyStruct"), "Should find MyStruct");
        assert!(has_node("method"), "Should find method");
        assert!(has_node("top_level"), "Should find top_level");

        let imports_module = edges.iter().any(|e| e.to.contains("module"));
        assert!(imports_module, "Should have module import edge");

        // `impl MyTrait for MyStruct` is expected to surface as an Inherits edge.
        let inherits_trait = edges
            .iter()
            .any(|e| e.relation == EdgeRelation::Inherits && e.to.contains("MyTrait"));
        assert!(inherits_trait, "Should capture trait impl inheritance");
    }

    /// Broader Rust coverage: struct, enum, trait, impl methods, nested module,
    /// type alias, free function and a `#[test]` function.
    #[test]
    fn test_extract_rust_comprehensive() {
        let source = r#"
use crate::foo::bar;

/// A documented struct
pub struct Person {
    name: String,
    age: u32,
}

/// A documented enum
pub enum Status {
    Active,
    Inactive,
}

/// A trait
pub trait Greeter {
    fn greet(&self) -> String;
}

impl Greeter for Person {
    fn greet(&self) -> String {
        format!("Hello, {}", self.name)
    }
}

impl Person {
    pub fn new(name: String) -> Self {
        Self { name, age: 0 }
    }

    pub fn birthday(&mut self) {
        self.age += 1;
    }
}

mod inner {
    pub fn nested_fn() {}
}

type MyAlias = Vec<String>;

pub fn standalone() {}

#[test]
fn test_something() {}
"#;
        let mut class_map = HashMap::new();
        let mut parser = Parser::new();
        let (nodes, edges, _) =
            extract_rust_tree_sitter("test.rs", source, &mut parser, &mut class_map);

        let has_node = |name: &str| nodes.iter().any(|n| n.name == name);

        // Type-level items and the methods defined in the two impl blocks.
        for (name, message) in [
            ("Person", "Should find Person struct"),
            ("Status", "Should find Status enum"),
            ("Greeter", "Should find Greeter trait"),
            ("greet", "Should find greet method"),
            ("new", "Should find new method"),
            ("birthday", "Should find birthday method"),
        ] {
            assert!(has_node(name), "{}", message);
        }

        // The nested function is matched by substring rather than by exact name.
        let has_nested_fn = nodes.iter().any(|n| n.name.contains("nested_fn"));
        assert!(has_nested_fn, "Should find nested_fn");

        for (name, message) in [
            ("MyAlias", "Should find type alias"),
            ("standalone", "Should find standalone fn"),
        ] {
            assert!(has_node(name), "{}", message);
        }

        // `None` when the function is missing, otherwise its `is_test` flag.
        let test_flag = nodes
            .iter()
            .find(|n| n.name == "test_something")
            .map(|n| n.is_test);
        assert!(test_flag.is_some(), "Should find test function");
        assert!(
            test_flag == Some(true),
            "Test function should be marked as test"
        );

        let greet_is_defined = edges
            .iter()
            .any(|e| e.relation == EdgeRelation::DefinedIn && e.from.contains("greet"));
        assert!(greet_is_defined, "greet should have DefinedIn edge");
    }

    /// A TypeScript class with a base and a method, a function, an arrow function
    /// and a relative import.
    #[test]
    fn test_extract_typescript() {
        let source = r#"
import { Component } from './component';

export class MyClass extends BaseClass {
    method(): void {}
}

export function topLevel(): void {}

export const arrowFn = () => {};
"#;
        let mut class_map = HashMap::new();
        let mut parser = Parser::new();
        let (nodes, edges, _) =
            extract_typescript_tree_sitter("test.ts", source, &mut parser, &mut class_map, "ts");

        let has_node = |name: &str| nodes.iter().any(|n| n.name == name);

        for (name, message) in [
            ("MyClass", "Should find MyClass"),
            ("topLevel", "Should find topLevel"),
            ("arrowFn", "Should find arrowFn"),
        ] {
            assert!(has_node(name), "{}", message);
        }

        let imports_component = edges.iter().any(|e| e.to.contains("component"));
        assert!(imports_component, "Should have component import");

        assert!(has_node("method"), "Should find method inside class");

        let inherits_base = edges
            .iter()
            .any(|e| e.relation == EdgeRelation::Inherits && e.to.contains("BaseClass"));
        assert!(inherits_base, "Should capture class inheritance");
    }

    /// Broader TypeScript coverage: decorated class, methods, interface, type alias,
    /// enum, function, async arrow function, default export and namespace.
    #[test]
    fn test_extract_typescript_comprehensive() {
        let source = r#"
import { Injectable } from '@angular/core';
import type { User } from './types';

/**
 * A service class
 */
@Injectable()
export class UserService {
    private users: User[] = [];

    /**
     * Get all users
     */
    getUsers(): User[] {
        return this.users;
    }

    addUser(user: User): void {
        this.users.push(user);
    }
}

export interface IRepository<T> {
    find(id: string): T | undefined;
    save(item: T): void;
}

export type UserId = string;

export enum UserRole {
    Admin = 'admin',
    User = 'user',
}

export function createUser(name: string): User {
    return { name };
}

export const fetchUser = async (id: string) => {
    return null;
};

export default class DefaultExport {}

namespace MyNamespace {
    export function innerFn() {}
}
"#;
        let mut class_map = HashMap::new();
        let mut parser = Parser::new();
        let (nodes, edges, _) =
            extract_typescript_tree_sitter("test.ts", source, &mut parser, &mut class_map, "ts");

        let has_node = |name: &str| nodes.iter().any(|n| n.name == name);

        for (name, message) in [
            ("UserService", "Should find UserService class"),
            ("DefaultExport", "Should find default export class"),
            ("getUsers", "Should find getUsers method"),
            ("addUser", "Should find addUser method"),
            ("IRepository", "Should find interface"),
            ("UserId", "Should find type alias"),
            ("UserRole", "Should find enum"),
            ("createUser", "Should find function"),
            ("fetchUser", "Should find arrow function"),
            ("MyNamespace", "Should find namespace"),
        ] {
            assert!(has_node(name), "{}", message);
        }

        let has_import_edge = edges.iter().any(|e| e.relation == EdgeRelation::Imports);
        assert!(has_import_edge, "Should have import edges");
    }
}