use std::collections::{BinaryHeap, HashMap, HashSet, VecDeque};
use std::path::Path;
use serde::{Deserialize, Serialize};
use regex::Regex;
use walkdir::WalkDir;
use tree_sitter::Parser;
/// In-memory graph of code entities (`nodes`) and the relations between them (`edges`).
///
/// Only `nodes` and `edges` are serialized. The three lookup maps are marked
/// `#[serde(skip)]`, so they come back empty from deserialization and must be
/// repopulated with `build_indexes` before any lookup helper is used.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct CodeGraph {
/// Every entity in the graph.
pub nodes: Vec<CodeNode>,
/// Every relation between entities; `from`/`to` hold node ids.
pub edges: Vec<CodeEdge>,
/// Node id -> positions in `edges` whose `from` equals that id.
#[serde(skip)]
pub outgoing: HashMap<String, Vec<usize>>,
/// Node id -> positions in `edges` whose `to` equals that id.
#[serde(skip)]
pub incoming: HashMap<String, Vec<usize>>,
/// Node id -> position of that node in `nodes`.
#[serde(skip)]
pub node_index: HashMap<String, usize>,
}
/// One entity in the code graph: a file, class, function/method or module.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CodeNode {
/// Identifier used by edges and index maps. The constructors build it as
/// `file:<path>`, `class:<path>:<name>`, `func:<path>:<name>` or `method:<path>:<name>`.
pub id: String,
/// What kind of entity this node represents.
pub kind: NodeKind,
/// Short name: the last path component for files, the identifier otherwise.
pub name: String,
/// Path of the file that contains the entity.
pub file_path: String,
/// Line where the entity starts; `None` for file nodes. Consumers in this file
/// treat it as 1-based (they subtract one to index into the file's lines).
pub line: Option<usize>,
/// Decorators attached to the entity; omitted from the serialized form when empty.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub decorators: Vec<String>,
/// Signature text, if one was recorded; omitted from the serialized form when absent.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub signature: Option<String>,
/// Docstring text, if one was recorded; omitted from the serialized form when absent.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub docstring: Option<String>,
/// Line count of the entity. The constructors initialise it to 0.
// NOTE(review): presumably filled in by the language extractors — confirm.
#[serde(default)]
pub line_count: usize,
/// Whether the entity looks like test code, decided by name/path heuristics in the constructors.
#[serde(default)]
pub is_test: bool,
}
impl CodeNode {
pub fn new_file(path: &str) -> Self {
Self {
id: format!("file:{}", path),
kind: NodeKind::File,
name: path.rsplit('/').next().unwrap_or(path).to_string(),
file_path: path.to_string(),
line: None,
decorators: Vec::new(),
signature: None,
docstring: None,
line_count: 0,
is_test: path.contains("/test") || path.contains("_test."),
}
}
pub fn new_class(path: &str, name: &str, line: usize) -> Self {
Self {
id: format!("class:{}:{}", path, name),
kind: NodeKind::Class,
name: name.to_string(),
file_path: path.to_string(),
line: Some(line),
decorators: Vec::new(),
signature: None,
docstring: None,
line_count: 0,
is_test: name.starts_with("Test") || path.contains("/test"),
}
}
pub fn new_function(path: &str, name: &str, line: usize, is_method: bool) -> Self {
let prefix = if is_method { "method" } else { "func" };
Self {
id: format!("{}:{}:{}", prefix, path, name),
kind: NodeKind::Function,
name: name.to_string(),
file_path: path.to_string(),
line: Some(line),
decorators: Vec::new(),
signature: None,
docstring: None,
line_count: 0,
is_test: name.starts_with("test_") || name.starts_with("Test") || path.contains("/test"),
}
}
}
/// Category of a `CodeNode`. Serialized in lowercase (`"file"`, `"class"`, ...).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum NodeKind {
/// A source file.
File,
/// A class definition.
Class,
/// A free function or a method (the id prefix distinguishes the two).
Function,
/// A module.
Module,
}
/// A directed relation between two nodes of the code graph.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CodeEdge {
/// Id of the source node.
pub from: String,
/// Id of the target node. During extraction this may temporarily hold an
/// unresolved `class_ref:`, `module_ref:` or `func_ref:` placeholder.
pub to: String,
/// What the relation means.
pub relation: EdgeRelation,
/// Edge weight; recomputed by `compute_weight`.
#[serde(default)]
pub weight: f32,
/// Number of occurrences merged into this edge (used for `Calls` edges).
#[serde(default)]
pub call_count: u32,
/// Whether the relation was observed on an error path; raises the weight of `Calls` edges.
#[serde(default)]
pub in_error_path: bool,
/// Confidence in the edge; contributes to the weight of `Calls` edges.
#[serde(default)]
pub confidence: f32,
}
impl CodeEdge {
    /// Creates an edge with the default metadata: weight 0.5, one occurrence,
    /// not on an error path, full confidence.
    pub fn new(from: &str, to: &str, relation: EdgeRelation) -> Self {
        let (from, to) = (from.to_owned(), to.to_owned());
        Self {
            from,
            to,
            relation,
            weight: 0.5,
            call_count: 1,
            in_error_path: false,
            confidence: 1.0,
        }
    }

    /// Shorthand for an `Imports` edge.
    pub fn imports(from: &str, to: &str) -> Self {
        Self::new(from, to, EdgeRelation::Imports)
    }

    /// Shorthand for a `Calls` edge.
    pub fn calls(from: &str, to: &str) -> Self {
        Self::new(from, to, EdgeRelation::Calls)
    }

    /// Shorthand for an `Inherits` edge.
    pub fn inherits(from: &str, to: &str) -> Self {
        Self::new(from, to, EdgeRelation::Inherits)
    }

    /// Shorthand for a `DefinedIn` edge.
    pub fn defined_in(from: &str, to: &str) -> Self {
        Self::new(from, to, EdgeRelation::DefinedIn)
    }

    /// Recomputes `weight` from the edge's metadata.
    ///
    /// Every relation other than `Calls` gets a fixed 0.7. A `Calls` edge blends
    /// the call count (saturating at 10 calls), an error-path factor and the
    /// confidence with weights 0.4 / 0.3 / 0.3.
    pub fn compute_weight(&mut self) {
        if self.relation != EdgeRelation::Calls {
            self.weight = 0.7;
            return;
        }
        let count_norm = (self.call_count as f32 / 10.0).min(1.0);
        let error_factor = match self.in_error_path {
            true => 0.8,
            false => 0.5,
        };
        self.weight = 0.4 * count_norm + 0.3 * error_factor + 0.3 * self.confidence;
    }
}
/// Meaning of a `CodeEdge`. Serialized in snake_case (`"defined_in"`, `"tests_for"`, ...).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum EdgeRelation {
/// `from` imports `to`.
Imports,
/// `from` inherits from `to`.
Inherits,
/// `from` is defined inside `to` (e.g. a method inside a class).
DefinedIn,
/// `from` calls `to`.
Calls,
/// `from` (a test file) tests `to` (a source file).
TestsFor,
/// `from` overrides `to`.
// NOTE(review): direction inferred from the "overrides" label; the code that
// creates these edges is not visible here — confirm.
Overrides,
}
/// Renders the relation as its snake_case label (`imports`, `defined_in`, ...).
impl std::fmt::Display for EdgeRelation {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            EdgeRelation::Imports => "imports",
            EdgeRelation::Inherits => "inherits",
            EdgeRelation::DefinedIn => "defined_in",
            EdgeRelation::Calls => "calls",
            EdgeRelation::TestsFor => "tests_for",
            EdgeRelation::Overrides => "overrides",
        };
        f.write_str(label)
    }
}
/// Result of `CodeGraph::impact_analysis`: nodes affected by a change, split
/// into non-test code and tests. The references borrow from the graph.
#[derive(Debug)]
pub struct ImpactReport<'a> {
/// Affected nodes whose file path does not look like a test path.
pub affected_source: Vec<&'a CodeNode>,
/// Affected nodes in test files, plus tests related to the changed nodes.
pub affected_tests: Vec<&'a CodeNode>,
}
/// A path through the graph that starts at a symptom node.
#[derive(Debug, Clone)]
pub struct CausalChain {
/// Id of the node the chain was traced from.
pub symptom_node_id: String,
/// Nodes along the path, beginning with the symptom node.
pub chain: Vec<ChainNode>,
}
/// One step of a `CausalChain`: a snapshot of a graph node plus the label of
/// the edge leading to the next step.
#[derive(Debug, Clone)]
pub struct ChainNode {
/// Id of the graph node.
pub node_id: String,
/// Name of the graph node.
pub node_name: String,
/// File that contains the node.
pub file_path: String,
/// Line where the node starts, if known.
pub line: Option<usize>,
/// Label of the edge to the following chain entry (e.g. `calls`, `called_by`);
/// `None` on the last entry.
pub edge_to_next: Option<String>,
}
/// Source language of a file, as far as the extractor distinguishes them.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Language {
    Rust,
    /// TypeScript and JavaScript files share this variant.
    TypeScript,
    Python,
    /// Any extension the extractor does not handle (or no extension at all).
    Unknown,
}

impl Language {
    /// Classifies a file by its extension (case-sensitive).
    ///
    /// `rs` is Rust; `ts`, `tsx`, `js` and `jsx` are all TypeScript; `py` is
    /// Python. Missing, non-UTF-8 or unrecognised extensions yield `Unknown`.
    pub fn from_path(path: &Path) -> Self {
        match path.extension().and_then(|ext| ext.to_str()) {
            Some("rs") => Language::Rust,
            Some("ts" | "tsx" | "js" | "jsx") => Language::TypeScript,
            Some("py") => Language::Python,
            Some(_) | None => Language::Unknown,
        }
    }
}
impl CodeGraph {
/// Loads the code graph for `repo_dir` from the on-disk cache, extracting and
/// caching it when no usable cache entry exists.
///
/// The cache lives in a `.graph-cache` directory next to `repo_dir` (inside it
/// when `repo_dir` has no parent). The file name is built from `repo_name`
/// (with `/` replaced by `__`) and the first 8 bytes of `base_commit`.
///
/// Caching is best effort: an unreadable or unparsable cache file is deleted
/// and rebuilt, and a failure to write the cache is logged but never prevents
/// the freshly extracted graph from being returned.
pub fn extract_cached(repo_dir: &Path, repo_name: &str, base_commit: &str) -> Self {
    let cache_dir = repo_dir.parent().unwrap_or(repo_dir).join(".graph-cache");
    // Best effort: if this fails, the write below fails too and is reported there.
    let _ = std::fs::create_dir_all(&cache_dir);

    let safe_repo = repo_name.replace('/', "__");
    // `get` instead of slicing: slicing panics when byte 8 is not a char
    // boundary. Short or oddly encoded commits fall back to the full string.
    let short_commit = base_commit.get(..8).unwrap_or(base_commit);
    let cache_file = cache_dir.join(format!("{}__{}.json", safe_repo, short_commit));

    if cache_file.exists() {
        let cached = std::fs::read_to_string(&cache_file)
            .ok()
            .and_then(|data| serde_json::from_str::<CodeGraph>(&data).ok());
        if let Some(mut graph) = cached {
            // The lookup maps are not serialized, so rebuild them after loading.
            graph.build_indexes();
            tracing::info!(
                "Loaded code graph from cache: {} ({} nodes, {} edges)",
                cache_file.display(),
                graph.nodes.len(),
                graph.edges.len()
            );
            return graph;
        }
        // Unreadable or corrupt entry: drop it so it is regenerated below.
        let _ = std::fs::remove_file(&cache_file);
    }

    let graph = Self::extract_from_dir(repo_dir);
    match serde_json::to_string(&graph) {
        Ok(json) => match std::fs::write(&cache_file, json) {
            // Only claim the cache was saved when the write actually succeeded.
            Ok(()) => tracing::info!(
                "Saved code graph to cache: {} ({} nodes, {} edges)",
                cache_file.display(),
                graph.nodes.len(),
                graph.edges.len()
            ),
            Err(err) => tracing::warn!(
                "Failed to write code graph cache {}: {}",
                cache_file.display(),
                err
            ),
        },
        Err(err) => tracing::warn!("Failed to serialize code graph for caching: {}", err),
    }
    graph
}
pub fn extract_from_dir(dir: &Path) -> Self {
let mut nodes = Vec::new();
let mut edges = Vec::new();
let mut class_map: HashMap<String, String> = HashMap::new();
let mut func_map: HashMap<String, Vec<String>> = HashMap::new();
let mut module_map: HashMap<String, String> = HashMap::new();
let mut method_to_class: HashMap<String, String> = HashMap::new();
let mut class_methods: HashMap<String, Vec<String>> = HashMap::new();
let mut class_parents: HashMap<String, Vec<String>> = HashMap::new();
let mut file_imported_names: HashMap<String, HashSet<String>> = HashMap::new();
let mut file_entries: Vec<(String, String, Language)> = Vec::new();
for entry in WalkDir::new(dir)
.follow_links(false)
.max_depth(20)
.into_iter()
.filter_entry(|e| {
let name = e.file_name().to_str().unwrap_or("");
!name.starts_with('.')
&& name != "node_modules"
&& name != "__pycache__"
&& name != "target"
&& name != "build"
&& name != "dist"
&& name != ".git"
&& name != ".eggs"
&& name != ".tox"
})
{
let entry = match entry {
Ok(e) => e,
Err(_) => continue,
};
if !entry.file_type().is_file() {
continue;
}
let path = entry.path();
let lang = Language::from_path(path);
if lang == Language::Unknown {
continue;
}
let rel_path = path
.strip_prefix(dir)
.unwrap_or(path)
.to_string_lossy()
.to_string();
if rel_path == "setup.py" || rel_path == "conftest.py" || rel_path.contains("__pycache__") {
continue;
}
let content = match std::fs::read_to_string(path) {
Ok(c) => c,
Err(_) => continue,
};
let module_path = rel_path
.replace('/', ".")
.trim_end_matches(".py")
.trim_end_matches(".rs")
.trim_end_matches(".ts")
.trim_end_matches(".tsx")
.trim_end_matches(".js")
.trim_end_matches(".jsx")
.to_string();
let file_id = format!("file:{}", rel_path);
module_map.insert(module_path.clone(), file_id.clone());
let parts: Vec<&str> = module_path.split('.').collect();
for start in 1..parts.len() {
let partial = parts[start..].join(".");
module_map.entry(partial).or_insert_with(|| file_id.clone());
}
file_entries.push((rel_path, content, lang));
}
let mut parser = Parser::new();
let python_language = tree_sitter_python::LANGUAGE;
parser.set_language(&python_language.into()).ok();
for (rel_path, content, lang) in &file_entries {
let _file_id = format!("file:{}", rel_path);
let (file_nodes, file_edges, imports) = match lang {
Language::Python => {
extract_python_tree_sitter(
rel_path,
content,
&mut parser,
&mut class_map,
)
}
Language::Rust => {
extract_rust_tree_sitter(
rel_path,
content,
&mut parser,
&mut class_map,
)
}
Language::TypeScript => {
let ext = rel_path.rsplit('.').next().unwrap_or("ts");
extract_typescript_tree_sitter(
rel_path,
content,
&mut parser,
&mut class_map,
ext,
)
}
Language::Unknown => continue,
};
for node in &file_nodes {
if node.kind == NodeKind::Class {
class_map.insert(node.name.clone(), node.id.clone());
} else if node.kind == NodeKind::Function {
func_map
.entry(node.name.clone())
.or_default()
.push(node.id.clone());
}
}
for edge in &file_edges {
if edge.relation == EdgeRelation::DefinedIn {
if edge.from.starts_with("method:") && edge.to.starts_with("class:") {
method_to_class.insert(edge.from.clone(), edge.to.clone());
class_methods
.entry(edge.to.clone())
.or_default()
.push(edge.from.clone());
}
}
if edge.relation == EdgeRelation::Inherits {
if let Some(parent_id) = class_map.get(
edge.to.strip_prefix("class_ref:").unwrap_or(&edge.to),
) {
class_parents
.entry(edge.from.clone())
.or_default()
.push(parent_id.clone());
}
}
}
if !imports.is_empty() {
file_imported_names.insert(rel_path.clone(), imports);
}
if !file_nodes.is_empty() {
nodes.push(CodeNode::new_file(rel_path));
}
nodes.extend(file_nodes);
edges.extend(file_edges);
}
let class_init_map: HashMap<String, Vec<(String, String)>> = {
let mut map: HashMap<String, Vec<(String, String)>> = HashMap::new();
for node in &nodes {
if node.kind == NodeKind::Function && node.name == "__init__" && !node.is_test {
if let Some(class_id) = method_to_class.get(&node.id) {
if let Some(class_name) = class_id.rsplit(':').next() {
map.entry(class_name.to_string())
.or_default()
.push((node.file_path.clone(), node.id.clone()));
}
}
}
}
map
};
let node_pkg_map: HashMap<String, String> = nodes
.iter()
.map(|n| {
let pkg = n.file_path.rsplitn(2, '/').nth(1).unwrap_or("").to_string();
(n.id.clone(), pkg)
})
.collect();
for (rel_path, content, lang) in &file_entries {
if *lang != Language::Python {
continue;
}
let file_func_ids: HashSet<String> = nodes
.iter()
.filter(|n| n.file_path == *rel_path && n.kind == NodeKind::Function)
.map(|n| n.id.clone())
.collect();
let package_dir = rel_path.rsplitn(2, '/').nth(1).unwrap_or("");
if let Some(tree) = parser.parse(content, None) {
let source = content.as_bytes();
let root = tree.root_node();
extract_calls_from_tree(
root,
source,
rel_path,
&func_map,
&method_to_class,
&class_parents,
&file_func_ids,
&file_imported_names,
package_dir,
&class_init_map,
&node_pkg_map,
&mut edges,
);
}
let is_test_file = rel_path.contains("/tests/") || rel_path.contains("/test_");
if is_test_file {
let file_id = format!("file:{}", rel_path);
let re_from_import = Regex::new(r"^from\s+([\w.]+)\s+import").unwrap();
for line in content.lines() {
if let Some(cap) = re_from_import.captures(line) {
let module = cap[1].to_string();
if let Some(source_file_id) = module_map.get(&module) {
edges.push(CodeEdge {
from: file_id.clone(),
to: source_file_id.clone(),
relation: EdgeRelation::TestsFor,
weight: 0.5,
call_count: 1,
in_error_path: false,
confidence: 1.0,
});
}
}
}
}
}
let mut resolved_edges = Vec::new();
for edge in edges {
if edge.to.starts_with("class_ref:") {
let class_name = &edge.to["class_ref:".len()..];
if let Some(class_id) = class_map.get(class_name) {
resolved_edges.push(CodeEdge {
from: edge.from,
to: class_id.clone(),
relation: edge.relation,
weight: edge.weight,
call_count: edge.call_count,
in_error_path: edge.in_error_path,
confidence: edge.confidence,
});
}
} else if edge.to.starts_with("module_ref:") {
let module = &edge.to["module_ref:".len()..];
if let Some(file_id) = module_map.get(module) {
resolved_edges.push(CodeEdge {
from: edge.from,
to: file_id.clone(),
relation: edge.relation,
weight: edge.weight,
call_count: edge.call_count,
in_error_path: edge.in_error_path,
confidence: edge.confidence,
});
}
} else if edge.to.starts_with("func_ref:") {
let func_name = &edge.to["func_ref:".len()..];
if let Some(func_ids) = func_map.get(func_name) {
if let Some(func_id) = func_ids.first() {
resolved_edges.push(CodeEdge {
from: edge.from,
to: func_id.clone(),
relation: edge.relation,
weight: edge.weight,
call_count: edge.call_count,
in_error_path: edge.in_error_path,
confidence: edge.confidence,
});
}
}
} else {
resolved_edges.push(edge);
}
}
let mut edge_map: HashMap<(String, String), CodeEdge> = HashMap::new();
let mut other_edges: Vec<CodeEdge> = Vec::new();
for edge in resolved_edges {
if edge.relation == EdgeRelation::Calls {
let key = (edge.from.clone(), edge.to.clone());
let entry = edge_map.entry(key).or_insert_with(|| {
let mut e = edge.clone();
e.call_count = 0;
e
});
entry.call_count += 1;
if edge.confidence > entry.confidence {
entry.confidence = edge.confidence;
}
if edge.in_error_path {
entry.in_error_path = true;
}
} else {
other_edges.push(edge);
}
}
let mut final_edges: Vec<CodeEdge> = edge_map.into_values().collect();
final_edges.extend(other_edges);
for edge in &mut final_edges {
edge.compute_weight();
}
add_override_edges(&nodes, &mut final_edges);
let mut graph = CodeGraph {
nodes,
edges: final_edges,
outgoing: HashMap::new(),
incoming: HashMap::new(),
node_index: HashMap::new(),
};
graph.build_indexes();
graph
}
pub fn build_indexes(&mut self) {
self.node_index.clear();
self.outgoing.clear();
self.incoming.clear();
for (i, node) in self.nodes.iter().enumerate() {
self.node_index.insert(node.id.clone(), i);
}
for (i, edge) in self.edges.iter().enumerate() {
self.outgoing.entry(edge.from.clone()).or_default().push(i);
self.incoming.entry(edge.to.clone()).or_default().push(i);
}
}
/// Iterates over the edges whose `from` is `node_id`, in edge-list order.
/// Yields nothing for an unknown id. Relies on the `outgoing` index being current.
#[inline]
pub fn outgoing_edges(&self, node_id: &str) -> impl Iterator<Item = &CodeEdge> {
    let positions: &[usize] = match self.outgoing.get(node_id) {
        Some(list) => list.as_slice(),
        None => &[],
    };
    positions.iter().map(move |&pos| &self.edges[pos])
}
/// Iterates over the edges whose `to` is `node_id`, in edge-list order.
/// Yields nothing for an unknown id. Relies on the `incoming` index being current.
#[inline]
pub fn incoming_edges(&self, node_id: &str) -> impl Iterator<Item = &CodeEdge> {
    let positions: &[usize] = match self.incoming.get(node_id) {
        Some(list) => list.as_slice(),
        None => &[],
    };
    positions.iter().map(move |&pos| &self.edges[pos])
}
/// Looks up a node by id through `node_index`; `None` when the id is unknown.
#[inline]
pub fn node_by_id(&self, node_id: &str) -> Option<&CodeNode> {
    let pos = *self.node_index.get(node_id)?;
    Some(&self.nodes[pos])
}
/// Returns the nodes that have a `Calls` edge pointing at `node_id`.
/// Edges whose source id is not in the graph are ignored.
pub fn get_callers(&self, node_id: &str) -> Vec<&CodeNode> {
    let mut callers = Vec::new();
    for edge in self.incoming_edges(node_id) {
        if edge.relation != EdgeRelation::Calls {
            continue;
        }
        if let Some(caller) = self.node_by_id(&edge.from) {
            callers.push(caller);
        }
    }
    callers
}
/// Returns the nodes that `node_id` has a `Calls` edge to.
/// Edges whose target id is not in the graph are ignored.
pub fn get_callees(&self, node_id: &str) -> Vec<&CodeNode> {
    let mut callees = Vec::new();
    for edge in self.outgoing_edges(node_id) {
        if edge.relation != EdgeRelation::Calls {
            continue;
        }
        if let Some(callee) = self.node_by_id(&edge.to) {
            callees.push(callee);
        }
    }
    callees
}
/// Returns the target node of every outgoing edge of `node_id`, regardless of
/// relation. Edges whose target id is not in the graph are ignored.
pub fn get_dependencies(&self, node_id: &str) -> Vec<&CodeNode> {
    let mut targets = Vec::new();
    for edge in self.outgoing_edges(node_id) {
        if let Some(target) = self.node_by_id(&edge.to) {
            targets.push(target);
        }
    }
    targets
}
/// Returns the nodes that depend on `node_id`, directly or transitively,
/// found by following incoming edges of any relation.
pub fn get_impact(&self, node_id: &str) -> Vec<&CodeNode> {
    let mut seen_ids = HashSet::new();
    let mut dependents = Vec::new();
    self.collect_dependents(node_id, &mut dependents, &mut seen_ids);
    dependents
}
/// Depth-first walk over incoming edges, appending every node that depends on
/// `node_id` (directly or transitively) to `result`.
///
/// `visited` holds the ids already expanded. Each dependent is appended at most
/// once, and the node the walk started from is never reported as its own
/// dependent, even when the graph contains cycles or parallel edges.
fn collect_dependents<'a>(
    &'a self,
    node_id: &str,
    result: &mut Vec<&'a CodeNode>,
    visited: &mut HashSet<String>,
) {
    if !visited.insert(node_id.to_string()) {
        return;
    }
    for edge in self.incoming_edges(node_id) {
        // Check before pushing: a source that was already expanded (diamond,
        // cycle, or a second edge between the same pair) must not be reported again.
        if visited.contains(&edge.from) {
            continue;
        }
        if let Some(node) = self.node_by_id(&edge.from) {
            result.push(node);
            self.collect_dependents(&edge.from, result, visited);
        }
    }
}
/// Finds the nodes relevant to a set of keywords and expands the selection
/// with related context.
///
/// Matching is case-insensitive. The result is ordered as follows, and a node
/// id is never added once it is already present:
/// 1. Nodes matching at least one keyword, best score first. A keyword matches
///    when the node name or file path contains it, or when the keyword contains
///    the node name (leading underscores stripped; names of 5+ characters only).
/// 2. Every other node from the files of those matches.
/// 3. For each class selected so far: its ancestors and direct subclasses,
///    followed by every other node from the files they live in.
/// 4. Files imported by the selected files, but only when such a file defines
///    an entity whose name matches a keyword — together with all its entities.
pub fn find_relevant_nodes(&self, keywords: &[&str]) -> Vec<&CodeNode> {
    // Lower-case the keywords once instead of once per node.
    let keywords_lower: Vec<String> = keywords.iter().map(|kw| kw.to_lowercase()).collect();

    // ---- 1. keyword scoring ------------------------------------------------------
    let mut scored: Vec<(usize, &CodeNode)> = self
        .nodes
        .iter()
        .filter_map(|node| {
            let name_lower = node.name.to_lowercase();
            let path_lower = node.file_path.to_lowercase();
            let trimmed_name = name_lower.trim_start_matches('_');
            let score = keywords_lower
                .iter()
                .filter(|kw| {
                    name_lower.contains(kw.as_str())
                        || path_lower.contains(kw.as_str())
                        // An all-underscore name trims to "", which every keyword
                        // "contains"; require something to be left after trimming.
                        || (name_lower.len() >= 5
                            && !trimmed_name.is_empty()
                            && kw.contains(trimmed_name))
                })
                .count();
            if score > 0 {
                Some((score, node))
            } else {
                None
            }
        })
        .collect();
    // Stable sort: equally scored nodes keep their graph order.
    scored.sort_by(|a, b| b.0.cmp(&a.0));
    let mut results: Vec<&CodeNode> = scored.into_iter().map(|(_, n)| n).collect();

    // Ids already in `results`; replaces a linear scan of `results` per candidate.
    let mut seen: HashSet<&str> = results.iter().map(|&n| n.id.as_str()).collect();

    // ---- 2. everything else from the matched files -------------------------------
    let relevant_files: HashSet<&str> = results.iter().map(|&n| n.file_path.as_str()).collect();
    for node in &self.nodes {
        if relevant_files.contains(node.file_path.as_str()) && seen.insert(node.id.as_str()) {
            results.push(node);
        }
    }

    // ---- 3. inheritance neighbours -----------------------------------------------
    let mut inheritance_additions: Vec<&CodeNode> = Vec::new();
    let result_ids: HashSet<String> = results.iter().map(|n| n.id.clone()).collect();
    for &node in &results {
        if node.kind != NodeKind::Class {
            continue;
        }
        let chain = self.get_inheritance_chain(&node.id);
        for ancestor_id in &chain {
            if !result_ids.contains(ancestor_id) {
                if let Some(ancestor) = self.node_by_id(ancestor_id) {
                    inheritance_additions.push(ancestor);
                }
            }
        }
        for edge in self.incoming_edges(&node.id) {
            if edge.relation == EdgeRelation::Inherits && !result_ids.contains(&edge.from) {
                if let Some(child) = self.node_by_id(&edge.from) {
                    inheritance_additions.push(child);
                }
            }
        }
    }
    let mut extra_files: HashSet<&str> = HashSet::new();
    for &node in &inheritance_additions {
        if seen.insert(node.id.as_str()) {
            extra_files.insert(node.file_path.as_str());
            results.push(node);
        }
    }
    for node in &self.nodes {
        if extra_files.contains(node.file_path.as_str()) && seen.insert(node.id.as_str()) {
            results.push(node);
        }
    }

    // ---- 4. imported files that mention a keyword --------------------------------
    let mut import_additions: Vec<&CodeNode> = Vec::new();
    for &node in &results {
        if node.kind != NodeKind::File {
            continue;
        }
        for edge in self.outgoing_edges(&node.id) {
            if edge.relation == EdgeRelation::Imports && !seen.contains(edge.to.as_str()) {
                if let Some(imported) = self.node_by_id(&edge.to) {
                    import_additions.push(imported);
                }
            }
        }
    }
    for &node in &import_additions {
        if node.kind != NodeKind::File || seen.contains(node.id.as_str()) {
            continue;
        }
        let has_keyword_match = self
            .nodes
            .iter()
            .filter(|n| n.file_path == node.file_path && n.kind != NodeKind::File)
            .any(|n| {
                let name_lower = n.name.to_lowercase();
                keywords_lower.iter().any(|kw| {
                    name_lower.contains(kw.as_str()) || kw.contains(name_lower.as_str())
                })
            });
        if !has_keyword_match {
            continue;
        }
        seen.insert(node.id.as_str());
        results.push(node);
        for entity in &self.nodes {
            if entity.file_path == node.file_path && seen.insert(entity.id.as_str()) {
                results.push(entity);
            }
        }
    }
    results
}
/// Computes which nodes are affected by changing `changed_node_ids`.
///
/// Transitive dependents of each changed node are split by file path: paths
/// containing `/tests/` or `/test_` go to `affected_tests`, everything else to
/// `affected_source`. Tests related to the changed nodes (see
/// `find_related_tests`) are then appended to `affected_tests`. Each node id is
/// reported at most once across both lists.
pub fn impact_analysis(&self, changed_node_ids: &[&str]) -> ImpactReport<'_> {
    let mut report = ImpactReport {
        affected_source: Vec::new(),
        affected_tests: Vec::new(),
    };
    let mut reported = HashSet::new();

    for node_id in changed_node_ids {
        for node in self.get_impact(node_id) {
            if !reported.insert(node.id.clone()) {
                continue;
            }
            let in_test_file =
                node.file_path.contains("/tests/") || node.file_path.contains("/test_");
            let bucket = if in_test_file {
                &mut report.affected_tests
            } else {
                &mut report.affected_source
            };
            bucket.push(node);
        }
    }

    for test in self.find_related_tests(changed_node_ids) {
        if reported.insert(test.id.clone()) {
            report.affected_tests.push(test);
        }
    }
    report
}
/// Finds test nodes related to the given source nodes.
///
/// Two sources of evidence are combined, each node being returned once:
/// - `TestsFor` edges pointing at the files that contain the source nodes:
///   the test file node plus every non-file entity defined in that test file;
/// - `Calls` edges into a source node from a caller whose file path contains
///   `/tests/` or `/test_`.
///
/// Unknown ids in `source_node_ids` contribute nothing to the first group.
pub fn find_related_tests(&self, source_node_ids: &[&str]) -> Vec<&CodeNode> {
    let mut found = Vec::new();
    let mut seen = HashSet::new();

    // File-node ids of the files that contain the source nodes.
    let source_file_ids: HashSet<String> = source_node_ids
        .iter()
        .filter_map(|id| self.node_by_id(id))
        .map(|n| format!("file:{}", n.file_path))
        .collect();

    for file_id in &source_file_ids {
        let testing_edges = self
            .incoming_edges(file_id.as_str())
            .filter(|e| e.relation == EdgeRelation::TestsFor);
        for edge in testing_edges {
            let test_file = match self.node_by_id(&edge.from) {
                Some(n) => n,
                None => continue,
            };
            if seen.insert(test_file.id.clone()) {
                found.push(test_file);
            }
            // Pull in everything defined in that test file as well.
            for node in &self.nodes {
                let same_file = node.file_path == test_file.file_path;
                if same_file && node.kind != NodeKind::File && seen.insert(node.id.clone()) {
                    found.push(node);
                }
            }
        }
    }

    for source_id in source_node_ids.iter() {
        let call_edges = self
            .incoming_edges(source_id)
            .filter(|e| e.relation == EdgeRelation::Calls);
        for edge in call_edges {
            let caller = match self.node_by_id(&edge.from) {
                Some(n) => n,
                None => continue,
            };
            let in_test_file =
                caller.file_path.contains("/tests/") || caller.file_path.contains("/test_");
            if in_test_file && seen.insert(caller.id.clone()) {
                found.push(caller);
            }
        }
    }
    found
}
/// Renders the impact of changing `changed_node_ids` as Markdown for an LLM prompt.
///
/// The text lists the affected non-test nodes, then the affected test files
/// (sorted by path, at most 10 function names each), and finally up to 5 test
/// function bodies read from `repo_dir`. Returns an empty string when nothing
/// is affected. For a given report the output is deterministic.
pub fn format_impact_for_llm(&self, changed_node_ids: &[&str], repo_dir: &Path) -> String {
    let report = self.impact_analysis(changed_node_ids);
    let mut result = String::new();

    if !report.affected_source.is_empty() {
        result.push_str("**⚠️ Impact Analysis — Code affected by your change:**\n");
        for node in &report.affected_source {
            let prefix = match node.kind {
                NodeKind::File => "📄",
                NodeKind::Class => "🔷",
                NodeKind::Function => "🔹",
                NodeKind::Module => "📦",
            };
            result.push_str(&format!("{} {} (`{}`)\n", prefix, node.name, node.file_path));
        }
        result.push('\n');
    }

    if !report.affected_tests.is_empty() {
        result.push_str("**🧪 Tests that exercise the code you're changing:**\n");
        result.push_str("DO NOT break these tests! Make minimal changes.\n\n");

        // Sorted and de-duplicated so the listing does not depend on hash order.
        let mut test_files: Vec<&str> = report
            .affected_tests
            .iter()
            .map(|n| n.file_path.as_str())
            .collect();
        test_files.sort_unstable();
        test_files.dedup();

        for test_file in &test_files {
            result.push_str(&format!("📋 `{}`\n", test_file));
            let funcs: Vec<&str> = report
                .affected_tests
                .iter()
                .filter(|n| n.file_path == *test_file && n.kind == NodeKind::Function)
                .map(|n| n.name.as_str())
                .collect();
            for func in funcs.iter().take(10) {
                result.push_str(&format!(" - {}\n", func));
            }
            if funcs.len() > 10 {
                result.push_str(&format!(" ... and {} more\n", funcs.len() - 10));
            }
        }
        result.push('\n');

        let test_nodes_refs: Vec<&CodeNode> = report
            .affected_tests
            .iter()
            .filter(|n| n.kind == NodeKind::Function)
            .take(10)
            .copied()
            .collect();
        if !test_nodes_refs.is_empty() {
            let test_snippets = self.extract_snippets(&test_nodes_refs, repo_dir, 30);
            if !test_snippets.is_empty() {
                result.push_str("**Key test code (DO NOT break these):**\n```python\n");
                // Walk the ordered node list rather than the snippet map, so the
                // same 5 snippets are emitted in the same order on every run.
                let ordered = test_nodes_refs
                    .iter()
                    .filter_map(|node| test_snippets.get(&node.id).map(|s| (&node.id, s)))
                    .take(5);
                for (node_id, snippet) in ordered {
                    let name = self.node_name(node_id);
                    result.push_str(&format!("# --- {} ---\n{}\n\n", name, snippet));
                }
                result.push_str("```\n");
            }
        }
    }
    result
}
/// Traces dependency chains outward from each symptom node.
///
/// For every known id in `symptom_node_ids` two best-first searches are run,
/// each expanding the path with the highest accumulated edge weight first:
/// - forward, along outgoing `Calls`, `Inherits`, `Imports`, `Overrides` and
///   `TestsFor` edges;
/// - backward, along incoming `Calls`, `Imports` and `Overrides` edges,
///   skipping callers located in test files.
///
/// Within one search a node is claimed by the first path that reaches it, so
/// it appears in at most one branch. Every discovered path is recorded as a
/// chain; a path is only extended while it has fewer than `max_depth` nodes.
///
/// The collected chains are sorted shortest first (ties: more non-test nodes
/// first), chains that are a strict prefix of another kept chain are removed,
/// and at most `max_chains` chains are returned. Unknown symptom ids are ignored.
pub fn trace_causal_chains_from_symptoms(
    &self,
    symptom_node_ids: &[&str],
    max_depth: usize,
    max_chains: usize,
) -> Vec<CausalChain> {
    /// A partial path in the search frontier, ordered by accumulated weight so
    /// that `BinaryHeap` pops the heaviest path first.
    #[derive(Clone)]
    struct WeightedPath {
        node_id: String,
        accumulated_weight: f32,
        chain: Vec<ChainNode>,
    }
    impl PartialEq for WeightedPath {
        fn eq(&self, other: &Self) -> bool {
            self.accumulated_weight
                .total_cmp(&other.accumulated_weight)
                == std::cmp::Ordering::Equal
        }
    }
    impl Eq for WeightedPath {}
    impl PartialOrd for WeightedPath {
        fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
            Some(self.cmp(other))
        }
    }
    impl Ord for WeightedPath {
        fn cmp(&self, other: &Self) -> std::cmp::Ordering {
            // `total_cmp` gives f32 a total order, which `Ord` requires.
            self.accumulated_weight.total_cmp(&other.accumulated_weight)
        }
    }

    let mut all_chains: Vec<CausalChain> = Vec::new();
    for symptom_id in symptom_node_ids {
        let symptom_node = match self.node_by_id(symptom_id) {
            Some(n) => n,
            None => continue,
        };

        // ---- forward search: what the symptom depends on -------------------------
        {
            let mut heap: BinaryHeap<WeightedPath> = BinaryHeap::new();
            let mut visited = HashSet::new();
            visited.insert(symptom_id.to_string());
            let start_chain_node = ChainNode {
                node_id: symptom_id.to_string(),
                node_name: symptom_node.name.clone(),
                file_path: symptom_node.file_path.clone(),
                line: symptom_node.line,
                edge_to_next: None,
            };
            heap.push(WeightedPath {
                node_id: symptom_id.to_string(),
                accumulated_weight: 1.0,
                chain: vec![start_chain_node],
            });
            while let Some(current) = heap.pop() {
                if current.chain.len() > max_depth {
                    continue;
                }
                for edge in self.outgoing_edges(&current.node_id) {
                    let edge_label = match edge.relation {
                        EdgeRelation::Calls => "calls",
                        EdgeRelation::Inherits => "inherits",
                        EdgeRelation::Imports => "imports",
                        EdgeRelation::Overrides => "overrides",
                        EdgeRelation::TestsFor => "tests",
                        // Containment is structure, not a dependency to follow.
                        EdgeRelation::DefinedIn => continue,
                    };
                    let target_id = &edge.to;
                    if visited.contains(target_id) {
                        continue;
                    }
                    if let Some(target_node) = self.node_by_id(target_id) {
                        visited.insert(target_node.id.clone());
                        let new_weight = current.accumulated_weight * edge.weight;
                        let mut new_chain = current.chain.clone();
                        if let Some(last) = new_chain.last_mut() {
                            last.edge_to_next = Some(edge_label.to_string());
                        }
                        new_chain.push(ChainNode {
                            node_id: target_node.id.clone(),
                            node_name: target_node.name.clone(),
                            file_path: target_node.file_path.clone(),
                            line: target_node.line,
                            edge_to_next: None,
                        });
                        // The chain always holds the start node plus this one,
                        // so every extension is worth recording.
                        all_chains.push(CausalChain {
                            symptom_node_id: symptom_id.to_string(),
                            chain: new_chain.clone(),
                        });
                        if new_chain.len() < max_depth {
                            heap.push(WeightedPath {
                                node_id: target_node.id.clone(),
                                accumulated_weight: new_weight,
                                chain: new_chain,
                            });
                        }
                    }
                }
            }
        }

        // ---- backward search: who depends on the symptom -------------------------
        {
            let mut heap: BinaryHeap<WeightedPath> = BinaryHeap::new();
            let mut visited = HashSet::new();
            visited.insert(symptom_id.to_string());
            let start_chain_node = ChainNode {
                node_id: symptom_id.to_string(),
                node_name: symptom_node.name.clone(),
                file_path: symptom_node.file_path.clone(),
                line: symptom_node.line,
                edge_to_next: None,
            };
            heap.push(WeightedPath {
                node_id: symptom_id.to_string(),
                accumulated_weight: 1.0,
                chain: vec![start_chain_node],
            });
            while let Some(current) = heap.pop() {
                if current.chain.len() > max_depth {
                    continue;
                }
                for edge in self.incoming_edges(&current.node_id) {
                    let edge_label = match edge.relation {
                        EdgeRelation::Calls => "called_by",
                        EdgeRelation::Imports => "imported_by",
                        EdgeRelation::Overrides => "overridden_by",
                        EdgeRelation::Inherits
                        | EdgeRelation::DefinedIn
                        | EdgeRelation::TestsFor => continue,
                    };
                    if visited.contains(&edge.from) {
                        continue;
                    }
                    if let Some(caller) = self.node_by_id(&edge.from) {
                        // Tests are consumers of the code, not causes; skip them
                        // without marking them visited.
                        if caller.file_path.contains("/tests/")
                            || caller.file_path.contains("/test_")
                        {
                            continue;
                        }
                        visited.insert(caller.id.clone());
                        let new_weight = current.accumulated_weight * edge.weight;
                        let mut new_chain = current.chain.clone();
                        if let Some(last) = new_chain.last_mut() {
                            last.edge_to_next = Some(edge_label.to_string());
                        }
                        new_chain.push(ChainNode {
                            node_id: caller.id.clone(),
                            node_name: caller.name.clone(),
                            file_path: caller.file_path.clone(),
                            line: caller.line,
                            edge_to_next: None,
                        });
                        all_chains.push(CausalChain {
                            symptom_node_id: symptom_id.to_string(),
                            chain: new_chain.clone(),
                        });
                        if new_chain.len() < max_depth {
                            heap.push(WeightedPath {
                                node_id: caller.id.clone(),
                                accumulated_weight: new_weight,
                                chain: new_chain,
                            });
                        }
                    }
                }
            }
        }
    }

    // Shortest chains first; among equally long chains prefer the one that
    // touches more non-test code.
    all_chains.sort_by(|a, b| {
        let len_cmp = a.chain.len().cmp(&b.chain.len());
        if len_cmp != std::cmp::Ordering::Equal {
            return len_cmp;
        }
        let a_source = a
            .chain
            .iter()
            .filter(|n| !n.file_path.contains("/tests/") && !n.file_path.contains("/test_"))
            .count();
        let b_source = b
            .chain
            .iter()
            .filter(|n| !n.file_path.contains("/tests/") && !n.file_path.contains("/test_"))
            .count();
        b_source.cmp(&a_source)
    });

    // Keep only maximal chains: drop a chain that is a strict prefix of a kept
    // one, and evict kept chains that are a strict prefix of the newcomer.
    let mut deduped: Vec<CausalChain> = Vec::new();
    for chain in &all_chains {
        let is_prefix = deduped.iter().any(|existing| {
            existing.chain.len() > chain.chain.len()
                && chain
                    .chain
                    .iter()
                    .zip(existing.chain.iter())
                    .all(|(a, b)| a.node_id == b.node_id)
        });
        if is_prefix {
            continue;
        }
        deduped.retain(|existing| {
            !(existing.chain.len() < chain.chain.len()
                && existing
                    .chain
                    .iter()
                    .zip(chain.chain.iter())
                    .all(|(a, b)| a.node_id == b.node_id))
        });
        deduped.push(chain.clone());
    }
    deduped.truncate(max_chains);
    deduped
}
/// Builds a Markdown explanation of test failures for an LLM prompt.
///
/// `failed_p2p_tests` are tests that passed before the change and fail now
/// (regressions); `failed_f2p_tests` are tests that were expected to start
/// passing but still fail. Test names may be qualified with `::`; only the
/// last segment is used to find the test in the graph.
///
/// For every regression the text shows up to three dependency paths from a
/// changed node to the test, or a weaker "indirect"/"could not trace" note
/// when no path exists. Returns an empty string when both lists are empty.
pub fn trace_causal_chains(
    &self,
    changed_node_ids: &[&str],
    failed_p2p_tests: &[String],
    failed_f2p_tests: &[String],
) -> String {
    if failed_p2p_tests.is_empty() && failed_f2p_tests.is_empty() {
        return String::new();
    }
    let mut result = String::new();
    if !failed_p2p_tests.is_empty() {
        result.push_str("## 🚨 CAUSAL ANALYSIS — Why Your Fix Broke Existing Tests\n\n");
        result.push_str(
            "These tests PASSED before your change and now FAIL. You MUST fix these regressions.\n\n",
        );
        for test_name in failed_p2p_tests {
            let short_name = test_name.rsplit("::").next().unwrap_or(test_name);
            result.push_str(&format!("### ❌ REGRESSION: `{}`\n", short_name));

            // Prefer the most specific match so an unrelated node whose name
            // merely ends with the test name cannot shadow the real test:
            // exact name in a test path, then exact name anywhere, then suffix.
            // An empty name would be a suffix of everything, so it never falls
            // back to suffix matching.
            let test_node = self
                .nodes
                .iter()
                .find(|n| n.name == short_name && n.file_path.contains("/test"))
                .or_else(|| self.nodes.iter().find(|n| n.name == short_name))
                .or_else(|| {
                    if short_name.is_empty() {
                        None
                    } else {
                        self.nodes.iter().find(|n| n.name.ends_with(short_name))
                    }
                });

            if let Some(test) = test_node {
                let chains = self.find_paths_to_test(changed_node_ids, &test.id);
                if !chains.is_empty() {
                    result.push_str("**Causal chain(s):**\n");
                    for chain in chains.iter().take(3) {
                        let chain_str: Vec<String> = chain
                            .iter()
                            .map(|id| {
                                self.node_by_id(id)
                                    .map(|n| format!("`{}` ({})", n.name, n.file_path))
                                    .unwrap_or_else(|| id.to_string())
                            })
                            .collect();
                        result.push_str(&format!(" 🔗 {}\n", chain_str.join(" → ")));
                    }
                    result.push_str("\n**What this means:** Your change propagated through the dependency chain above and broke this test.\n");
                    result.push_str("**How to fix:** Make your change more surgical — ensure the modified function's behavior is backward-compatible for the callers in this chain.\n\n");
                } else {
                    let changed_files: HashSet<String> = changed_node_ids
                        .iter()
                        .filter_map(|id| self.node_by_id(id))
                        .map(|n| n.file_path.clone())
                        .collect();
                    if changed_files
                        .iter()
                        .any(|f| test.file_path.contains(f.as_str()))
                        || self.shares_import(&test.id, changed_node_ids)
                    {
                        result.push_str("**Connection:** Indirect — test imports or uses a module you changed.\n");
                        result.push_str("**How to fix:** Check that your change doesn't alter the public API or default behavior of the module.\n\n");
                    } else {
                        result.push_str("**Connection:** Could not trace via graph (may be via dynamic dispatch, monkey-patching, or shared global state).\n");
                        result.push_str("**How to fix:** Read the test's assertion error carefully — it will tell you what behavior changed.\n\n");
                    }
                }
            } else {
                result.push_str(
                    "**Note:** Test not found in code graph. Read the error output to understand what broke.\n\n",
                );
            }
        }
        result.push_str("### 🎯 Overall Regression Fix Strategy\n");
        result.push_str(
            "1. **Don't change your approach** — your bug fix logic is likely correct\n",
        );
        result.push_str("2. **Narrow the scope** — guard your change with a condition so it only applies to the bug case\n");
        result.push_str("3. **Add backward compatibility** — if you changed a return type/value, ensure callers still get what they expect\n");
        result.push_str("4. **Check default parameters** — if you changed defaults, existing callers rely on the old defaults\n\n");
    }
    if !failed_f2p_tests.is_empty() {
        result.push_str("## ⚠️ Original Bug Not Fixed\n");
        result.push_str("These tests still fail — your fix is incomplete or incorrect:\n");
        for test_name in failed_f2p_tests {
            let short_name = test_name.rsplit("::").next().unwrap_or(test_name);
            result.push_str(&format!("- `{}`\n", short_name));
        }
        result.push('\n');
    }
    result
}
/// For each changed node reachable from the test, returns the path between
/// them ordered from the changed node to the test.
///
/// The search runs from the test along outgoing edges (limited to depth 5) and
/// the resulting path is reversed; changed nodes the test cannot reach are omitted.
fn find_paths_to_test(&self, changed_node_ids: &[&str], test_node_id: &str) -> Vec<Vec<String>> {
    changed_node_ids
        .iter()
        .filter_map(|changed_id| {
            let mut route = self.bfs_path(test_node_id, changed_id, 5)?;
            route.reverse();
            Some(route)
        })
        .collect()
}
pub fn bfs_path(&self, from: &str, to: &str, max_depth: usize) -> Option<Vec<String>> {
let mut queue: VecDeque<(String, Vec<String>)> = VecDeque::new();
let mut visited = HashSet::new();
queue.push_back((from.to_string(), vec![from.to_string()]));
visited.insert(from.to_string());
while let Some((current, path)) = queue.pop_front() {
if path.len() > max_depth {
continue;
}
for edge in self.outgoing_edges(¤t) {
if edge.to == to {
let mut final_path = path.clone();
final_path.push(edge.to.clone());
return Some(final_path);
}
if !visited.contains(&edge.to) {
visited.insert(edge.to.clone());
let mut new_path = path.clone();
new_path.push(edge.to.clone());
queue.push_back((edge.to.clone(), new_path));
}
}
}
None
}
/// Produces a short textual summary of a node: `name (file:line)` followed, when
/// the source file can be read, by up to 15 lines of code starting at the
/// node's line.
///
/// An unknown id yields `[unknown node: <id>]`. A missing line is shown as `?`.
/// If the file is unreadable or the line is out of range, only the header is returned.
pub fn get_node_summary(&self, node_id: &str, repo_dir: &Path) -> String {
    let node = match self.node_by_id(node_id) {
        Some(found) => found,
        None => return format!("[unknown node: {}]", node_id),
    };
    let line_label = match node.line {
        Some(l) => l.to_string(),
        None => "?".to_string(),
    };
    let mut summary = format!("{} ({}:{})", node.name, node.file_path, line_label,);

    // Without a usable 1-based line there is nothing to preview.
    let start_line = match node.line {
        Some(l) if l > 0 => l,
        _ => return summary,
    };
    let content = match std::fs::read_to_string(repo_dir.join(&node.file_path)) {
        Ok(text) => text,
        Err(_) => return summary,
    };
    let lines: Vec<&str> = content.lines().collect();
    if start_line <= lines.len() {
        let first = start_line - 1;
        let last = (first + 15).min(lines.len());
        summary.push('\n');
        summary.push_str(&lines[first..last].join("\n"));
    }
    summary
}
pub fn extract_snippets(
&self,
nodes: &[&CodeNode],
repo_dir: &Path,
max_lines: usize,
) -> HashMap<String, String> {
let mut snippets = HashMap::new();
let mut file_cache: HashMap<String, Vec<String>> = HashMap::new();
for node in nodes {
if node.kind == NodeKind::File {
continue;
}
let file_path = repo_dir.join(&node.file_path);
let lines = file_cache.entry(node.file_path.clone()).or_insert_with(|| {
std::fs::read_to_string(&file_path)
.unwrap_or_default()
.lines()
.map(|l| l.to_string())
.collect()
});
if let Some(start_line) = node.line {
if start_line == 0 || start_line > lines.len() {
continue;
}
let start_idx = start_line - 1;
let base_indent = lines[start_idx]
.chars()
.take_while(|c| c.is_whitespace())
.count();
let mut end_idx = start_idx + 1;
while end_idx < lines.len() && end_idx < start_idx + max_lines {
let line = &lines[end_idx];
if line.trim().is_empty() {
end_idx += 1;
continue;
}
let indent = line.chars().take_while(|c| c.is_whitespace()).count();
if indent <= base_indent && !line.trim().is_empty() {
break;
}
end_idx += 1;
}
let snippet: String = lines[start_idx..end_idx.min(lines.len())]
.iter()
.map(|l| l.as_str())
.collect::<Vec<_>>()
.join("\n");
if !snippet.trim().is_empty() {
snippets.insert(node.id.clone(), snippet);
}
}
}
snippets
}
/// Renders a size-bounded text description of the graph for use in a prompt.
///
/// Nodes matching `keywords` (via `find_relevant_nodes`) are listed first (at
/// most 20, stopping once half of `max_chars` is used), followed by up to 15
/// non-`DefinedIn` edges touching those nodes, inheritance chains for up to 5
/// relevant classes, and a one-line statistics footer. When no node matches,
/// the plain file summary is returned instead.
///
/// If the text exceeds `max_chars` bytes it is cut at the nearest char
/// boundary at or below `max_chars` and a `...[truncated]` marker is appended,
/// so the final string may exceed `max_chars` by the length of that marker.
pub fn format_for_llm(&self, keywords: &[&str], max_chars: usize) -> String {
    let relevant = self.find_relevant_nodes(keywords);
    if relevant.is_empty() {
        return self.format_file_summary(max_chars);
    }
    let mut result = String::from("**Code structure (relevant to issue):**\n");
    result.push_str("\nRelevant files/classes/functions:\n");
    let relevant_ids: HashSet<&str> = relevant.iter().map(|n| n.id.as_str()).collect();
    for node in relevant.iter().take(20) {
        let prefix = match node.kind {
            NodeKind::File => "📄",
            NodeKind::Class => "🔷",
            NodeKind::Function => "🔹",
            NodeKind::Module => "📦",
        };
        let line_info = node.line.map(|l| format!(" (line {})", l)).unwrap_or_default();
        result.push_str(&format!(
            "{} {} — `{}`{}\n",
            prefix, node.name, node.file_path, line_info
        ));
        // Reserve the second half of the budget for relationships.
        if result.len() > max_chars / 2 {
            break;
        }
    }
    let relevant_edges: Vec<&CodeEdge> = self
        .edges
        .iter()
        .filter(|e| {
            relevant_ids.contains(e.from.as_str()) || relevant_ids.contains(e.to.as_str())
        })
        .filter(|e| e.relation != EdgeRelation::DefinedIn)
        .collect();
    if !relevant_edges.is_empty() {
        result.push_str("\nRelationships:\n");
        for edge in relevant_edges.iter().take(15) {
            let from_name = self.node_name(&edge.from);
            let to_name = self.node_name(&edge.to);
            result.push_str(&format!(
                " {} --[{}]--> {}\n",
                from_name, edge.relation, to_name
            ));
            if result.len() > max_chars {
                break;
            }
        }
    }
    let relevant_classes: Vec<&&CodeNode> = relevant
        .iter()
        .filter(|n| n.kind == NodeKind::Class)
        .collect();
    if !relevant_classes.is_empty() {
        result.push_str("\nInheritance:\n");
        for cls in relevant_classes.iter().take(5) {
            let chain = self.get_inheritance_chain(&cls.id);
            // A chain of one is just the class itself: nothing to show.
            if chain.len() > 1 {
                let names: Vec<String> =
                    chain.iter().map(|id| self.node_name(id)).collect();
                result.push_str(&format!(" {} \n", names.join(" → ")));
            }
        }
    }
    let file_count = self.nodes.iter().filter(|n| n.kind == NodeKind::File).count();
    let class_count = self.nodes.iter().filter(|n| n.kind == NodeKind::Class).count();
    let import_count = self
        .edges
        .iter()
        .filter(|e| e.relation == EdgeRelation::Imports)
        .count();
    let inherit_count = self
        .edges
        .iter()
        .filter(|e| e.relation == EdgeRelation::Inherits)
        .count();
    result.push_str(&format!(
        "\nGraph: {} files, {} classes, {} imports, {} inheritance edges\n",
        file_count, class_count, import_count, inherit_count
    ));
    if result.len() > max_chars {
        // `String::truncate` panics when the cut is inside a multi-byte
        // character, and this text contains emoji, dashes and arrows. Back up
        // to the nearest boundary (index 0 always qualifies).
        let mut cut = max_chars;
        while !result.is_char_boundary(cut) {
            cut -= 1;
        }
        result.truncate(cut);
        result.push_str("\n...[truncated]\n");
    }
    result
}
fn format_file_summary(&self, max_chars: usize) -> String {
let mut result = String::from("**Repository files:**\n");
let files: Vec<&CodeNode> = self
.nodes
.iter()
.filter(|n| n.kind == NodeKind::File)
.collect();
for file in &files {
let classes: Vec<String> = self
.nodes
.iter()
.filter(|n| n.kind == NodeKind::Class && n.file_path == file.file_path)
.map(|n| n.name.clone())
.collect();
let mut line = format!("- `{}`", file.file_path);
if !classes.is_empty() {
line.push_str(&format!(" — {}", classes.join(", ")));
}
line.push('\n');
if result.len() + line.len() > max_chars {
result.push_str(&format!("... and {} more files\n", files.len()));
break;
}
result.push_str(&line);
}
result
}
/// Returns the display name of the node with the given id.
///
/// Falls back to the id itself when no node carries that id.
fn node_name(&self, id: &str) -> String {
    for node in &self.nodes {
        if node.id == id {
            return node.name.clone();
        }
    }
    id.to_string()
}
/// Follows `Inherits` edges upward from `class_id`.
///
/// The returned chain starts with `class_id` itself; each further element is
/// the target of the first `Inherits` edge leaving the previous element. At
/// most 10 hops are followed, which also bounds the walk if the edges form a
/// cycle.
fn get_inheritance_chain(&self, class_id: &str) -> Vec<String> {
    const MAX_HOPS: usize = 10;

    let mut chain = vec![class_id.to_string()];
    // The chain holds the start plus one entry per hop taken so far.
    while chain.len() <= MAX_HOPS {
        let next_parent = {
            let tail = &chain[chain.len() - 1];
            self.edges
                .iter()
                .find(|e| e.relation == EdgeRelation::Inherits && e.from == *tail)
                .map(|e| e.to.clone())
        };
        match next_parent {
            Some(parent) => chain.push(parent),
            None => break,
        }
    }
    chain
}
/// Reports whether the test node imports any changed node or its file.
///
/// True when an `Imports` edge leaving `test_node_id` targets either the id of
/// a known changed node or the `file:<path>` id of the file containing it.
/// Changed ids that are not in the graph are ignored.
fn shares_import(&self, test_node_id: &str, changed_node_ids: &[&str]) -> bool {
    let mut changed_targets: HashSet<String> = HashSet::new();
    for id in changed_node_ids {
        if let Some(node) = self.node_by_id(id) {
            changed_targets.insert(node.id.clone());
            changed_targets.insert(format!("file:{}", node.file_path));
        }
    }

    let imports_changed = self.edges.iter().any(|e| {
        e.from == test_node_id
            && e.relation == EdgeRelation::Imports
            && changed_targets.contains(&e.to)
    });
    imports_changed
}
/// Searches Python files under `repo_dir` with the external `grep` command for
/// definitions of identifiers that the graph does not already contain.
///
/// Identifiers whose name equals the name of any existing node are skipped.
/// For each remaining identifier, three patterns (class with `:`/`(`, `def`
/// with `(`, class at a word boundary) are tried in order until one produces a
/// node. Returns the newly built nodes; they are NOT added to the graph here.
/// A `grep` invocation that cannot be run is silently ignored.
pub fn grep_for_identifiers(&self, repo_dir: &Path, identifiers: &[&str]) -> Vec<CodeNode> {
let mut found_nodes = Vec::new();
let existing_names: HashSet<String> = self.nodes.iter().map(|n| n.name.clone()).collect();
for ident in identifiers {
if existing_names.contains(*ident) {
continue;
}
// NOTE(review): `ident` is interpolated into the regex unescaped, so regex
// metacharacters in an identifier change the pattern — confirm callers only
// pass plain identifiers.
let patterns = [
format!("class {}[:(]", ident),
format!("def {}[(]", ident),
format!("class {}\\b", ident),
];
for pattern in &patterns {
// First pass: list the files (`-l`) containing a match, restricted to *.py.
if let Ok(output) = std::process::Command::new("grep")
.args(["-rn", pattern, "--include=*.py", "-l"])
.current_dir(repo_dir)
.output()
{
let stdout = String::from_utf8_lossy(&output.stdout);
// Only the first three listed files are considered; test paths among them are skipped.
for file_path in stdout.lines().take(3) {
let file_path = file_path.trim();
if file_path.is_empty()
|| file_path.contains("/tests/")
|| file_path.contains("/test_")
{
continue;
}
// Second pass: get the line number of the first match inside that file.
if let Ok(line_output) = std::process::Command::new("grep")
.args(["-n", pattern, file_path])
.current_dir(repo_dir)
.output()
{
let line_stdout = String::from_utf8_lossy(&line_output.stdout);
if let Some(first_line) = line_stdout.lines().next() {
// `grep -n` output is `<line>:<text>`; an unparsable prefix becomes 0.
let line_num: usize = first_line
.split(':')
.next()
.unwrap_or("0")
.parse()
.unwrap_or(0);
let is_class = first_line.contains("class ");
found_nodes.push(CodeNode {
id: format!("grep:{}:{}", file_path, ident),
kind: if is_class {
NodeKind::Class
} else {
NodeKind::Function
},
name: ident.to_string(),
file_path: file_path.to_string(),
line: if line_num > 0 { Some(line_num) } else { None },
decorators: Vec::new(),
signature: None,
docstring: None,
line_count: 0,
is_test: false,
});
// Leaves the file loop: one node per pattern is enough.
break;
}
}
}
}
// Stop trying further patterns once this identifier has produced a node.
if found_nodes.iter().any(|n| n.name == *ident) {
break;
}
}
}
found_nodes
}
/// Picks identifier-like words out of a free-text problem statement.
///
/// The text is split on every character that is not alphanumeric, `_` or `.`.
/// Trailing dots (sentence punctuation) are stripped from each word. A word is
/// kept when it is at least 3 bytes long, is not a common stop word, and looks
/// like code: it contains `_`, contains `.`, or has an uppercase character.
///
/// Each keyword is returned once, in order of first appearance, and at most 20
/// keywords are returned. The returned slices borrow from `problem_statement`.
pub fn extract_keywords(problem_statement: &str) -> Vec<&str> {
    const STOP_WORDS: &[&str] = &[
        "the", "and", "for", "that", "this", "with", "from", "not", "but", "are", "was",
        "has", "have", "can", "should", "would", "when", "what", "how", "does", "bug",
        "fix", "issue", "error", "problem", "description",
    ];
    const MAX_KEYWORDS: usize = 20;

    let mut keywords: Vec<&str> = Vec::new();
    // Tracks every accepted keyword; `Vec::dedup` would only catch adjacent repeats.
    let mut seen: HashSet<&str> = HashSet::new();

    for word in
        problem_statement.split(|c: char| !c.is_alphanumeric() && c != '_' && c != '.')
    {
        // "fails." at the end of a sentence is not a dotted identifier.
        let candidate = word.trim_end_matches('.');
        if candidate.len() < 3 {
            continue;
        }
        if STOP_WORDS.contains(&candidate.to_lowercase().as_str()) {
            continue;
        }
        let looks_like_code = candidate.contains('_')
            || candidate.contains('.')
            || candidate.chars().any(|c| c.is_uppercase());
        if looks_like_code && seen.insert(candidate) {
            keywords.push(candidate);
            if keywords.len() == MAX_KEYWORDS {
                break;
            }
        }
    }
    keywords
}
/// Reports whether a node named `name` exists in `file_path`.
///
/// Uses the same matching as `find_node`: a leading `./` is ignored on both
/// the requested path and the stored path, and the name must match exactly.
pub fn has_node(&self, file_path: &str, name: &str) -> bool {
    self.find_node(file_path, name).is_some()
}
/// Returns the first node named `name` that lives in `file_path`.
///
/// A leading `./` is ignored on both the requested path and the stored path;
/// otherwise path and name must match exactly.
pub fn find_node(&self, file_path: &str, name: &str) -> Option<&CodeNode> {
    /// Drops one leading `./` from a path, if present.
    fn without_dot_slash(path: &str) -> &str {
        path.strip_prefix("./").unwrap_or(path)
    }

    let wanted_path = without_dot_slash(file_path);
    self.nodes
        .iter()
        .find(|node| without_dot_slash(&node.file_path) == wanted_path && node.name == name)
}
pub fn add_file_nodes(
&mut self,
repo_dir: &Path,
file_path: &Path,
target_names: Option<&[String]>,
) -> anyhow::Result<()> {
use anyhow::Context;
let full_path = repo_dir.join(file_path);
if !full_path.exists() {
anyhow::bail!("File not found: {:?}", full_path);
}
let source = std::fs::read_to_string(&full_path)
.context(format!("Failed to read {:?}", full_path))?;
let mut parser = Parser::new();
let language = tree_sitter_python::LANGUAGE;
parser
.set_language(&language.into())
.context("Failed to set Python language")?;
let tree = parser
.parse(&source, None)
.context("Failed to parse Python file")?;
let file_path_str = file_path.to_string_lossy().to_string();
let root = tree.root_node();
fn extract_from_node(
node: tree_sitter::Node,
source: &str,
file_path: &str,
nodes: &mut Vec<CodeNode>,
target_names: Option<&[String]>,
) {
if node.kind() == "function_definition" {
if let Some(name_node) = node.child_by_field_name("name") {
let name = &source[name_node.byte_range()];
let matched =
target_names.map_or(true, |targets| targets.iter().any(|t| t == name));
if matched {
let line = name_node.start_position().row + 1;
let id = format!("func:{}:{}", file_path, name);
nodes.push(CodeNode {
id,
kind: NodeKind::Function,
name: name.to_string(),
file_path: file_path.to_string(),
line: Some(line),
decorators: vec![],
signature: None,
docstring: None,
line_count: 0,
is_test: false,
});
}
}
} else if node.kind() == "class_definition" {
if let Some(name_node) = node.child_by_field_name("name") {
let name = &source[name_node.byte_range()];
let matched =
target_names.map_or(true, |targets| targets.iter().any(|t| t == name));
if matched {
let line = name_node.start_position().row + 1;
let id = format!("class:{}:{}", file_path, name);
nodes.push(CodeNode {
id,
kind: NodeKind::Class,
name: name.to_string(),
file_path: file_path.to_string(),
line: Some(line),
decorators: vec![],
signature: None,
docstring: None,
line_count: 0,
is_test: false,
});
}
}
}
for child in node.children(&mut node.walk()) {
extract_from_node(child, source, file_path, nodes, target_names);
}
}
extract_from_node(root, &source, &file_path_str, &mut self.nodes, target_names);
self.build_indexes();
Ok(())
}
pub fn get_schema(&self) -> String {
let node_kinds: HashSet<&str> = self.nodes.iter().map(|n| match n.kind {
NodeKind::File => "File",
NodeKind::Class => "Class",
NodeKind::Function => "Function",
NodeKind::Module => "Module",
}).collect();
let edge_relations: HashSet<&str> = self.edges.iter().map(|e| match e.relation {
EdgeRelation::Imports => "imports",
EdgeRelation::Inherits => "inherits",
EdgeRelation::DefinedIn => "defined_in",
EdgeRelation::Calls => "calls",
EdgeRelation::TestsFor => "tests_for",
EdgeRelation::Overrides => "overrides",
}).collect();
format!(
"Schema:\n Node kinds: {:?}\n Edge relations: {:?}\n Total nodes: {}\n Total edges: {}",
node_kinds,
edge_relations,
self.nodes.len(),
self.edges.len()
)
}
pub fn get_file_summary(&self, file_path: &str) -> String {
let file_nodes: Vec<&CodeNode> = self.nodes.iter()
.filter(|n| n.file_path == file_path)
.collect();
if file_nodes.is_empty() {
return format!("No nodes found for file: {}", file_path);
}
let classes: Vec<&str> = file_nodes.iter()
.filter(|n| n.kind == NodeKind::Class)
.map(|n| n.name.as_str())
.collect();
let functions: Vec<&str> = file_nodes.iter()
.filter(|n| n.kind == NodeKind::Function)
.map(|n| n.name.as_str())
.collect();
format!(
"File: {}\n Classes ({}): {}\n Functions ({}): {}",
file_path,
classes.len(),
classes.join(", "),
functions.len(),
functions.join(", ")
)
}
/// Produces a Markdown report that tries to connect each failed test to the
/// changed nodes through the graph.
///
/// For every entry in `failed_test_names` (the part after the last `::` is
/// used as the test's short name) the report contains one section. If a node
/// for the test is found, connections are looked for in this order, stopping
/// at the first kind that succeeds:
/// 1. a direct callee of the test that was changed (matched by id or name),
/// 2. a callee of a callee that was changed,
/// 3. a `TestsFor` edge from the test's file to a node in a changed file.
/// A closing summary lists the changed names and the total number of callers
/// of the changed ids. `_repo_dir` is unused.
pub fn analyze_test_failures(
&self,
changed_node_ids: &[&str],
failed_test_names: &[String],
_repo_dir: &Path,
) -> String {
let mut analysis = String::new();
analysis.push_str("## 🔍 Graph-based Failure Analysis\n\n");
// Names and files of the changed ids that actually exist in the graph.
let changed_names: Vec<String> = changed_node_ids.iter()
.filter_map(|id| self.node_by_id(id))
.map(|n| n.name.clone())
.collect();
let changed_files: HashSet<String> = changed_node_ids.iter()
.filter_map(|id| self.node_by_id(id))
.map(|n| n.file_path.clone())
.collect();
for test_name in failed_test_names {
let short_name = test_name.split("::").last().unwrap_or(test_name);
// First node whose name equals or ends with the short test name.
// NOTE(review): the third clause is already covered by the first one, and
// `ends_with` also covers exact equality.
let test_node = self.nodes.iter().find(|n| {
n.name == short_name
|| n.name.ends_with(short_name)
|| (n.file_path.contains("/test") && n.name == short_name)
});
analysis.push_str(&format!("### ❌ {}\n", short_name));
if let Some(test) = test_node {
let callees = self.get_callees(&test.id);
let mut found_connection = false;
// Step 1: direct callees that were changed (every match is reported).
for callee in &callees {
if changed_node_ids.contains(&callee.id.as_str())
|| changed_names.contains(&callee.name)
{
analysis.push_str(&format!(
"**Direct call chain:** `{}` → `{}` (YOU CHANGED THIS)\n",
short_name, callee.name
));
found_connection = true;
// Show up to five other callers of the changed callee.
let other_callers = self.get_callers(&callee.id);
let other_caller_names: Vec<&str> = other_callers.iter()
.filter(|c| c.id != test.id)
.map(|c| c.name.as_str())
.take(5)
.collect();
if !other_caller_names.is_empty() {
analysis.push_str(&format!(
"**Other callers of `{}`:** {}\n",
callee.name,
other_caller_names.join(", ")
));
}
}
}
// Step 2: two-hop chains; only the first one found is reported.
if !found_connection {
for callee in &callees {
let sub_callees = self.get_callees(&callee.id);
for sub in &sub_callees {
if changed_node_ids.contains(&sub.id.as_str())
|| changed_names.contains(&sub.name)
{
analysis.push_str(&format!(
"**Indirect chain:** `{}` → `{}` → `{}` (YOU CHANGED THIS)\n",
short_name, callee.name, sub.name
));
found_connection = true;
break;
}
}
if found_connection { break; }
}
}
// Step 3: file-level `TestsFor` edge from the test's file into a changed file.
if !found_connection {
let test_file = &test.file_path;
let test_file_id = format!("file:{}", test_file);
for edge in self.outgoing_edges(&test_file_id) {
if edge.relation == EdgeRelation::TestsFor {
if let Some(target) = self.node_by_id(&edge.to) {
if changed_files.contains(&target.file_path) {
analysis.push_str(&format!(
"**File-level connection:** test file `{}` tests `{}` which you modified\n",
test_file, target.file_path
));
found_connection = true;
break;
}
}
}
}
}
if !found_connection {
analysis.push_str("**Connection:** Could not trace via graph (may be indirect import)\n");
}
} else {
analysis.push_str("**Note:** Test not found in code graph\n");
}
analysis.push('\n');
}
// The summary is only emitted when at least one changed id resolved to a node.
if !changed_names.is_empty() {
analysis.push_str("### Summary\n");
analysis.push_str(&format!("**You changed:** {}\n", changed_names.join(", ")));
let total_callers: usize = changed_node_ids.iter()
.map(|id| self.get_callers(id).len())
.sum();
analysis.push_str(&format!(
"**Total callers of changed code:** {}\n",
total_callers
));
analysis.push_str("**Repair strategy:** Keep the fix but make it backward-compatible with all callers.\n");
}
analysis
}
/// Collects graph nodes that the problem statement or the failing tests point at.
///
/// `test_names` is parsed as a JSON array of strings; if that fails, each
/// non-empty trimmed line is treated as one test id. Nodes are gathered, without
/// duplicates and in discovery order, from these sources:
/// 1. function nodes matching a test's short name that look like tests,
/// 2. function names following `, in ` on a line of the problem statement,
/// 3. quoted words (`'`, `"`, or backtick) naming a function or class,
/// 4. mixed-case words naming a class.
/// Only when all of the above find nothing, a fallback matches keywords from
/// the test names against non-test nodes and follows `TestsFor` edges from the
/// test class's file.
pub fn find_symptom_nodes(&self, problem_statement: &str, test_names: &str) -> Vec<&CodeNode> {
let mut result: Vec<&CodeNode> = Vec::new();
// Ids already added to `result`.
let mut seen = HashSet::new();
let test_list: Vec<String> = serde_json::from_str(test_names)
.unwrap_or_else(|_| {
test_names.lines()
.map(|s| s.trim().to_string())
.filter(|s| !s.is_empty())
.collect()
});
// Source 1: the failing tests themselves.
for test_id in &test_list {
// `path::Class::test` -> last `::` segment; `test (pkg.Class)` -> text before " (".
let short_name = if test_id.contains("::") {
test_id.split("::").last().unwrap_or(test_id)
} else if test_id.contains(" (") {
test_id.split(" (").next().unwrap_or(test_id).trim()
} else {
test_id.as_str()
};
for node in &self.nodes {
if node.kind == NodeKind::Function
&& (node.name == short_name || node.name.ends_with(short_name))
&& (node.file_path.contains("/tests/")
|| node.file_path.contains("/test_")
|| node.name.starts_with("test_"))
{
if seen.insert(node.id.clone()) {
result.push(node);
}
}
}
}
for line in problem_statement.lines() {
let trimmed = line.trim();
// Source 2: text after the last ", in " on the line, with surrounding `<`/`>` removed.
// NOTE(review): this looks aimed at Python traceback lines — confirm.
if trimmed.contains(", in ") {
if let Some(func_part) = trimmed.rsplit(", in ").next() {
let func_name = func_part.trim().trim_start_matches('<').trim_end_matches('>');
if func_name.len() >= 3 && func_name.chars().all(|c| c.is_alphanumeric() || c == '_') {
for node in &self.nodes {
if node.name == func_name && node.kind == NodeKind::Function {
if seen.insert(node.id.clone()) {
result.push(node);
}
}
}
}
}
}
// Source 3: quoted identifiers. After splitting on the quote character the
// odd-indexed parts are the segments between a pair of quotes.
for quote in &['\'', '"', '`'] {
let parts: Vec<&str> = trimmed.split(*quote).collect();
for i in (1..parts.len()).step_by(2) {
let word = parts[i].trim();
if word.len() >= 3
&& word.len() <= 60
&& word.chars().all(|c| c.is_alphanumeric() || c == '_')
{
for node in &self.nodes {
if node.name == word && (node.kind == NodeKind::Function || node.kind == NodeKind::Class) {
if seen.insert(node.id.clone()) {
result.push(node);
}
}
}
}
}
}
}
// Source 4: words with at least two uppercase and one lowercase character
// that exactly name a class node.
for word in problem_statement.split(|c: char| c.is_whitespace() || c == ',' || c == '(' || c == ')' || c == '\'' || c == '"' || c == '`') {
let word = word.trim_matches(|c: char| c == '.' || c == ':' || c == ';');
if word.len() < 4 { continue; }
let has_upper = word.chars().filter(|c| c.is_uppercase()).count() >= 2;
let has_lower = word.chars().any(|c| c.is_lowercase());
let is_ident = word.chars().all(|c| c.is_alphanumeric() || c == '_');
if has_upper && has_lower && is_ident {
for node in &self.nodes {
if node.name == word && node.kind == NodeKind::Class {
if seen.insert(node.id.clone()) {
result.push(node);
}
}
}
}
}
// Fallback, used only when nothing has been found so far.
if result.is_empty() {
for test_id in &test_list {
let short_name = if test_id.contains("::") {
test_id.split("::").last().unwrap_or(test_id)
} else if test_id.contains(" (") {
test_id.split(" (").next().unwrap_or(test_id).trim()
} else {
test_id.as_str()
};
// Keywords: `_`-separated parts of the test name, minus short parts and "test"/"tests".
let kws: Vec<&str> = short_name.split('_')
.filter(|w| w.len() >= 3 && *w != "test" && *w != "tests")
.collect();
if kws.is_empty() { continue; }
for node in &self.nodes {
if node.file_path.contains("/tests/") || node.file_path.contains("/test_") {
continue;
}
let name_lower = node.name.to_lowercase();
let match_count = kws.iter()
.filter(|kw| name_lower.contains(&kw.to_lowercase()))
.count();
// Two keyword hits are required, unless the test name has only one keyword.
if match_count >= 2 || (match_count >= 1 && kws.len() == 1) {
if seen.insert(node.id.clone()) {
result.push(node);
}
}
}
// `test_name (package.module.TestClass)` form: take the class name after
// the last dot and follow `TestsFor` edges from that class's file.
if test_id.contains(" (") {
let class_part = test_id
.split(" (")
.nth(1)
.unwrap_or("")
.trim_end_matches(')');
let class_name = class_part.rsplit('.').next().unwrap_or("");
if !class_name.is_empty() {
for node in &self.nodes {
if node.kind == NodeKind::Class && node.name == class_name {
let file_id = format!("file:{}", node.file_path);
for edge in self.outgoing_edges(&file_id) {
if edge.relation == EdgeRelation::TestsFor {
// The edge target itself, when it is not a file node ...
if let Some(target) = self.node_by_id(&edge.to) {
if target.kind != NodeKind::File {
if seen.insert(target.id.clone()) {
result.push(target);
}
}
}
// ... and every non-file node located in the targeted file.
for src_node in &self.nodes {
if format!("file:{}", src_node.file_path) == edge.to
&& src_node.kind != NodeKind::File
{
if seen.insert(src_node.id.clone()) {
result.push(src_node);
}
}
}
}
}
}
}
}
}
}
}
result
}
/// Converts a selection of code nodes into a self-contained `UnifiedGraphResult`.
///
/// Every entry of `relevant_nodes` becomes a `UnifiedNode` whose id is the node
/// name with all characters other than alphanumerics and `_` replaced by `_`.
/// File nodes are placed in the `infrastructure` layer, classes in `domain`,
/// functions and modules in `application`. `snippets` (keyed by code-node id)
/// supplies the optional source text.
///
/// Edges are taken from the outgoing edges of the relevant nodes, visited in
/// the order of `relevant_nodes`, and kept only when both endpoints map to an
/// emitted node id; each `(from, to, relation)` triple appears once. The
/// description is `issue_description` cut to at most 100 bytes (on a char
/// boundary) followed by `...` when it was longer.
pub fn build_unified_graph(
    &self,
    relevant_nodes: &[&CodeNode],
    snippets: &HashMap<String, String>,
    issue_id: &str,
    issue_description: &str,
) -> UnifiedGraphResult {
    /// Maps a node name to the identifier form used in the unified graph.
    fn sanitize(name: &str) -> String {
        name.replace(|c: char| !c.is_alphanumeric() && c != '_', "_")
    }

    let mut nodes: Vec<UnifiedNode> = Vec::with_capacity(relevant_nodes.len());
    // Emitted ids, for constant-time endpoint checks while building edges.
    let mut node_ids: HashSet<String> = HashSet::new();
    for code_node in relevant_nodes {
        let node_id = sanitize(&code_node.name);
        let (node_type, layer) = match code_node.kind {
            NodeKind::File => ("File".to_string(), "infrastructure"),
            NodeKind::Class => ("Component".to_string(), "domain"),
            NodeKind::Function | NodeKind::Module => ("Component".to_string(), "application"),
        };
        node_ids.insert(node_id.clone());
        nodes.push(UnifiedNode {
            id: node_id,
            node_type,
            layer: layer.to_string(),
            description: format!("{} in {}", code_node.name, code_node.file_path),
            path: Some(code_node.file_path.clone()),
            line: code_node.line,
            code: snippets.get(&code_node.id).cloned(),
        });
    }

    let mut edges: Vec<UnifiedEdge> = Vec::new();
    let mut seen_keys: HashSet<(String, String, String)> = HashSet::new();
    // Walk the input slice rather than a hash set so the edge order is
    // reproducible; `expanded` skips ids that occur more than once.
    let mut expanded: HashSet<&str> = HashSet::new();
    for code_node in relevant_nodes {
        if !expanded.insert(code_node.id.as_str()) {
            continue;
        }
        for edge in self.outgoing_edges(&code_node.id) {
            if let (Some(from), Some(to)) =
                (self.node_by_id(&edge.from), self.node_by_id(&edge.to))
            {
                let from_id = sanitize(&from.name);
                let to_id = sanitize(&to.name);
                if !node_ids.contains(&from_id) || !node_ids.contains(&to_id) {
                    continue;
                }
                let rel = edge.relation.to_string();
                if seen_keys.insert((from_id.clone(), to_id.clone(), rel.clone())) {
                    edges.push(UnifiedEdge {
                        from: from_id,
                        to: to_id,
                        relation: rel,
                    });
                }
            }
        }
    }

    let description = if issue_description.len() > 100 {
        // Back up to a char boundary so slicing cannot split a character.
        let mut end = 100;
        while end > 0 && !issue_description.is_char_boundary(end) {
            end -= 1;
        }
        format!("{}...", &issue_description[..end])
    } else {
        issue_description.to_string()
    };

    UnifiedGraphResult {
        issue_id: issue_id.to_string(),
        description,
        nodes,
        edges,
    }
}
}
/// Serializable result of `build_unified_graph`: an issue plus the sub-graph
/// selected for it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UnifiedGraphResult {
/// Identifier of the issue, copied from the caller.
pub issue_id: String,
/// Issue description; `build_unified_graph` shortens texts longer than
/// 100 bytes and appends `...`.
pub description: String,
/// One entry per relevant code node.
pub nodes: Vec<UnifiedNode>,
/// Relations between the entries of `nodes`.
pub edges: Vec<UnifiedEdge>,
}
/// A node of the unified graph produced by `build_unified_graph`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UnifiedNode {
/// Node name with every character other than alphanumerics and `_`
/// replaced by `_` (as built by `build_unified_graph`).
pub id: String,
/// `"File"` for file nodes, `"Component"` for everything else.
pub node_type: String,
/// `"infrastructure"`, `"domain"` or `"application"`, chosen from the node kind.
pub layer: String,
/// Text of the form `<name> in <file_path>`.
pub description: String,
/// File path of the underlying code node.
pub path: Option<String>,
/// Line of the underlying code node, if known.
pub line: Option<usize>,
/// Source snippet for the node, when one was supplied.
pub code: Option<String>,
}
/// A directed relation between two `UnifiedNode`s, referenced by their `id`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UnifiedEdge {
/// `id` of the source node.
pub from: String,
/// `id` of the target node.
pub to: String,
/// Relation name, taken from the string form of the code edge's relation.
pub relation: String,
}
/// Returns the names of the `decorator` children of `node`.
///
/// For each decorator the leading `@` characters and any argument list (text
/// from the first `(` onward) are removed and the rest is trimmed; decorators
/// that end up empty, or whose text is not valid UTF-8, are dropped.
fn collect_decorators(node: tree_sitter::Node, source: &[u8]) -> Vec<String> {
    let mut walker = node.walk();
    let names: Vec<String> = node
        .children(&mut walker)
        .filter(|child| child.kind() == "decorator")
        .filter_map(|child| {
            let raw = child.utf8_text(source).unwrap_or("").trim();
            let without_at = raw.trim_start_matches('@');
            let bare = without_at.split('(').next().unwrap_or(without_at).trim();
            if bare.is_empty() {
                None
            } else {
                Some(bare.to_string())
            }
        })
        .collect();
    names
}
/// Extracts the first non-blank line of a definition's docstring.
///
/// Only the first non-comment child of the `body` is considered; it must be an
/// expression statement whose first child is a `string` or
/// `concatenated_string`. Surrounding triple and single quotes are stripped,
/// and the line is cut to at most 100 bytes on a char boundary. Returns `None`
/// when there is no body, no such string, or the string has no non-blank line.
fn extract_docstring(node: tree_sitter::Node, source: &str) -> Option<String> {
    const MAX_DOC_BYTES: usize = 100;

    let body = node.child_by_field_name("body")?;
    let mut cursor = body.walk();
    let first_statement = body
        .children(&mut cursor)
        .find(|child| child.kind() != "comment")?;
    if first_statement.kind() != "expression_statement" {
        return None;
    }

    let literal = first_statement.child(0)?;
    if !matches!(literal.kind(), "string" | "concatenated_string") {
        return None;
    }

    let (start, end) = (literal.start_byte(), literal.end_byte());
    if start >= source.len() || end > source.len() {
        return None;
    }

    let unquoted = source[start..end]
        .trim_start_matches("\"\"\"")
        .trim_end_matches("\"\"\"")
        .trim_start_matches("'''")
        .trim_end_matches("'''")
        .trim_start_matches('"')
        .trim_end_matches('"')
        .trim_start_matches('\'')
        .trim_end_matches('\'')
        .trim();

    let first_line = unquoted.lines().find(|line| !line.trim().is_empty())?;

    // Largest char boundary not above the byte limit; index 0 always qualifies.
    let cut = if first_line.len() > MAX_DOC_BYTES {
        (0..=MAX_DOC_BYTES)
            .rev()
            .find(|&idx| first_line.is_char_boundary(idx))
            .unwrap_or(0)
    } else {
        first_line.len()
    };
    Some(first_line[..cut].to_string())
}
/// Heuristically decides whether `node` sits on an error-handling path.
///
/// Up to 10 ancestors are inspected. The result is `true` as soon as one of
/// them is an `except_clause`, `raise_statement` or `try_statement`, or an
/// `if_statement` whose condition text contains (case-insensitively) one of
/// `error`, `exception`, `err`, `fail` or `none`.
///
/// Only the bytes of each inspected condition are decoded, rather than
/// validating the whole file on every call. A condition that is out of range
/// or not valid UTF-8 is treated as not matching.
fn is_in_error_path(node: &tree_sitter::Node, source: &[u8]) -> bool {
    const MAX_ANCESTORS: usize = 10;
    const ERROR_HINTS: [&str; 5] = ["error", "exception", "err", "fail", "none"];

    let mut current = node.parent();
    let mut levels = 0;
    while let Some(parent) = current {
        levels += 1;
        if levels > MAX_ANCESTORS {
            break;
        }
        match parent.kind() {
            "except_clause" | "raise_statement" | "try_statement" => return true,
            "if_statement" => {
                let condition_text = parent
                    .child_by_field_name("condition")
                    .and_then(|cond| source.get(cond.byte_range()))
                    .and_then(|bytes| std::str::from_utf8(bytes).ok());
                if let Some(text) = condition_text {
                    let lower = text.to_lowercase();
                    if ERROR_HINTS.iter().any(|hint| lower.contains(*hint)) {
                        return true;
                    }
                }
            }
            _ => {}
        }
        current = parent.parent();
    }
    false
}
/// Extracts nodes, edges and imported names from the top level of one Python file.
///
/// Walks the direct children of the syntax-tree root:
/// * class and function definitions (plain or decorated) are delegated to
///   `extract_class_node` / `extract_function_node`;
/// * `import x` and `from x import ...` add an `Imports` edge from the file to
///   `module_ref:<module>` unless `is_stdlib` accepts the module. Relative
///   imports have their leading dots removed;
/// * the names listed after `import` in a `from` statement (before any
///   ` as ` alias, without surrounding parentheses, excluding `*`) are
///   collected into the returned name set.
///
/// Returns `(nodes, edges, imported_names)`; all three are empty when the
/// parser produces no tree.
fn extract_python_tree_sitter(
    path: &str,
    content: &str,
    parser: &mut Parser,
    class_id_map: &mut HashMap<String, String>,
) -> (Vec<CodeNode>, Vec<CodeEdge>, HashSet<String>) {
    /// Builds the `Imports` edge from a file to a referenced module.
    fn import_edge(file_id: &str, module: &str) -> CodeEdge {
        CodeEdge {
            from: file_id.to_string(),
            to: format!("module_ref:{}", module),
            relation: EdgeRelation::Imports,
            weight: 0.5,
            call_count: 1,
            in_error_path: false,
            confidence: 1.0,
        }
    }

    let mut nodes = Vec::new();
    let mut edges = Vec::new();
    let mut imports = HashSet::new();
    let tree = match parser.parse(content, None) {
        Some(t) => t,
        None => return (nodes, edges, imports),
    };
    let file_id = format!("file:{}", path);
    let source = content.as_bytes();
    let root = tree.root_node();
    let text = |node: tree_sitter::Node| -> String {
        node.utf8_text(source).unwrap_or("").to_string()
    };
    // Compiled once per file instead of once per import statement.
    let re_import = Regex::new(r"import\s+([\w.]+)").unwrap();

    let mut cursor = root.walk();
    for child in root.children(&mut cursor) {
        match child.kind() {
            "class_definition" => {
                extract_class_node(
                    child,
                    source,
                    content,
                    path,
                    &file_id,
                    &[],
                    &mut nodes,
                    &mut edges,
                    class_id_map,
                );
            }
            "function_definition" => {
                extract_function_node(
                    child, source, content, path, &file_id, &[], &mut nodes, &mut edges,
                );
            }
            "decorated_definition" => {
                let decorators = collect_decorators(child, source);
                let mut inner_cursor = child.walk();
                for inner in child.children(&mut inner_cursor) {
                    match inner.kind() {
                        "class_definition" => {
                            extract_class_node(
                                inner,
                                source,
                                content,
                                path,
                                &file_id,
                                &decorators,
                                &mut nodes,
                                &mut edges,
                                class_id_map,
                            );
                        }
                        "function_definition" => {
                            extract_function_node(
                                inner, source, content, path, &file_id, &decorators,
                                &mut nodes, &mut edges,
                            );
                        }
                        _ => {}
                    }
                }
            }
            "import_statement" => {
                let import_text = text(child);
                // Only the first module of the statement is captured.
                if let Some(cap) = re_import.captures(&import_text) {
                    let module = &cap[1];
                    if !is_stdlib(module) {
                        edges.push(import_edge(&file_id, module));
                    }
                }
            }
            "import_from_statement" => {
                // The first dotted or relative name is the module being imported from.
                let mut mod_cursor = child.walk();
                for mod_child in child.children(&mut mod_cursor) {
                    if mod_child.kind() == "dotted_name" {
                        let module = text(mod_child);
                        if !is_stdlib(&module) {
                            edges.push(import_edge(&file_id, &module));
                        }
                        break;
                    }
                    if mod_child.kind() == "relative_import" {
                        let rel_import_text = text(mod_child);
                        let trimmed = rel_import_text.trim_start_matches('.');
                        if !trimmed.is_empty() && !is_stdlib(trimmed) {
                            edges.push(import_edge(&file_id, trimmed));
                        }
                        break;
                    }
                }
                let import_text = child.utf8_text(source).unwrap_or("");
                if let Some(after_import) = import_text.split(" import ").nth(1) {
                    for name in after_import.split(',') {
                        // Drop the alias, then the parentheses and line breaks of a
                        // parenthesised import list, so `(foo` is recorded as `foo`.
                        let clean = name
                            .trim()
                            .split(" as ")
                            .next()
                            .unwrap_or("")
                            .trim_matches(|c: char| c.is_whitespace() || c == '(' || c == ')');
                        if !clean.is_empty() && clean != "*" {
                            imports.insert(clean.to_string());
                        }
                    }
                }
            }
            _ => {}
        }
    }
    (nodes, edges, imports)
}
/// Records a class definition, its base classes and its methods.
///
/// Pushes a `Class` node with id `class:<path>:<name>` and a `DefinedIn` edge
/// to `file_id`, registers the class name in `class_id_map`, adds one
/// `Inherits` edge to `class_ref:<base>` per base class (last dotted segment,
/// `object` excluded), and delegates every plain or decorated function in the
/// class body to `extract_method_node`. Definitions without a name are ignored.
///
/// The signature is the class header up to the first `:` that ends a line. It
/// is taken from the class's own text only, and the 200-byte fallback is cut on
/// a char boundary so non-ASCII source cannot cause a slicing panic.
fn extract_class_node(
    node: tree_sitter::Node,
    source: &[u8],
    source_str: &str,
    path: &str,
    file_id: &str,
    decorators: &[String],
    nodes: &mut Vec<CodeNode>,
    edges: &mut Vec<CodeEdge>,
    class_id_map: &mut HashMap<String, String>,
) {
    /// Header of a definition: text before the first line-ending `:`, or at
    /// most the first 200 bytes when there is none.
    fn header_text(definition: &str) -> &str {
        let end = definition
            .find(":\n")
            .or_else(|| definition.find(":\r"))
            .unwrap_or_else(|| {
                let mut cut = definition.len().min(200);
                while !definition.is_char_boundary(cut) {
                    cut -= 1;
                }
                cut
            });
        definition[..end].trim()
    }

    let class_name = node
        .child_by_field_name("name")
        .and_then(|n| n.utf8_text(source).ok())
        .unwrap_or("")
        .to_string();
    if class_name.is_empty() {
        return;
    }
    let line_num = node.start_position().row + 1;
    let class_id = format!("class:{}:{}", path, class_name);
    let class_sig =
        Some(header_text(source_str.get(node.byte_range()).unwrap_or("")).to_string());
    let class_docstring = extract_docstring(node, source_str);
    let class_line_count = node.end_position().row - node.start_position().row + 1;
    let class_is_test =
        path.contains("/tests/") || path.contains("/test_") || class_name.starts_with("Test");
    nodes.push(CodeNode {
        id: class_id.clone(),
        kind: NodeKind::Class,
        name: class_name.clone(),
        file_path: path.to_string(),
        line: Some(line_num),
        decorators: decorators.to_vec(),
        signature: class_sig,
        docstring: class_docstring,
        line_count: class_line_count,
        is_test: class_is_test,
    });
    edges.push(CodeEdge {
        from: class_id.clone(),
        to: file_id.to_string(),
        relation: EdgeRelation::DefinedIn,
        weight: 0.5,
        call_count: 1,
        in_error_path: false,
        confidence: 1.0,
    });
    class_id_map.insert(class_name.clone(), class_id.clone());

    if let Some(superclasses) = node.child_by_field_name("superclasses") {
        let mut sc_cursor = superclasses.walk();
        for sc_child in superclasses.children(&mut sc_cursor) {
            let kind = sc_child.kind();
            if kind == "identifier" || kind == "attribute" {
                let parent_text = sc_child.utf8_text(source).unwrap_or("");
                // `pkg.mod.Base` is referenced by its last segment only.
                let parent_name = parent_text.split('.').last().unwrap_or("").trim();
                if !parent_name.is_empty() && parent_name != "object" {
                    edges.push(CodeEdge {
                        from: class_id.clone(),
                        to: format!("class_ref:{}", parent_name),
                        relation: EdgeRelation::Inherits,
                        weight: 0.5,
                        call_count: 1,
                        in_error_path: false,
                        confidence: 1.0,
                    });
                }
            }
        }
    }

    if let Some(body) = node.child_by_field_name("body") {
        let mut body_cursor = body.walk();
        for body_child in body.children(&mut body_cursor) {
            match body_child.kind() {
                "function_definition" => {
                    extract_method_node(
                        body_child, source, source_str, path, &class_id, &[], nodes, edges,
                    );
                }
                "decorated_definition" => {
                    let method_decorators = collect_decorators(body_child, source);
                    let mut inner_cursor = body_child.walk();
                    for inner in body_child.children(&mut inner_cursor) {
                        if inner.kind() == "function_definition" {
                            extract_method_node(
                                inner,
                                source,
                                source_str,
                                path,
                                &class_id,
                                &method_decorators,
                                nodes,
                                edges,
                            );
                        }
                    }
                }
                _ => {}
            }
        }
    }
}
/// Records a method of a class.
///
/// Pushes a `Function` node with id `method:<path>:<name>` (the class name is
/// not part of the id) and a `DefinedIn` edge pointing at `class_id`.
/// Definitions without a name are ignored. A method counts as a test when the
/// path contains `/tests/` or `/test_`, or its name starts with `test_` or
/// `Test`.
///
/// The signature is the header up to the first `:` that ends a line. It is
/// taken from the method's own text only, and the 200-byte fallback is cut on a
/// char boundary so non-ASCII source cannot cause a slicing panic.
fn extract_method_node(
    node: tree_sitter::Node,
    source: &[u8],
    source_str: &str,
    path: &str,
    class_id: &str,
    decorators: &[String],
    nodes: &mut Vec<CodeNode>,
    edges: &mut Vec<CodeEdge>,
) {
    /// Header of a definition: text before the first line-ending `:`, or at
    /// most the first 200 bytes when there is none.
    fn header_text(definition: &str) -> &str {
        let end = definition
            .find(":\n")
            .or_else(|| definition.find(":\r"))
            .unwrap_or_else(|| {
                let mut cut = definition.len().min(200);
                while !definition.is_char_boundary(cut) {
                    cut -= 1;
                }
                cut
            });
        definition[..end].trim()
    }

    let func_name = node
        .child_by_field_name("name")
        .and_then(|n| n.utf8_text(source).ok())
        .unwrap_or("")
        .to_string();
    if func_name.is_empty() {
        return;
    }
    let line_num = node.start_position().row + 1;
    let method_id = format!("method:{}:{}", path, func_name);
    let signature =
        Some(header_text(source_str.get(node.byte_range()).unwrap_or("")).to_string());
    let docstring = extract_docstring(node, source_str);
    let line_count = node.end_position().row - node.start_position().row + 1;
    let is_test = path.contains("/tests/")
        || path.contains("/test_")
        || func_name.starts_with("test_")
        || func_name.starts_with("Test");
    nodes.push(CodeNode {
        id: method_id.clone(),
        kind: NodeKind::Function,
        name: func_name,
        file_path: path.to_string(),
        line: Some(line_num),
        decorators: decorators.to_vec(),
        signature,
        docstring,
        line_count,
        is_test,
    });
    edges.push(CodeEdge {
        from: method_id,
        to: class_id.to_string(),
        relation: EdgeRelation::DefinedIn,
        weight: 0.5,
        call_count: 1,
        in_error_path: false,
        confidence: 1.0,
    });
}
/// Records a module-level function.
///
/// Pushes a `Function` node with id `func:<path>:<name>` and a `DefinedIn`
/// edge pointing at `file_id`. Definitions without a name are ignored. A
/// function counts as a test when the path contains `/tests/` or `/test_`, or
/// its name starts with `test_` or `Test`.
///
/// The signature is the header up to the first `:` that ends a line. It is
/// taken from the function's own text only, and the 200-byte fallback is cut on
/// a char boundary so non-ASCII source cannot cause a slicing panic.
fn extract_function_node(
    node: tree_sitter::Node,
    source: &[u8],
    source_str: &str,
    path: &str,
    file_id: &str,
    decorators: &[String],
    nodes: &mut Vec<CodeNode>,
    edges: &mut Vec<CodeEdge>,
) {
    /// Header of a definition: text before the first line-ending `:`, or at
    /// most the first 200 bytes when there is none.
    fn header_text(definition: &str) -> &str {
        let end = definition
            .find(":\n")
            .or_else(|| definition.find(":\r"))
            .unwrap_or_else(|| {
                let mut cut = definition.len().min(200);
                while !definition.is_char_boundary(cut) {
                    cut -= 1;
                }
                cut
            });
        definition[..end].trim()
    }

    let func_name = node
        .child_by_field_name("name")
        .and_then(|n| n.utf8_text(source).ok())
        .unwrap_or("")
        .to_string();
    if func_name.is_empty() {
        return;
    }
    let line_num = node.start_position().row + 1;
    let func_id = format!("func:{}:{}", path, func_name);
    let signature =
        Some(header_text(source_str.get(node.byte_range()).unwrap_or("")).to_string());
    let docstring = extract_docstring(node, source_str);
    let line_count = node.end_position().row - node.start_position().row + 1;
    let is_test = path.contains("/tests/")
        || path.contains("/test_")
        || func_name.starts_with("test_")
        || func_name.starts_with("Test");
    nodes.push(CodeNode {
        id: func_id.clone(),
        kind: NodeKind::Function,
        name: func_name,
        file_path: path.to_string(),
        line: Some(line_num),
        decorators: decorators.to_vec(),
        signature,
        docstring,
        line_count,
        is_test,
    });
    edges.push(CodeEdge {
        from: func_id,
        to: file_id.to_string(),
        relation: EdgeRelation::DefinedIn,
        weight: 0.5,
        call_count: 1,
        in_error_path: false,
        confidence: 1.0,
    });
}
/// Walks one file's syntax tree and appends a `Calls` edge for every call
/// expression that can be attributed to an enclosing function or method.
///
/// The enclosing scope of a call is the entry of the scope map (built by
/// `build_scope_map`) whose line range contains the call and whose start line
/// is greatest. Plain-identifier callees and `obj.attr(...)` callees go
/// through `resolve_and_add_call_edge`; `self.attr(...)` / `cls.attr(...)`
/// callees go through `resolve_self_method_call`. Edges produced for a call
/// that `is_in_error_path` reports as being on an error path get
/// `in_error_path = true`. String and comment subtrees are not descended into.
fn extract_calls_from_tree(
    root: tree_sitter::Node,
    source: &[u8],
    rel_path: &str,
    func_name_map: &HashMap<String, Vec<String>>,
    method_to_class: &HashMap<String, String>,
    class_parents: &HashMap<String, Vec<String>>,
    file_func_ids: &HashSet<String>,
    file_imported_names: &HashMap<String, HashSet<String>>,
    package_dir: &str,
    class_init_map: &HashMap<String, Vec<(String, String)>>,
    node_pkg_map: &HashMap<String, String>,
    edges: &mut Vec<CodeEdge>,
) {
    let mut scope_map: Vec<(usize, usize, String, Option<String>)> = Vec::new();
    build_scope_map(root, source, rel_path, &mut scope_map);

    // Explicit stack instead of recursion; children are pushed in reverse so
    // they are popped in source order.
    let mut pending = vec![root];
    while let Some(current) = pending.pop() {
        let kind = current.kind();
        if matches!(
            kind,
            "string" | "comment" | "string_content" | "concatenated_string"
        ) {
            continue;
        }

        if kind == "call" {
            let call_line = current.start_position().row + 1;
            let error_path = is_in_error_path(&current, source);
            // Innermost scope = containing range with the greatest start line.
            let enclosing = scope_map
                .iter()
                .filter(|span| span.0 <= call_line && call_line <= span.1)
                .max_by_key(|span| span.0);
            let callee = enclosing.and_then(|_| current.child_by_field_name("function"));

            if let (Some(span), Some(function_node)) = (enclosing, callee) {
                let caller_id = span.2.as_str();
                let caller_class = span.3.as_deref();
                let first_new_edge = edges.len();

                if function_node.kind() == "identifier" {
                    let callee_name = function_node.utf8_text(source).unwrap_or("");
                    if !callee_name.is_empty() && !is_python_builtin(callee_name) {
                        resolve_and_add_call_edge(
                            caller_id,
                            callee_name,
                            func_name_map,
                            file_func_ids,
                            file_imported_names,
                            rel_path,
                            package_dir,
                            class_init_map,
                            node_pkg_map,
                            false,
                            edges,
                        );
                    }
                } else if function_node.kind() == "attribute" {
                    let object = function_node.child_by_field_name("object");
                    let attribute = function_node.child_by_field_name("attribute");
                    if let (Some(obj), Some(attr)) = (object, attribute) {
                        let obj_text = obj.utf8_text(source).unwrap_or("");
                        let method_name = attr.utf8_text(source).unwrap_or("");
                        let on_self = obj_text == "self" || obj_text == "cls";
                        if method_name.is_empty() {
                            // Unreadable attribute name: nothing to resolve.
                        } else if on_self {
                            resolve_self_method_call(
                                caller_id,
                                method_name,
                                caller_class,
                                func_name_map,
                                method_to_class,
                                class_parents,
                                file_func_ids,
                                edges,
                            );
                        } else if !is_python_builtin(method_name) {
                            resolve_and_add_call_edge(
                                caller_id,
                                method_name,
                                func_name_map,
                                file_func_ids,
                                file_imported_names,
                                rel_path,
                                package_dir,
                                class_init_map,
                                node_pkg_map,
                                true,
                                edges,
                            );
                        }
                    }
                }

                // Flag only the edges this particular call just produced.
                if error_path {
                    edges
                        .iter_mut()
                        .skip(first_new_edge)
                        .for_each(|edge| edge.in_error_path = true);
                }
            }
        }

        pending.extend(
            (0..current.child_count())
                .rev()
                .filter_map(|i| current.child(i)),
        );
    }
}
/// Collects the line range of every named `function_definition` under `node`.
///
/// Each entry is `(start_line, end_line, id, class_ctx)` with 1-based lines.
/// The id is `method:{rel_path}:{name}` when a class context is active and
/// `func:{rel_path}:{name}` otherwise. A named `class_definition` replaces the
/// class context for everything beneath it; every other node passes the
/// current context down unchanged.
fn build_scope_map(
    node: tree_sitter::Node,
    source: &[u8],
    rel_path: &str,
    scope_map: &mut Vec<(usize, usize, String, Option<String>)>,
) {
    let mut worklist: Vec<(tree_sitter::Node, Option<String>)> = vec![(node, None)];
    while let Some((current, class_ctx)) = worklist.pop() {
        // Decide which class context the children of `current` inherit.
        let child_ctx = match current.kind() {
            "class_definition" => {
                let class_name = current
                    .child_by_field_name("name")
                    .and_then(|n| n.utf8_text(source).ok())
                    .unwrap_or("");
                if class_name.is_empty() {
                    class_ctx
                } else {
                    Some(format!("class:{}:{}", rel_path, class_name))
                }
            }
            "function_definition" => {
                let func_name = current
                    .child_by_field_name("name")
                    .and_then(|n| n.utf8_text(source).ok())
                    .unwrap_or("");
                if !func_name.is_empty() {
                    let prefix = if class_ctx.is_some() { "method" } else { "func" };
                    scope_map.push((
                        current.start_position().row + 1,
                        current.end_position().row + 1,
                        format!("{}:{}:{}", prefix, rel_path, func_name),
                        class_ctx.clone(),
                    ));
                }
                class_ctx
            }
            _ => class_ctx,
        };

        // Reverse push so children are popped in source order.
        for i in (0..current.child_count()).rev() {
            if let Some(child) = current.child(i) {
                worklist.push((child, child_ctx.clone()));
            }
        }
    }
}
/// Returns `true` when `name` is one of a fixed list of very common method
/// names: a set of Python dunder methods plus generic verbs such as `get`,
/// `save` or `run`. The comparison is exact and case-sensitive.
fn is_common_dunder(name: &str) -> bool {
    const COMMON_NAMES: &[&str] = &[
        // Dunder methods.
        "__init__",
        "__str__",
        "__repr__",
        "__eq__",
        "__ne__",
        "__hash__",
        "__len__",
        "__iter__",
        "__next__",
        "__getitem__",
        "__setitem__",
        "__delitem__",
        "__contains__",
        "__call__",
        "__enter__",
        "__exit__",
        "__get__",
        "__set__",
        "__delete__",
        "__getattr__",
        "__setattr__",
        "__bool__",
        "__lt__",
        "__le__",
        "__gt__",
        "__ge__",
        "__add__",
        "__sub__",
        "__mul__",
        "__new__",
        "__del__",
        "__format__",
        // Generic method names.
        "get",
        "set",
        "update",
        "delete",
        "save",
        "clean",
        "run",
        "setup",
        "teardown",
    ];
    COMMON_NAMES.contains(&name)
}
/// Resolves a called name to candidate definitions and appends `Calls` edges
/// from `caller_id` to each chosen candidate (never to the caller itself).
///
/// When `callee_name` is a key of `func_name_map`, candidates are taken from
/// the first non-empty tier:
/// 1. ids contained in `file_func_ids` (confidence 0.8),
/// 2. all ids, if this file imports the name (0.8),
/// 3. ids whose package in `node_pkg_map` equals `package_dir` (0.7),
/// 4. all ids, if there are at most 20 of them for an attribute call on a
///    name that is not in `is_common_dunder`, or at most 3 otherwise
///    (confidence 0.3 for attribute calls, 0.5 otherwise).
///
/// Edge weight is 0.8 only for attribute calls that fell through to tier 4,
/// and 0.5 in every other case.
///
/// When the name is unknown but starts with an uppercase character, it is
/// looked up in `class_init_map` with the same tier order (same file 0.8,
/// imported 0.7, same package 0.6, at most 3 entries overall 0.5).
fn resolve_and_add_call_edge(
    caller_id: &str,
    callee_name: &str,
    func_name_map: &HashMap<String, Vec<String>>,
    file_func_ids: &HashSet<String>,
    file_imported_names: &HashMap<String, HashSet<String>>,
    rel_path: &str,
    package_dir: &str,
    class_init_map: &HashMap<String, Vec<(String, String)>>,
    node_pkg_map: &HashMap<String, String>,
    is_attribute_call: bool,
    edges: &mut Vec<CodeEdge>,
) {
    // The import check depends only on the file and the name, not on the
    // individual candidate, so it is answered once.
    let name_is_imported = file_imported_names
        .get(rel_path)
        .map_or(false, |names| names.contains(callee_name));

    if let Some(callee_ids) = func_name_map.get(callee_name) {
        let same_file: Vec<&String> = callee_ids
            .iter()
            .filter(|id| file_func_ids.contains(*id))
            .collect();
        let imported: Vec<&String> = if name_is_imported {
            callee_ids.iter().collect()
        } else {
            Vec::new()
        };
        let same_pkg: Vec<&String> = callee_ids
            .iter()
            .filter(|id| {
                node_pkg_map
                    .get(id.as_str())
                    .map_or(false, |pkg| pkg == package_dir)
            })
            .collect();

        let locally_resolved =
            !(same_file.is_empty() && imported.is_empty() && same_pkg.is_empty());
        let weight = if !locally_resolved && is_attribute_call {
            0.8
        } else {
            0.5
        };
        let global_limit = if is_attribute_call && !is_common_dunder(callee_name) {
            20
        } else {
            3
        };

        let (targets, confidence) = if !same_file.is_empty() {
            (same_file, 0.8_f32)
        } else if !imported.is_empty() {
            (imported, 0.8)
        } else if !same_pkg.is_empty() {
            (same_pkg, 0.7)
        } else {
            // Global fallback: only trusted when the name is rare enough.
            let everything: Vec<&String> = if callee_ids.len() <= global_limit {
                callee_ids.iter().collect()
            } else {
                Vec::new()
            };
            (everything, if is_attribute_call { 0.3 } else { 0.5 })
        };

        edges.extend(
            targets
                .into_iter()
                .filter(|callee_id| callee_id.as_str() != caller_id)
                .map(|callee_id| CodeEdge {
                    from: caller_id.to_string(),
                    to: callee_id.clone(),
                    relation: EdgeRelation::Calls,
                    weight,
                    call_count: 1,
                    in_error_path: false,
                    confidence,
                }),
        );
        return;
    }

    // Unknown function name: a capitalised name is treated as a possible
    // class instantiation and resolved via the constructor map.
    let starts_uppercase = callee_name
        .chars()
        .next()
        .map_or(false, |first| first.is_uppercase());
    if !starts_uppercase {
        return;
    }
    let init_entries = match class_init_map.get(callee_name) {
        Some(entries) => entries,
        None => return,
    };

    let same_file: Vec<&str> = init_entries
        .iter()
        .filter(|(file_path, _)| file_path == rel_path)
        .map(|(_, id)| id.as_str())
        .collect();
    let imported: Vec<&str> = if name_is_imported {
        init_entries.iter().map(|(_, id)| id.as_str()).collect()
    } else {
        Vec::new()
    };
    let same_pkg: Vec<&str> = init_entries
        .iter()
        .filter(|(file_path, _)| {
            // Directory part of the entry's path ("" when it has no '/').
            file_path.rsplit_once('/').map_or("", |(dir, _)| dir) == package_dir
        })
        .map(|(_, id)| id.as_str())
        .collect();

    let (targets, confidence): (Vec<&str>, f32) = if !same_file.is_empty() {
        (same_file, 0.8)
    } else if !imported.is_empty() {
        (imported, 0.7)
    } else if !same_pkg.is_empty() {
        (same_pkg, 0.6)
    } else if init_entries.len() <= 3 {
        (init_entries.iter().map(|(_, id)| id.as_str()).collect(), 0.5)
    } else {
        (Vec::new(), 0.0)
    };

    edges.extend(
        targets
            .into_iter()
            .filter(|init_id| *init_id != caller_id)
            .map(|init_id| CodeEdge {
                from: caller_id.to_string(),
                to: init_id.to_string(),
                relation: EdgeRelation::Calls,
                weight: 0.5,
                call_count: 1,
                in_error_path: false,
                confidence,
            }),
    );
}
/// Resolves a `self.method(...)` / `cls.method(...)` call and appends `Calls`
/// edges from `caller_id` to the chosen definitions (never to the caller).
///
/// With a known caller class, candidates are narrowed to methods whose class
/// in `method_to_class` is the caller's class or one of its entries in
/// `class_parents`. If none match, all candidates are used when there are at
/// most three, otherwise only those in `file_func_ids`. These edges get
/// confidence 0.9. Without a caller class, only candidates in
/// `file_func_ids` are linked, with confidence 0.6.
fn resolve_self_method_call(
    caller_id: &str,
    method_name: &str,
    caller_class: Option<&str>,
    func_name_map: &HashMap<String, Vec<String>>,
    method_to_class: &HashMap<String, String>,
    class_parents: &HashMap<String, Vec<String>>,
    file_func_ids: &HashSet<String>,
    edges: &mut Vec<CodeEdge>,
) {
    let callee_ids = match func_name_map.get(method_name) {
        Some(ids) => ids,
        None => return,
    };

    let (targets, confidence): (Vec<&String>, _) = match caller_class {
        Some(class_id) => {
            let parents = class_parents.get(class_id);
            let scoped: Vec<&String> = callee_ids
                .iter()
                .filter(|id| match method_to_class.get(*id) {
                    // Accept the caller's own class or any listed parent.
                    Some(owner) => {
                        owner == class_id
                            || parents.map_or(false, |list| list.contains(owner))
                    }
                    None => false,
                })
                .collect();
            let chosen: Vec<&String> = if !scoped.is_empty() {
                scoped
            } else if callee_ids.len() <= 3 {
                callee_ids.iter().collect()
            } else {
                callee_ids
                    .iter()
                    .filter(|id| file_func_ids.contains(*id))
                    .collect()
            };
            (chosen, 0.9)
        }
        None => (
            callee_ids
                .iter()
                .filter(|id| file_func_ids.contains(*id))
                .collect(),
            0.6,
        ),
    };

    for callee_id in targets {
        if callee_id == caller_id {
            continue;
        }
        edges.push(CodeEdge {
            from: caller_id.to_string(),
            to: callee_id.clone(),
            relation: EdgeRelation::Calls,
            weight: 0.5,
            call_count: 1,
            in_error_path: false,
            confidence,
        });
    }
}
/// Appends an `Overrides` edge (base method -> subclass method) for every
/// pair of same-named methods whose classes are linked by an `Inherits` edge.
///
/// A method belongs to a class when a `DefinedIn` edge leads from a
/// `Function` node to an id starting with `class:`. Pairs whose two method
/// ids are identical are skipped. New edges use weight 0.4 and confidence 0.6.
fn add_override_edges(nodes: &[CodeNode], edges: &mut Vec<CodeEdge>) {
    // Index function nodes by id once instead of scanning `nodes` for every
    // edge. `or_insert` keeps the first node per id, matching what a linear
    // first-match search would return.
    let mut functions_by_id: HashMap<&str, &CodeNode> = HashMap::new();
    for node in nodes.iter().filter(|n| n.kind == NodeKind::Function) {
        functions_by_id.entry(node.id.as_str()).or_insert(node);
    }

    // class id -> [(method name, method id)] in edge order.
    let mut class_methods: HashMap<&str, Vec<(&str, &str)>> = HashMap::new();
    for edge in edges.iter() {
        if edge.relation == EdgeRelation::DefinedIn && edge.to.starts_with("class:") {
            if let Some(method) = functions_by_id.get(edge.from.as_str()) {
                class_methods
                    .entry(edge.to.as_str())
                    .or_default()
                    .push((method.name.as_str(), method.id.as_str()));
            }
        }
    }

    let mut new_edges = Vec::new();
    for inherits in edges
        .iter()
        .filter(|e| e.relation == EdgeRelation::Inherits)
    {
        // `from` is the subclass, `to` is the base class.
        let sub_methods = class_methods.get(inherits.from.as_str());
        let base_methods = class_methods.get(inherits.to.as_str());
        if let (Some(sub_methods), Some(base_methods)) = (sub_methods, base_methods) {
            for (sub_name, sub_id) in sub_methods {
                for (base_name, base_id) in base_methods {
                    if sub_name == base_name && sub_id != base_id {
                        new_edges.push(CodeEdge {
                            from: base_id.to_string(),
                            to: sub_id.to_string(),
                            relation: EdgeRelation::Overrides,
                            weight: 0.4,
                            call_count: 1,
                            in_error_path: false,
                            confidence: 0.6,
                        });
                    }
                }
            }
        }
    }
    edges.extend(new_edges);
}
/// Parses `content` with the Rust grammar and extracts graph nodes, edges and
/// imported module names from the top-level items via `extract_rust_node`.
///
/// Returns three empty collections when the grammar cannot be installed on
/// `parser` or when parsing yields no tree. `class_id_map` is passed through
/// to `extract_rust_node`.
fn extract_rust_tree_sitter(
    path: &str,
    content: &str,
    parser: &mut Parser,
    class_id_map: &mut HashMap<String, String>,
) -> (Vec<CodeNode>, Vec<CodeEdge>, HashSet<String>) {
    let mut nodes = Vec::new();
    let mut edges = Vec::new();
    let mut imports = HashSet::new();

    let language_ready = parser
        .set_language(&tree_sitter_rust::LANGUAGE.into())
        .is_ok();
    let parsed = if language_ready {
        parser.parse(content, None)
    } else {
        None
    };

    if let Some(tree) = parsed {
        let file_id = format!("file:{}", path);
        let source = content.as_bytes();
        let root = tree.root_node();
        let mut impl_target_map: HashMap<String, String> = HashMap::new();
        let mut cursor = root.walk();
        for top_level in root.children(&mut cursor) {
            // Top-level items carry an empty module prefix.
            extract_rust_node(
                top_level,
                source,
                content,
                path,
                &file_id,
                &mut nodes,
                &mut edges,
                class_id_map,
                &mut impl_target_map,
                &mut imports,
                "",
            );
        }
    }
    (nodes, edges, imports)
}
fn extract_rust_node(
node: tree_sitter::Node,
source: &[u8],
source_str: &str,
path: &str,
file_id: &str,
nodes: &mut Vec<CodeNode>,
edges: &mut Vec<CodeEdge>,
class_id_map: &mut HashMap<String, String>,
impl_target_map: &mut HashMap<String, String>,
imports: &mut HashSet<String>,
module_prefix: &str,
) {
let text = |n: tree_sitter::Node| -> String {
n.utf8_text(source).unwrap_or("").to_string()
};
match node.kind() {
"use_declaration" => {
let use_text = text(node);
if let Some(path_part) = use_text.strip_prefix("use ") {
let clean_path = path_part.trim_end_matches(';').trim();
if !clean_path.starts_with("std::") && !clean_path.starts_with("core::") && !clean_path.starts_with("alloc::") {
let module = if clean_path.contains('{') {
clean_path.split("::").next().unwrap_or(clean_path).to_string()
} else {
clean_path.split("::").take(2).collect::<Vec<_>>().join("::")
};
if !module.is_empty() {
edges.push(CodeEdge {
from: file_id.to_string(),
to: format!("module_ref:{}", module),
relation: EdgeRelation::Imports,
weight: 0.5,
call_count: 1,
in_error_path: false,
confidence: 1.0,
});
imports.insert(module);
}
}
}
}
"struct_item" => {
let name = node.child_by_field_name("name")
.and_then(|n| n.utf8_text(source).ok())
.unwrap_or("")
.to_string();
if name.is_empty() { return; }
let full_name = if module_prefix.is_empty() { name.clone() } else { format!("{}::{}", module_prefix, name) };
let line = node.start_position().row + 1;
let class_id = format!("class:{}:{}", path, full_name);
let signature = extract_rust_signature(node, source_str);
let docstring = extract_rust_docstring(node, source_str);
let line_count = node.end_position().row - node.start_position().row + 1;
nodes.push(CodeNode {
id: class_id.clone(),
kind: NodeKind::Class,
name: full_name.clone(),
file_path: path.to_string(),
line: Some(line),
decorators: extract_rust_attributes(node, source),
signature,
docstring,
line_count,
is_test: path.contains("/tests/") || full_name.contains("Test"),
});
edges.push(CodeEdge::defined_in(&class_id, file_id));
class_id_map.insert(name.clone(), class_id);
}
"enum_item" => {
let name = node.child_by_field_name("name")
.and_then(|n| n.utf8_text(source).ok())
.unwrap_or("")
.to_string();
if name.is_empty() { return; }
let full_name = if module_prefix.is_empty() { name.clone() } else { format!("{}::{}", module_prefix, name) };
let line = node.start_position().row + 1;
let class_id = format!("class:{}:{}", path, full_name);
let signature = extract_rust_signature(node, source_str);
let docstring = extract_rust_docstring(node, source_str);
let line_count = node.end_position().row - node.start_position().row + 1;
nodes.push(CodeNode {
id: class_id.clone(),
kind: NodeKind::Class,
name: full_name.clone(),
file_path: path.to_string(),
line: Some(line),
decorators: extract_rust_attributes(node, source),
signature,
docstring,
line_count,
is_test: path.contains("/tests/") || full_name.contains("Test"),
});
edges.push(CodeEdge::defined_in(&class_id, file_id));
class_id_map.insert(name.clone(), class_id);
}
"trait_item" => {
let name = node.child_by_field_name("name")
.and_then(|n| n.utf8_text(source).ok())
.unwrap_or("")
.to_string();
if name.is_empty() { return; }
let full_name = if module_prefix.is_empty() { name.clone() } else { format!("{}::{}", module_prefix, name) };
let line = node.start_position().row + 1;
let trait_id = format!("class:{}:{}", path, full_name);
let signature = extract_rust_signature(node, source_str);
let docstring = extract_rust_docstring(node, source_str);
let line_count = node.end_position().row - node.start_position().row + 1;
nodes.push(CodeNode {
id: trait_id.clone(),
kind: NodeKind::Class,
name: full_name.clone(),
file_path: path.to_string(),
line: Some(line),
decorators: extract_rust_attributes(node, source),
signature,
docstring,
line_count,
is_test: path.contains("/tests/") || full_name.contains("Test"),
});
edges.push(CodeEdge::defined_in(&trait_id, file_id));
class_id_map.insert(name.clone(), trait_id.clone());
if let Some(body) = node.child_by_field_name("body") {
let mut body_cursor = body.walk();
for body_child in body.children(&mut body_cursor) {
if body_child.kind() == "function_item" || body_child.kind() == "function_signature_item" {
extract_rust_method(body_child, source, source_str, path, &trait_id, nodes, edges);
}
}
}
}
"impl_item" => {
let mut trait_name: Option<String> = None;
let mut type_name: Option<String> = None;
let mut cursor = node.walk();
for child in node.children(&mut cursor) {
match child.kind() {
"type_identifier" | "generic_type" => {
let name = if child.kind() == "generic_type" {
child.child_by_field_name("type")
.and_then(|n| n.utf8_text(source).ok())
.unwrap_or("")
.to_string()
} else {
text(child)
};
if type_name.is_none() {
type_name = Some(name);
} else if trait_name.is_none() {
trait_name = type_name.take();
type_name = Some(name);
}
}
_ => {}
}
}
let type_name = match type_name {
Some(n) => n,
None => return,
};
let type_id = class_id_map.get(&type_name)
.cloned()
.unwrap_or_else(|| format!("class:{}:{}", path, type_name));
if let Some(ref trait_n) = trait_name {
edges.push(CodeEdge {
from: type_id.clone(),
to: format!("class_ref:{}", trait_n),
relation: EdgeRelation::Inherits,
weight: 0.5,
call_count: 1,
in_error_path: false,
confidence: 1.0,
});
}
if let Some(body) = node.child_by_field_name("body") {
let mut body_cursor = body.walk();
for body_child in body.children(&mut body_cursor) {
if body_child.kind() == "function_item" {
extract_rust_method(body_child, source, source_str, path, &type_id, nodes, edges);
}
}
}
}
"function_item" => {
let name = node.child_by_field_name("name")
.and_then(|n| n.utf8_text(source).ok())
.unwrap_or("")
.to_string();
if name.is_empty() { return; }
let full_name = if module_prefix.is_empty() { name.clone() } else { format!("{}::{}", module_prefix, name) };
let line = node.start_position().row + 1;
let func_id = format!("func:{}:{}", path, full_name);
let signature = extract_rust_signature(node, source_str);
let docstring = extract_rust_docstring(node, source_str);
let line_count = node.end_position().row - node.start_position().row + 1;
let is_test = path.contains("/tests/") || full_name.starts_with("test_") ||
extract_rust_attributes(node, source).iter().any(|a| a.contains("test"));
nodes.push(CodeNode {
id: func_id.clone(),
kind: NodeKind::Function,
name: full_name,
file_path: path.to_string(),
line: Some(line),
decorators: extract_rust_attributes(node, source),
signature,
docstring,
line_count,
is_test,
});
edges.push(CodeEdge::defined_in(&func_id, file_id));
}
"mod_item" => {
let name = node.child_by_field_name("name")
.and_then(|n| n.utf8_text(source).ok())
.unwrap_or("")
.to_string();
if name.is_empty() { return; }
let new_prefix = if module_prefix.is_empty() { name.clone() } else { format!("{}::{}", module_prefix, name) };
if let Some(body) = node.child_by_field_name("body") {
let mut body_cursor = body.walk();
for body_child in body.children(&mut body_cursor) {
extract_rust_node(
body_child,
source,
source_str,
path,
file_id,
nodes,
edges,
class_id_map,
impl_target_map,
imports,
&new_prefix,
);
}
}
}
"type_item" => {
let name = node.child_by_field_name("name")
.and_then(|n| n.utf8_text(source).ok())
.unwrap_or("")
.to_string();
if name.is_empty() { return; }
let full_name = if module_prefix.is_empty() { name.clone() } else { format!("{}::{}", module_prefix, name) };
let line = node.start_position().row + 1;
let type_id = format!("class:{}:{}", path, full_name);
let signature = extract_rust_signature(node, source_str);
let line_count = node.end_position().row - node.start_position().row + 1;
nodes.push(CodeNode {
id: type_id.clone(),
kind: NodeKind::Class,
name: full_name.clone(),
file_path: path.to_string(),
line: Some(line),
decorators: extract_rust_attributes(node, source),
signature,
docstring: None,
line_count,
is_test: false,
});
edges.push(CodeEdge::defined_in(&type_id, file_id));
class_id_map.insert(name, type_id);
}
"const_item" | "static_item" => {
let name = node.child_by_field_name("name")
.and_then(|n| n.utf8_text(source).ok())
.unwrap_or("")
.to_string();
if name.is_empty() || name.starts_with('_') { return; }
let full_name = if module_prefix.is_empty() { name.clone() } else { format!("{}::{}", module_prefix, name) };
let line = node.start_position().row + 1;
let const_id = format!("const:{}:{}", path, full_name);
let signature = extract_rust_signature(node, source_str);
nodes.push(CodeNode {
id: const_id.clone(),
kind: NodeKind::Class, name: full_name,
file_path: path.to_string(),
line: Some(line),
decorators: extract_rust_attributes(node, source),
signature,
docstring: None,
line_count: 1,
is_test: false,
});
edges.push(CodeEdge::defined_in(&const_id, file_id));
}
"macro_definition" => {
let name = node.child_by_field_name("name")
.and_then(|n| n.utf8_text(source).ok())
.unwrap_or("")
.to_string();
if name.is_empty() { return; }
let full_name = if module_prefix.is_empty() { name.clone() } else { format!("{}::{}", module_prefix, name) };
let line = node.start_position().row + 1;
let macro_id = format!("macro:{}:{}", path, full_name);
let line_count = node.end_position().row - node.start_position().row + 1;
nodes.push(CodeNode {
id: macro_id.clone(),
kind: NodeKind::Function, name: format!("{}!", full_name),
file_path: path.to_string(),
line: Some(line),
decorators: vec!["macro".to_string()],
signature: Some(format!("macro_rules! {}", name)),
docstring: extract_rust_docstring(node, source_str),
line_count,
is_test: false,
});
edges.push(CodeEdge::defined_in(¯o_id, file_id));
}
_ => {}
}
}
/// Records a function found inside a trait or impl body.
///
/// Pushes a `Function` node with id `method:{path}:{name}` and a `DefinedIn`
/// edge from it to `parent_id`. The node counts as a test when the path
/// contains `/tests/`, the name starts with `test_`, or any attribute text
/// contains `test`. Nodes without a readable, non-empty name are ignored.
fn extract_rust_method(
    node: tree_sitter::Node,
    source: &[u8],
    source_str: &str,
    path: &str,
    parent_id: &str,
    nodes: &mut Vec<CodeNode>,
    edges: &mut Vec<CodeEdge>,
) {
    let name = match node
        .child_by_field_name("name")
        .and_then(|n| n.utf8_text(source).ok())
    {
        Some(found) if !found.is_empty() => found.to_string(),
        _ => return,
    };

    let method_id = format!("method:{}:{}", path, name);
    let first_row = node.start_position().row;
    let last_row = node.end_position().row;
    let signature = extract_rust_signature(node, source_str);
    let docstring = extract_rust_docstring(node, source_str);
    let attrs = extract_rust_attributes(node, source);

    let has_test_attr = attrs.iter().any(|attr| attr.contains("test"));
    let is_test = path.contains("/tests/") || name.starts_with("test_") || has_test_attr;

    nodes.push(CodeNode {
        id: method_id.clone(),
        kind: NodeKind::Function,
        name,
        file_path: path.to_string(),
        line: Some(first_row + 1),
        decorators: attrs,
        signature,
        docstring,
        line_count: last_row - first_row + 1,
        is_test,
    });
    edges.push(CodeEdge {
        from: method_id,
        to: parent_id.to_string(),
        relation: EdgeRelation::DefinedIn,
        weight: 0.5,
        call_count: 1,
        in_error_path: false,
        confidence: 1.0,
    });
}
/// Collects the attributes attached to a Rust item, with the leading `#[`
/// and trailing `]` trimmed off.
///
/// Two sources are combined, in this order:
/// 1. `attribute_item` siblings that precede `node` in its parent. Line
///    comments between an attribute and the item are tolerated; any other
///    sibling in between discards what was collected so far, because those
///    attributes belong to that sibling.
/// 2. `attribute_item` nodes that are direct children of `node`.
fn extract_rust_attributes(node: tree_sitter::Node, source: &[u8]) -> Vec<String> {
    let strip = |raw: &str| -> String {
        raw.trim_start_matches("#[").trim_end_matches(']').to_string()
    };

    let mut attrs = Vec::new();
    if let Some(parent) = node.parent() {
        let mut cursor = parent.walk();
        for sibling in parent.children(&mut cursor) {
            if sibling.id() == node.id() {
                // Always stop at the item itself: whatever is pending now is
                // what precedes it, and later siblings must not contribute.
                break;
            }
            match sibling.kind() {
                "attribute_item" => {
                    if let Ok(raw) = sibling.utf8_text(source) {
                        attrs.push(strip(raw));
                    }
                }
                // A comment between an attribute and its item keeps the
                // pending attributes attached.
                "line_comment" => {}
                // Any other sibling consumed the pending attributes.
                _ => attrs.clear(),
            }
        }
    }

    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        if child.kind() == "attribute_item" {
            if let Ok(raw) = child.utf8_text(source) {
                attrs.push(strip(raw));
            }
        }
    }
    attrs
}
/// Returns the declaration part of a Rust item: its text from the start of
/// the node up to the first match of, in order of preference, `" {"`,
/// `"\n{"`, `";\n"` or `";"`, trimmed.
///
/// The search is confined to the item's own byte range so that an item
/// without a body (for example `type A = B;`) cannot pick up a brace from a
/// later item. When no terminator exists, at most the first 200 bytes of the
/// item are used. Returns `None` when the node's range is not a valid slice
/// of `source_str` or when the result is empty.
fn extract_rust_signature(node: tree_sitter::Node, source_str: &str) -> Option<String> {
    let end = node.end_byte().min(source_str.len());
    let item_text = source_str.get(node.start_byte()..end)?;
    let sig_end = item_text
        .find(" {")
        .or_else(|| item_text.find("\n{"))
        .or_else(|| item_text.find(";\n"))
        .or_else(|| item_text.find(';'))
        .unwrap_or_else(|| {
            // Back up to a character boundary so the slice below cannot
            // split a multi-byte character.
            let mut cut = item_text.len().min(200);
            while !item_text.is_char_boundary(cut) {
                cut -= 1;
            }
            cut
        });
    let sig = item_text[..sig_end].trim();
    if sig.is_empty() {
        None
    } else {
        Some(sig.to_string())
    }
}
/// Returns the first line of the doc comment above a Rust item, limited to
/// 100 bytes.
///
/// Lines above the item are scanned upward. `///` and `//!` lines are
/// collected with their marker removed; blank lines and lines starting with
/// `#[` are skipped; any other line ends the scan. Returns `None` when the
/// item is on the first line, when no doc line is found, or when the topmost
/// doc line is empty.
fn extract_rust_docstring(node: tree_sitter::Node, source_str: &str) -> Option<String> {
    let start_line = node.start_position().row;
    if start_line == 0 {
        return None;
    }
    // Only the lines above the item are relevant.
    let preceding: Vec<&str> = source_str.lines().take(start_line).collect();

    let mut doc_lines: Vec<&str> = Vec::new();
    for raw in preceding.iter().rev() {
        let line = raw.trim();
        if line.starts_with("///") {
            doc_lines.push(line.trim_start_matches("///").trim());
        } else if line.starts_with("//!") {
            doc_lines.push(line.trim_start_matches("//!").trim());
        } else if line.is_empty() || line.starts_with("#[") {
            continue;
        } else {
            break;
        }
    }

    // Lines were gathered bottom-up, so the last one pushed is the topmost.
    let first_line = doc_lines.last().copied()?;

    // Cut at a character boundary at or below 100 bytes; slicing at a fixed
    // byte offset would panic inside a multi-byte character.
    let mut cut = first_line.len().min(100);
    while !first_line.is_char_boundary(cut) {
        cut -= 1;
    }
    let truncated = &first_line[..cut];
    if truncated.is_empty() {
        None
    } else {
        Some(truncated.to_string())
    }
}
/// Parses `content` as TypeScript or JavaScript and extracts graph nodes,
/// edges and imported names from the top-level statements via
/// `extract_typescript_node`.
///
/// `extension` selects the grammar: `tsx` and `ts` use the TypeScript
/// grammars, every other value uses the JavaScript grammar. Returns three
/// empty collections when the grammar cannot be installed on `parser` or when
/// parsing yields no tree.
fn extract_typescript_tree_sitter(
    path: &str,
    content: &str,
    parser: &mut Parser,
    class_id_map: &mut HashMap<String, String>,
    extension: &str,
) -> (Vec<CodeNode>, Vec<CodeEdge>, HashSet<String>) {
    let mut nodes = Vec::new();
    let mut edges = Vec::new();
    let mut imports = HashSet::new();

    let lang_result = match extension {
        "tsx" => parser.set_language(&tree_sitter_typescript::LANGUAGE_TSX.into()),
        "ts" => parser.set_language(&tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into()),
        // "jsx" and every remaining extension share the JavaScript grammar.
        _ => parser.set_language(&tree_sitter_javascript::LANGUAGE.into()),
    };
    let parsed = if lang_result.is_ok() {
        parser.parse(content, None)
    } else {
        None
    };

    if let Some(tree) = parsed {
        let file_id = format!("file:{}", path);
        let source = content.as_bytes();
        let root = tree.root_node();
        let mut cursor = root.walk();
        for statement in root.children(&mut cursor) {
            extract_typescript_node(
                statement,
                source,
                content,
                path,
                &file_id,
                &mut nodes,
                &mut edges,
                class_id_map,
                &mut imports,
            );
        }
    }
    (nodes, edges, imports)
}
fn extract_typescript_node(
node: tree_sitter::Node,
source: &[u8],
source_str: &str,
path: &str,
file_id: &str,
nodes: &mut Vec<CodeNode>,
edges: &mut Vec<CodeEdge>,
class_id_map: &mut HashMap<String, String>,
imports: &mut HashSet<String>,
) {
let text = |n: tree_sitter::Node| -> String {
n.utf8_text(source).unwrap_or("").to_string()
};
match node.kind() {
"import_statement" => {
let import_text = text(node);
if let Some(from_idx) = import_text.rfind(" from ") {
let module_part = import_text[from_idx + 6..].trim();
let module = module_part.trim_matches(|c| c == '\'' || c == '"' || c == ';');
if module.starts_with('.') || module.starts_with("@/") {
edges.push(CodeEdge {
from: file_id.to_string(),
to: format!("module_ref:{}", module),
relation: EdgeRelation::Imports,
weight: 0.5,
call_count: 1,
in_error_path: false,
confidence: 1.0,
});
}
imports.insert(module.to_string());
if let Some(start) = import_text.find('{') {
if let Some(end) = import_text.find('}') {
let names_part = &import_text[start+1..end];
for name in names_part.split(',') {
let clean = name.trim().split(" as ").next().unwrap_or("").trim();
if !clean.is_empty() {
imports.insert(clean.to_string());
}
}
}
}
}
}
"class_declaration" | "class" => {
extract_typescript_class(node, source, source_str, path, file_id, nodes, edges, class_id_map);
}
"abstract_class_declaration" => {
extract_typescript_class(node, source, source_str, path, file_id, nodes, edges, class_id_map);
}
"interface_declaration" => {
let name = node.child_by_field_name("name")
.and_then(|n| n.utf8_text(source).ok())
.unwrap_or("")
.to_string();
if name.is_empty() { return; }
let line = node.start_position().row + 1;
let interface_id = format!("class:{}:{}", path, name);
let signature = extract_typescript_signature(node, source_str);
let line_count = node.end_position().row - node.start_position().row + 1;
nodes.push(CodeNode {
id: interface_id.clone(),
kind: NodeKind::Class,
name: name.clone(),
file_path: path.to_string(),
line: Some(line),
decorators: vec!["interface".to_string()],
signature,
docstring: extract_typescript_docstring(node, source_str),
line_count,
is_test: path.contains("/test") || name.contains("Test"),
});
edges.push(CodeEdge::defined_in(&interface_id, file_id));
class_id_map.insert(name, interface_id);
}
"function_declaration" | "function" => {
let name = node.child_by_field_name("name")
.and_then(|n| n.utf8_text(source).ok())
.unwrap_or("")
.to_string();
if name.is_empty() { return; }
let line = node.start_position().row + 1;
let func_id = format!("func:{}:{}", path, name);
let signature = extract_typescript_signature(node, source_str);
let docstring = extract_typescript_docstring(node, source_str);
let line_count = node.end_position().row - node.start_position().row + 1;
let decorators = extract_typescript_decorators(node, source);
nodes.push(CodeNode {
id: func_id.clone(),
kind: NodeKind::Function,
name,
file_path: path.to_string(),
line: Some(line),
decorators,
signature,
docstring,
line_count,
is_test: path.contains("/test") || path.contains(".test.") || path.contains(".spec."),
});
edges.push(CodeEdge::defined_in(&func_id, file_id));
}
"lexical_declaration" | "variable_declaration" => {
let mut cursor = node.walk();
for child in node.children(&mut cursor) {
if child.kind() == "variable_declarator" {
let name = child.child_by_field_name("name")
.and_then(|n| n.utf8_text(source).ok())
.unwrap_or("")
.to_string();
if let Some(value) = child.child_by_field_name("value") {
if value.kind() == "arrow_function" || value.kind() == "function" {
if name.is_empty() { continue; }
let line = node.start_position().row + 1;
let func_id = format!("func:{}:{}", path, name);
let signature = extract_typescript_signature(node, source_str);
let line_count = node.end_position().row - node.start_position().row + 1;
nodes.push(CodeNode {
id: func_id.clone(),
kind: NodeKind::Function,
name,
file_path: path.to_string(),
line: Some(line),
decorators: Vec::new(),
signature,
docstring: extract_typescript_docstring(node, source_str),
line_count,
is_test: path.contains("/test") || path.contains(".test.") || path.contains(".spec."),
});
edges.push(CodeEdge::defined_in(&func_id, file_id));
}
}
}
}
}
"enum_declaration" => {
let name = node.child_by_field_name("name")
.and_then(|n| n.utf8_text(source).ok())
.unwrap_or("")
.to_string();
if name.is_empty() { return; }
let line = node.start_position().row + 1;
let enum_id = format!("class:{}:{}", path, name);
let signature = extract_typescript_signature(node, source_str);
let line_count = node.end_position().row - node.start_position().row + 1;
nodes.push(CodeNode {
id: enum_id.clone(),
kind: NodeKind::Class,
name: name.clone(),
file_path: path.to_string(),
line: Some(line),
decorators: vec!["enum".to_string()],
signature,
docstring: extract_typescript_docstring(node, source_str),
line_count,
is_test: false,
});
edges.push(CodeEdge::defined_in(&enum_id, file_id));
class_id_map.insert(name, enum_id);
}
"type_alias_declaration" => {
let name = node.child_by_field_name("name")
.and_then(|n| n.utf8_text(source).ok())
.unwrap_or("")
.to_string();
if name.is_empty() { return; }
let line = node.start_position().row + 1;
let type_id = format!("class:{}:{}", path, name);
let signature = extract_typescript_signature(node, source_str);
let line_count = node.end_position().row - node.start_position().row + 1;
nodes.push(CodeNode {
id: type_id.clone(),
kind: NodeKind::Class,
name: name.clone(),
file_path: path.to_string(),
line: Some(line),
decorators: vec!["type".to_string()],
signature,
docstring: None,
line_count,
is_test: false,
});
edges.push(CodeEdge::defined_in(&type_id, file_id));
class_id_map.insert(name, type_id);
}
"export_statement" => {
let mut cursor = node.walk();
for child in node.children(&mut cursor) {
match child.kind() {
"class_declaration" | "class" | "abstract_class_declaration" |
"interface_declaration" | "function_declaration" | "function" |
"lexical_declaration" | "variable_declaration" | "enum_declaration" |
"type_alias_declaration" => {
extract_typescript_node(child, source, source_str, path, file_id, nodes, edges, class_id_map, imports);
}
_ => {}
}
}
}
"expression_statement" => {
let mut cursor = node.walk();
for child in node.children(&mut cursor) {
extract_typescript_node(child, source, source_str, path, file_id, nodes, edges, class_id_map, imports);
}
}
"module" | "internal_module" | "namespace" => {
let name = node.child_by_field_name("name")
.and_then(|n| n.utf8_text(source).ok())
.unwrap_or("")
.to_string();
if !name.is_empty() {
let line = node.start_position().row + 1;
let module_id = format!("class:{}:{}", path, name);
nodes.push(CodeNode {
id: module_id.clone(),
kind: NodeKind::Class,
name: name.clone(),
file_path: path.to_string(),
line: Some(line),
decorators: vec!["namespace".to_string()],
signature: Some(format!("namespace {}", name)),
docstring: None,
line_count: node.end_position().row - node.start_position().row + 1,
is_test: false,
});
edges.push(CodeEdge::defined_in(&module_id, file_id));
}
if let Some(body) = node.child_by_field_name("body") {
let mut body_cursor = body.walk();
for body_child in body.children(&mut body_cursor) {
extract_typescript_node(body_child, source, source_str, path, file_id, nodes, edges, class_id_map, imports);
}
}
}
_ => {}
}
}
/// Records a TypeScript class-like declaration, its parent type and its members.
///
/// For a declaration with a non-empty `name` field this:
/// * pushes a `NodeKind::Class` node with id `class:<path>:<name>`,
/// * pushes a `DefinedIn` edge from that node to `file_id`,
/// * registers `name -> id` in `class_id_map`,
/// * pushes an `Inherits` edge to `class_ref:<parent>` for the first identifier
///   found under each heritage clause, and
/// * forwards every `method_definition`, `public_field_definition` and
///   `method_signature` in the body to [`extract_typescript_method`].
///
/// Declarations without a name are ignored entirely.
///
/// # Parameters
/// * `node` - the declaration node to inspect.
/// * `source` / `source_str` - the file contents as bytes and as text.
/// * `path` - file path used to build ids and the `is_test` flag.
/// * `file_id` - id of the file node that owns this declaration.
/// * `nodes` / `edges` - output collections that are appended to.
/// * `class_id_map` - name-to-id lookup that is updated with this declaration.
fn extract_typescript_class(
    node: tree_sitter::Node,
    source: &[u8],
    source_str: &str,
    path: &str,
    file_id: &str,
    nodes: &mut Vec<CodeNode>,
    edges: &mut Vec<CodeEdge>,
    class_id_map: &mut HashMap<String, String>,
) {
    /// Returns the first identifier found under a heritage clause, descending
    /// through nested clause wrappers.
    fn find_extends_identifier(node: tree_sitter::Node, source: &[u8]) -> Option<String> {
        let mut cursor = node.walk();
        for child in node.children(&mut cursor) {
            match child.kind() {
                "identifier" | "type_identifier" => {
                    return child.utf8_text(source).ok().map(|s| s.to_string());
                }
                "extends_clause" | "class_heritage" | "extends_type_clause" => {
                    if let Some(name) = find_extends_identifier(child, source) {
                        return Some(name);
                    }
                }
                _ => {}
            }
        }
        None
    }

    let name = node
        .child_by_field_name("name")
        .and_then(|n| n.utf8_text(source).ok())
        .unwrap_or("")
        .to_string();
    if name.is_empty() {
        return;
    }

    // tree-sitter rows are 0-based; the graph stores 1-based line numbers.
    let line = node.start_position().row + 1;
    let class_id = format!("class:{}:{}", path, name);
    let signature = extract_typescript_signature(node, source_str);
    let docstring = extract_typescript_docstring(node, source_str);
    let line_count = node.end_position().row - node.start_position().row + 1;
    let decorators = extract_typescript_decorators(node, source);

    nodes.push(CodeNode {
        id: class_id.clone(),
        kind: NodeKind::Class,
        name: name.clone(),
        file_path: path.to_string(),
        line: Some(line),
        decorators,
        signature,
        docstring,
        line_count,
        is_test: path.contains("/test") || name.contains("Test"),
    });
    edges.push(CodeEdge::defined_in(&class_id, file_id));
    class_id_map.insert(name, class_id.clone());

    // `extends_type_clause` is included here as well as in the helper: it is a
    // direct child of the declaration (not nested under `class_heritage`), so
    // without this check the helper's branch for it could never be reached.
    // NOTE(review): this matters only if interface declarations are routed to
    // this function - confirm against the caller.
    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        let is_heritage = matches!(
            child.kind(),
            "class_heritage" | "extends_clause" | "extends_type_clause"
        );
        if !is_heritage {
            continue;
        }
        if let Some(parent_name) = find_extends_identifier(child, source) {
            if !parent_name.is_empty() {
                edges.push(CodeEdge {
                    from: class_id.clone(),
                    to: format!("class_ref:{}", parent_name),
                    relation: EdgeRelation::Inherits,
                    weight: 0.5,
                    call_count: 1,
                    in_error_path: false,
                    confidence: 1.0,
                });
            }
        }
    }

    if let Some(body) = node.child_by_field_name("body") {
        let mut body_cursor = body.walk();
        for body_child in body.children(&mut body_cursor) {
            match body_child.kind() {
                "method_definition" | "public_field_definition" | "method_signature" => {
                    extract_typescript_method(
                        body_child, source, source_str, path, &class_id, nodes, edges,
                    );
                }
                _ => {}
            }
        }
    }
}
/// Adds one class member (method, field or method signature) to the graph.
///
/// The member name comes from the node's `name` field; when that is missing or
/// empty, the text of the first `property_identifier` / `identifier` child is
/// used instead. Members that still have no name are skipped.
///
/// On success a `NodeKind::Function` node with id `method:<path>:<name>` is
/// appended to `nodes`, followed by a `DefinedIn` edge from the member to
/// `class_id` in `edges`.
///
/// NOTE(review): the id does not contain the class name, so same-named members
/// of different classes in one file produce the same id.
fn extract_typescript_method(
    node: tree_sitter::Node,
    source: &[u8],
    source_str: &str,
    path: &str,
    class_id: &str,
    nodes: &mut Vec<CodeNode>,
    edges: &mut Vec<CodeEdge>,
) {
    let declared = node
        .child_by_field_name("name")
        .and_then(|field| field.utf8_text(source).ok())
        .unwrap_or("");

    let name = if declared.is_empty() {
        // No usable `name` field: fall back to the first identifier-like child
        // whose text decodes as UTF-8.
        let mut walker = node.walk();
        let fallback = node
            .children(&mut walker)
            .filter(|child| matches!(child.kind(), "property_identifier" | "identifier"))
            .find_map(|child| child.utf8_text(source).ok().map(str::to_owned));
        fallback.unwrap_or_default()
    } else {
        declared.to_owned()
    };
    if name.is_empty() {
        return;
    }

    let first_row = node.start_position().row;
    let last_row = node.end_position().row;
    let method_id = format!("method:{}:{}", path, name);
    let in_test_file =
        path.contains("/test") || path.contains(".test.") || path.contains(".spec.");

    let member = CodeNode {
        id: method_id.clone(),
        kind: NodeKind::Function,
        name,
        file_path: path.to_string(),
        // Rows are 0-based in tree-sitter; stored lines are 1-based.
        line: Some(first_row + 1),
        decorators: extract_typescript_decorators(node, source),
        signature: extract_typescript_signature(node, source_str),
        docstring: extract_typescript_docstring(node, source_str),
        line_count: last_row - first_row + 1,
        is_test: in_test_file,
    };
    nodes.push(member);

    let owner_link = CodeEdge {
        from: method_id,
        to: class_id.to_string(),
        relation: EdgeRelation::DefinedIn,
        weight: 0.5,
        call_count: 1,
        in_error_path: false,
        confidence: 1.0,
    };
    edges.push(owner_link);
}
/// Collects the names of the decorators that immediately precede `node` among
/// its parent's children.
///
/// Each decorator's text is reduced to its name: the leading `@` is removed
/// and everything from the first `(` onward is dropped, so `@Injectable()`
/// yields `Injectable`. Returns an empty vector when `node` has no parent or
/// no decorators.
///
/// Only decorators that belong to `node` are returned: when another named
/// sibling (for example an earlier class member) sits between a decorator and
/// `node`, the decorators collected so far are discarded, because they were
/// attached to that earlier sibling. Comments and anonymous tokens such as the
/// `export` keyword do not reset the list.
fn extract_typescript_decorators(node: tree_sitter::Node, source: &[u8]) -> Vec<String> {
    let mut decorators = Vec::new();
    if let Some(parent) = node.parent() {
        let mut cursor = parent.walk();
        for child in parent.children(&mut cursor) {
            if child.id() == node.id() {
                break;
            }
            match child.kind() {
                "decorator" => {
                    if let Ok(dec_text) = child.utf8_text(source) {
                        let name = dec_text.trim_start_matches('@');
                        let name = name.split('(').next().unwrap_or(name).trim();
                        if !name.is_empty() {
                            decorators.push(name.to_string());
                        }
                    }
                }
                // Comments may legitimately sit between a decorator and its target.
                "comment" => {}
                // A different named sibling ends the previous decorator run;
                // whatever was collected belongs to that sibling, not to `node`.
                _ if child.is_named() => decorators.clear(),
                _ => {}
            }
        }
    }
    decorators
}
/// Returns the declaration header of `node`: its source text up to the opening
/// brace of its body.
///
/// The brace is located by searching the node's own text for `" {"`, then
/// `"\n{"`, then `"{\n"`. When none is present (for example a type alias or a
/// method signature without a body) the node's text is used, limited to at
/// most 200 bytes. The result is trimmed; `None` is returned when nothing
/// remains or when the node's byte range does not address valid text in
/// `source_str`.
///
/// The search is confined to the node's byte range so that a brace-less
/// declaration cannot absorb the declarations that follow it in the file.
fn extract_typescript_signature(node: tree_sitter::Node, source_str: &str) -> Option<String> {
    /// Upper bound, in bytes, for signatures that contain no opening brace.
    const MAX_BRACELESS_LEN: usize = 200;

    let start = node.start_byte();
    let end = node.end_byte().min(source_str.len());
    // `get` yields `None` for out-of-range or non-char-boundary offsets
    // instead of panicking.
    let node_text = source_str.get(start..end)?;

    let sig_end = node_text
        .find(" {")
        .or_else(|| node_text.find("\n{"))
        .or_else(|| node_text.find("{\n"))
        .unwrap_or_else(|| {
            // Back off to a char boundary so multi-byte text cannot cause a
            // slicing panic at the cut-off point.
            let mut cut = node_text.len().min(MAX_BRACELESS_LEN);
            while !node_text.is_char_boundary(cut) {
                cut -= 1;
            }
            cut
        });

    let sig = node_text[..sig_end].trim();
    if sig.is_empty() {
        None
    } else {
        Some(sig.to_string())
    }
}
/// Returns the first descriptive line of the `/** ... */` comment placed
/// directly above `node`, if there is one.
///
/// Lines above the node are scanned upward. Blank lines, lines starting with
/// `@` (decorators) and `//` comments are skipped; any other line that does
/// not end a block comment stops the search. Once a line ending in `*/` is
/// found, the comment is walked up to its `/**` opener and the topmost line
/// that has text and does not start with `@` (a tag such as `@param`) is
/// returned, limited to at most 100 bytes.
///
/// Returns `None` when the node is on the first line, when no doc comment is
/// found, when the comment is a plain `/* ... */` block, or when the comment
/// has no descriptive text.
fn extract_typescript_docstring(node: tree_sitter::Node, source_str: &str) -> Option<String> {
    /// Maximum length, in bytes, of the returned summary.
    const MAX_SUMMARY_LEN: usize = 100;

    let start_line = node.start_position().row;
    if start_line == 0 {
        return None;
    }
    let lines: Vec<&str> = source_str.lines().collect();

    // Clamp in case the node's row lies past the last text line.
    let scan_from = start_line.min(lines.len());
    for i in (0..scan_from).rev() {
        let line = lines[i].trim();
        if line.ends_with("*/") {
            let mut summary = "";
            let mut found_opener = false;
            for doc_line in lines[..=i].iter().rev().map(|l| l.trim()) {
                // Drop the terminator so a closing `*/` line contributes no
                // text and a one-line `/** text */` yields just `text`.
                let inner = doc_line.strip_suffix("*/").unwrap_or(doc_line);
                if doc_line.starts_with("/**") {
                    let text = inner
                        .strip_prefix("/**")
                        .unwrap_or("")
                        .trim_start_matches('*')
                        .trim();
                    if !text.is_empty() && !text.starts_with('@') {
                        summary = text;
                    }
                    found_opener = true;
                    break;
                } else if doc_line.starts_with("/*") {
                    // Plain block comment, not a doc comment: stop here rather
                    // than walking into unrelated comments further up.
                    break;
                } else if doc_line.starts_with('*') {
                    let text = inner.trim_start_matches('*').trim();
                    if !text.is_empty() && !text.starts_with('@') {
                        // Walking upward, so the last assignment is the
                        // topmost descriptive line.
                        summary = text;
                    }
                }
            }
            if !found_opener || summary.is_empty() {
                return None;
            }
            // Cut on a char boundary; slicing at a fixed byte offset would
            // panic inside a multi-byte character.
            let mut cut = summary.len().min(MAX_SUMMARY_LEN);
            while !summary.is_char_boundary(cut) {
                cut -= 1;
            }
            let truncated = &summary[..cut];
            return if truncated.is_empty() {
                None
            } else {
                Some(truncated.to_string())
            };
        } else if line.is_empty() || line.starts_with('@') || line.starts_with("//") {
            continue;
        } else {
            break;
        }
    }
    None
}
/// Regex-based extractor for Rust source text.
///
/// Scans `content` line-anchored for `use`, `struct`, `enum`, `impl ... for`
/// and `fn` items and returns:
/// * one class node per struct/enum and one function node per `fn`, each with
///   a `DefinedIn` edge to `file:<path>`,
/// * an `Imports` edge to `module_ref:<module>` for every `use` path that does
///   not start with `std::` or `core::`,
/// * an `Inherits` edge to `class_ref:<Trait>` for every `impl Trait for Type`
///   whose `Type` was found as a struct or enum in the same file.
///
/// The third tuple element is always an empty set.
///
/// Marked `dead_code` in the source, so it may be unused in the current build.
#[allow(dead_code)]
fn extract_rust_regex(path: &str, content: &str) -> (Vec<CodeNode>, Vec<CodeEdge>, HashSet<String>) {
    let mut nodes = Vec::new();
    let mut edges = Vec::new();
    let file_id = format!("file:{}", path);

    // The patterns are fixed literals, so compilation failing would be a
    // programming error rather than a runtime condition.
    let re_use = Regex::new(r"(?m)^use\s+([\w:]+)").unwrap();
    let re_struct = Regex::new(r"(?m)^(?:pub\s+)?struct\s+(\w+)").unwrap();
    let re_enum = Regex::new(r"(?m)^(?:pub\s+)?enum\s+(\w+)").unwrap();
    let re_impl = Regex::new(r"(?m)^impl(?:<[^>]+>)?\s+(?:(\w+)\s+for\s+)?(\w+)").unwrap();
    let re_fn = Regex::new(r"(?m)^\s*(?:pub\s+)?(?:async\s+)?fn\s+(\w+)").unwrap();

    // 1-based line number of the line containing byte `offset`. Counting
    // newlines is correct for any offset, including one in the middle of a line.
    let line_at = |offset: usize| content[..offset].matches('\n').count() + 1;

    for cap in re_use.captures_iter(content) {
        let module = &cap[1];
        if !module.starts_with("std::") && !module.starts_with("core::") {
            edges.push(CodeEdge::new(
                &file_id,
                &format!("module_ref:{}", module),
                EdgeRelation::Imports,
            ));
        }
    }

    // Structs and enums are both recorded as class nodes.
    for re_type in [&re_struct, &re_enum] {
        for cap in re_type.captures_iter(content) {
            if let Some(whole) = cap.get(0) {
                let node = CodeNode::new_class(path, &cap[1], line_at(whole.start()));
                edges.push(CodeEdge::defined_in(&node.id, &file_id));
                nodes.push(node);
            }
        }
    }

    for cap in re_impl.captures_iter(content) {
        // Group 1 is present only for `impl Trait for Type`.
        if let Some(trait_match) = cap.get(1) {
            let type_name = &cap[2];
            if let Some(type_node) = nodes.iter().find(|n| n.name == type_name) {
                edges.push(CodeEdge::new(
                    &type_node.id,
                    &format!("class_ref:{}", trait_match.as_str()),
                    EdgeRelation::Inherits,
                ));
            }
        }
    }

    for cap in re_fn.captures_iter(content) {
        // The pattern starts with `^\s*` and `\s` also matches newlines, so the
        // overall match may begin on an earlier blank line. The line is taken
        // from the function name, which sits on the declaration line itself.
        if let Some(name_match) = cap.get(1) {
            let node = CodeNode::new_function(
                path,
                name_match.as_str(),
                line_at(name_match.start()),
                false,
            );
            edges.push(CodeEdge::defined_in(&node.id, &file_id));
            nodes.push(node);
        }
    }

    (nodes, edges, HashSet::new())
}
/// Regex-based extractor for TypeScript source text.
///
/// Produces class nodes for `class` and `interface` declarations, function
/// nodes for `function` declarations and for `const`/`let` arrow functions,
/// a `DefinedIn` edge to `file:<path>` for each of them, an `Inherits` edge to
/// `class_ref:<Parent>` for `class X extends Parent`, and an `Imports` edge to
/// `module_ref:<spec>` for every `import ... from` whose specifier starts with
/// `.` or `@/`. All patterns are anchored at the start of a line.
///
/// The third tuple element is always an empty set.
#[allow(dead_code)]
fn extract_typescript_regex(path: &str, content: &str) -> (Vec<CodeNode>, Vec<CodeEdge>, HashSet<String>) {
    let import_re = Regex::new(r#"(?m)^import\s+.*?\s+from\s+['"]([^'"]+)['"]"#).unwrap();
    let class_re = Regex::new(r"(?m)^(?:export\s+)?(?:abstract\s+)?class\s+(\w+)(?:\s+extends\s+(\w+))?").unwrap();
    let interface_re = Regex::new(r"(?m)^(?:export\s+)?interface\s+(\w+)(?:\s+extends\s+(\w+))?").unwrap();
    let function_re = Regex::new(r"(?m)^(?:export\s+)?(?:async\s+)?function\s+(\w+)").unwrap();
    let arrow_re = Regex::new(r"(?m)^(?:export\s+)?(?:const|let)\s+(\w+)\s*=\s*(?:async\s+)?\([^)]*\)\s*=>").unwrap();

    let owner = format!("file:{}", path);
    let mut found_nodes: Vec<CodeNode> = Vec::new();
    let mut found_edges: Vec<CodeEdge> = Vec::new();

    // Line number for a match that begins at byte `offset`.
    let line_of = |offset: usize| content[..offset].lines().count() + 1;

    // Only relative (`.`) and alias (`@/`) specifiers are recorded.
    for hit in import_re.captures_iter(content) {
        let spec = &hit[1];
        let is_local = spec.starts_with('.') || spec.starts_with("@/");
        if is_local {
            let target = format!("module_ref:{}", spec);
            found_edges.push(CodeEdge::new(&owner, &target, EdgeRelation::Imports));
        }
    }

    for hit in class_re.captures_iter(content) {
        let begin = hit.get(0).unwrap().start();
        let class_node = CodeNode::new_class(path, &hit[1], line_of(begin));
        found_edges.push(CodeEdge::defined_in(&class_node.id, &owner));
        if let Some(base) = hit.get(2) {
            let target = format!("class_ref:{}", base.as_str());
            found_edges.push(CodeEdge::new(&class_node.id, &target, EdgeRelation::Inherits));
        }
        found_nodes.push(class_node);
    }

    // Interfaces are stored as class nodes; their `extends` target is not used.
    for hit in interface_re.captures_iter(content) {
        let begin = hit.get(0).unwrap().start();
        let iface_node = CodeNode::new_class(path, &hit[1], line_of(begin));
        found_edges.push(CodeEdge::defined_in(&iface_node.id, &owner));
        found_nodes.push(iface_node);
    }

    // Plain functions first, then arrow functions, both as free functions.
    for pattern in [&function_re, &arrow_re] {
        for hit in pattern.captures_iter(content) {
            let begin = hit.get(0).unwrap().start();
            let fn_node = CodeNode::new_function(path, &hit[1], line_of(begin), false);
            found_edges.push(CodeEdge::defined_in(&fn_node.id, &owner));
            found_nodes.push(fn_node);
        }
    }

    (found_nodes, found_edges, HashSet::new())
}
/// Reports whether `name` is one of the Python names this module treats as
/// built in.
///
/// The recognised set is fixed and case-sensitive. It covers a handful of
/// keywords, built-in functions and types, and common built-in exception
/// classes.
fn is_python_builtin(name: &str) -> bool {
    const KEYWORDS: &[&str] = &["if", "for", "while", "return", "not", "and", "or"];

    const BUILTINS: &[&str] = &[
        "print", "len", "range", "str", "int", "float", "list", "dict", "set", "tuple",
        "type", "isinstance", "issubclass", "super", "hasattr", "getattr", "setattr",
        "property", "staticmethod", "classmethod", "enumerate", "zip", "map", "filter",
        "sorted", "reversed", "any", "all", "min", "max", "sum", "abs", "bool", "repr",
        "hash", "id", "open", "format", "bytes", "bytearray", "memoryview", "object",
        "complex", "frozenset", "iter", "next", "callable", "delattr", "dir", "divmod",
        "eval", "exec", "globals", "hex", "input", "locals", "oct", "ord", "pow", "round",
        "slice", "vars", "chr", "bin", "breakpoint", "compile", "__import__",
    ];

    const EXCEPTIONS: &[&str] = &[
        "ValueError", "TypeError", "KeyError", "IndexError", "AttributeError",
        "RuntimeError", "Exception", "NotImplementedError", "StopIteration", "OSError",
        "IOError", "FileNotFoundError", "ImportError", "AssertionError", "NameError",
        "OverflowError", "ZeroDivisionError", "UnicodeError", "SyntaxError",
    ];

    [KEYWORDS, BUILTINS, EXCEPTIONS]
        .iter()
        .any(|group| group.contains(&name))
}
/// Reports whether the top-level package of a dotted Python module path is on
/// this module's fixed list of well-known packages.
///
/// Only the text before the first `.` is compared, and the comparison is exact
/// and case-sensitive. Despite the name, the list contains Python standard
/// library modules together with several third-party packages (for example
/// `pytest`, `numpy`, `pandas` and `setuptools`).
fn is_stdlib(module: &str) -> bool {
    let top_level = module.split_once('.').map_or(module, |(head, _)| head);
    matches!(
        top_level,
        "os" | "sys" | "re" | "json" | "math" | "io" | "abc" | "collections" | "typing"
            | "unittest" | "pytest" | "copy" | "functools" | "itertools" | "pathlib"
            | "shutil" | "tempfile" | "logging" | "warnings" | "inspect" | "textwrap"
            | "string" | "datetime" | "time" | "hashlib" | "base64" | "pickle" | "csv"
            | "xml" | "html" | "http" | "urllib" | "socket" | "threading"
            | "multiprocessing" | "subprocess" | "contextlib" | "enum" | "dataclasses"
            | "struct" | "array" | "queue" | "heapq" | "bisect" | "decimal" | "fractions"
            | "random" | "statistics" | "operator" | "pdb" | "traceback" | "dis" | "ast"
            | "token" | "importlib" | "pkgutil" | "site" | "zipimport" | "numpy" | "scipy"
            | "matplotlib" | "pandas" | "setuptools" | "pip" | "wheel" | "pkg_resources"
            | "distutils"
    )
}
// Unit tests for the tree-sitter based extractors. Each test feeds a small
// inline source snippet to one extractor and checks that the expected node
// names and edge targets appear in the result.
#[cfg(test)]
mod tests {
use super::*;
// Python: expects a class, a method, a top-level function and an edge whose
// target mentions the base class.
#[test]
fn test_extract_python() {
let content = r#"
import os
from pathlib import Path
class MyClass(BaseClass):
def method(self):
pass
def top_level():
pass
"#;
let mut parser = Parser::new();
// The Python grammar is installed on the parser explicitly before extraction.
let language = tree_sitter_python::LANGUAGE;
parser.set_language(&language.into()).unwrap();
let mut class_map = HashMap::new();
let (nodes, edges, _) = extract_python_tree_sitter("test.py", content, &mut parser, &mut class_map);
assert!(nodes.iter().any(|n| n.name == "MyClass"));
assert!(nodes.iter().any(|n| n.name == "method"));
assert!(nodes.iter().any(|n| n.name == "top_level"));
assert!(edges.iter().any(|e| e.to.contains("BaseClass")));
}
// Rust: expects a struct, a method from a trait impl, a free function, an
// import edge mentioning `module`, and an Inherits edge for the trait impl.
#[test]
fn test_extract_rust() {
let content = r#"
use std::path::Path;
use crate::module;
pub struct MyStruct {
field: i32,
}
impl MyTrait for MyStruct {
fn method(&self) {}
}
pub fn top_level() {}
"#;
// No language is set on the parser here; presumably the extractor
// configures it itself - confirm against extract_rust_tree_sitter.
let mut parser = Parser::new();
let mut class_map = HashMap::new();
let (nodes, edges, _) = extract_rust_tree_sitter("test.rs", content, &mut parser, &mut class_map);
assert!(nodes.iter().any(|n| n.name == "MyStruct"), "Should find MyStruct");
assert!(nodes.iter().any(|n| n.name == "method"), "Should find method");
assert!(nodes.iter().any(|n| n.name == "top_level"), "Should find top_level");
assert!(edges.iter().any(|e| e.to.contains("module")), "Should have module import edge");
assert!(edges.iter().any(|e| e.relation == EdgeRelation::Inherits && e.to.contains("MyTrait")),
"Should capture trait impl inheritance");
}
// Rust, broader coverage: struct, enum, trait, trait-impl and inherent-impl
// methods, a function inside a nested module, a type alias, a free function,
// and a `#[test]` function that must be flagged as a test.
#[test]
fn test_extract_rust_comprehensive() {
let content = r#"
use crate::foo::bar;
/// A documented struct
pub struct Person {
name: String,
age: u32,
}
/// A documented enum
pub enum Status {
Active,
Inactive,
}
/// A trait
pub trait Greeter {
fn greet(&self) -> String;
}
impl Greeter for Person {
fn greet(&self) -> String {
format!("Hello, {}", self.name)
}
}
impl Person {
pub fn new(name: String) -> Self {
Self { name, age: 0 }
}
pub fn birthday(&mut self) {
self.age += 1;
}
}
mod inner {
pub fn nested_fn() {}
}
type MyAlias = Vec<String>;
pub fn standalone() {}
#[test]
fn test_something() {}
"#;
let mut parser = Parser::new();
let mut class_map = HashMap::new();
let (nodes, edges, _) = extract_rust_tree_sitter("test.rs", content, &mut parser, &mut class_map);
assert!(nodes.iter().any(|n| n.name == "Person"), "Should find Person struct");
assert!(nodes.iter().any(|n| n.name == "Status"), "Should find Status enum");
assert!(nodes.iter().any(|n| n.name == "Greeter"), "Should find Greeter trait");
assert!(nodes.iter().any(|n| n.name == "greet"), "Should find greet method");
assert!(nodes.iter().any(|n| n.name == "new"), "Should find new method");
assert!(nodes.iter().any(|n| n.name == "birthday"), "Should find birthday method");
// `contains` rather than `==`: the nested function's name only has to
// include `nested_fn`.
assert!(nodes.iter().any(|n| n.name.contains("nested_fn")), "Should find nested_fn");
assert!(nodes.iter().any(|n| n.name == "MyAlias"), "Should find type alias");
assert!(nodes.iter().any(|n| n.name == "standalone"), "Should find standalone fn");
let test_node = nodes.iter().find(|n| n.name == "test_something");
assert!(test_node.is_some(), "Should find test function");
assert!(test_node.unwrap().is_test, "Test function should be marked as test");
// At least one DefinedIn edge must originate from an id containing `greet`.
let greet_edges: Vec<_> = edges.iter()
.filter(|e| e.from.contains("greet") && e.relation == EdgeRelation::DefinedIn)
.collect();
assert!(!greet_edges.is_empty(), "greet should have DefinedIn edge");
}
// TypeScript: expects an exported class with a method, an exported function,
// an exported arrow function, a relative import edge and class inheritance.
#[test]
fn test_extract_typescript() {
let content = r#"
import { Component } from './component';
export class MyClass extends BaseClass {
method(): void {}
}
export function topLevel(): void {}
export const arrowFn = () => {};
"#;
let mut parser = Parser::new();
let mut class_map = HashMap::new();
// The trailing "ts" argument is passed through to the extractor; its exact
// meaning is defined there (presumably the file extension - confirm).
let (nodes, edges, _) = extract_typescript_tree_sitter("test.ts", content, &mut parser, &mut class_map, "ts");
assert!(nodes.iter().any(|n| n.name == "MyClass"), "Should find MyClass");
assert!(nodes.iter().any(|n| n.name == "topLevel"), "Should find topLevel");
assert!(nodes.iter().any(|n| n.name == "arrowFn"), "Should find arrowFn");
assert!(edges.iter().any(|e| e.to.contains("component")), "Should have component import");
assert!(nodes.iter().any(|n| n.name == "method"), "Should find method inside class");
assert!(edges.iter().any(|e| e.relation == EdgeRelation::Inherits && e.to.contains("BaseClass")),
"Should capture class inheritance");
}
// TypeScript, broader coverage: decorated class with documented methods,
// generic interface, type alias, enum, function, async arrow function,
// default-exported class and a namespace. Only node names and the presence
// of at least one Imports edge are asserted.
#[test]
fn test_extract_typescript_comprehensive() {
let content = r#"
import { Injectable } from '@angular/core';
import type { User } from './types';
/**
* A service class
*/
@Injectable()
export class UserService {
private users: User[] = [];
/**
* Get all users
*/
getUsers(): User[] {
return this.users;
}
addUser(user: User): void {
this.users.push(user);
}
}
export interface IRepository<T> {
find(id: string): T | undefined;
save(item: T): void;
}
export type UserId = string;
export enum UserRole {
Admin = 'admin',
User = 'user',
}
export function createUser(name: string): User {
return { name };
}
export const fetchUser = async (id: string) => {
return null;
};
export default class DefaultExport {}
namespace MyNamespace {
export function innerFn() {}
}
"#;
let mut parser = Parser::new();
let mut class_map = HashMap::new();
let (nodes, edges, _) = extract_typescript_tree_sitter("test.ts", content, &mut parser, &mut class_map, "ts");
assert!(nodes.iter().any(|n| n.name == "UserService"), "Should find UserService class");
assert!(nodes.iter().any(|n| n.name == "DefaultExport"), "Should find default export class");
assert!(nodes.iter().any(|n| n.name == "getUsers"), "Should find getUsers method");
assert!(nodes.iter().any(|n| n.name == "addUser"), "Should find addUser method");
assert!(nodes.iter().any(|n| n.name == "IRepository"), "Should find interface");
assert!(nodes.iter().any(|n| n.name == "UserId"), "Should find type alias");
assert!(nodes.iter().any(|n| n.name == "UserRole"), "Should find enum");
assert!(nodes.iter().any(|n| n.name == "createUser"), "Should find function");
assert!(nodes.iter().any(|n| n.name == "fetchUser"), "Should find arrow function");
assert!(nodes.iter().any(|n| n.name == "MyNamespace"), "Should find namespace");
assert!(edges.iter().any(|e| e.relation == EdgeRelation::Imports), "Should have import edges");
}
}