use anyhow::{Context, Result};
use std::collections::HashMap;
use std::path::Path;
use tree_sitter::{Parser, Query, QueryCursor};
use crate::core::graph::{Edge, Symbol};
use crate::languages::LanguagePlugin;
/// Source languages this module can name.
///
/// Each variant has two textual forms: a lowercase identifier returned by
/// [`SupportedLanguage::as_str`] and a human-readable name produced by the
/// `Display` implementation.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SupportedLanguage {
    TypeScript,
    CSharp,
    Python,
    Go,
    Java,
    Rust,
}

impl SupportedLanguage {
    /// Returns the lowercase identifier for this language (e.g. `"csharp"`).
    pub fn as_str(&self) -> &'static str {
        match *self {
            SupportedLanguage::TypeScript => "typescript",
            SupportedLanguage::CSharp => "csharp",
            SupportedLanguage::Python => "python",
            SupportedLanguage::Go => "go",
            SupportedLanguage::Java => "java",
            SupportedLanguage::Rust => "rust",
        }
    }
}

impl std::fmt::Display for SupportedLanguage {
    /// Writes the human-readable language name (e.g. `C#` for `CSharp`).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let display_name = match *self {
            SupportedLanguage::TypeScript => "TypeScript",
            SupportedLanguage::CSharp => "C#",
            SupportedLanguage::Python => "Python",
            SupportedLanguage::Go => "Go",
            SupportedLanguage::Java => "Java",
            SupportedLanguage::Rust => "Rust",
        };
        f.write_str(display_name)
    }
}
/// A registered language plugin together with its two compiled tree-sitter
/// queries, so the queries are compiled once rather than per file.
struct PluginEntry {
/// Language-specific behavior (grammar, query sources, kind inference, ...).
plugin: Box<dyn LanguagePlugin>,
/// Query compiled from `plugin.symbol_query_source()`.
symbol_query: Query,
/// Query compiled from `plugin.edge_query_source()`.
edge_query: Query,
}
/// Extracts symbols and edges from source text using tree-sitter and a set of
/// per-language plugins.
pub struct CodeParser {
/// Single tree-sitter parser; its language is re-set before each parse.
parser: Parser,
/// One entry per registered language plugin.
plugins: Vec<PluginEntry>,
}
impl CodeParser {
pub fn new() -> Result<Self> {
let parser = Parser::new();
let mut plugins = Vec::new();
let all_plugins: Vec<Box<dyn LanguagePlugin>> = vec![
Box::new(crate::languages::typescript::TypeScriptPlugin),
Box::new(crate::languages::csharp::CSharpPlugin),
Box::new(crate::languages::python::PythonPlugin),
Box::new(crate::languages::rust_lang::RustPlugin),
Box::new(crate::languages::go_lang::GoPlugin),
Box::new(crate::languages::java::JavaPlugin),
];
for plugin in all_plugins {
let ts_lang = plugin.ts_language();
let lang_name = plugin.language().to_string();
let symbol_query = Query::new(&ts_lang, plugin.symbol_query_source())
.with_context(|| format!("Failed to compile {lang_name} symbol query"))?;
let edge_query = Query::new(&ts_lang, plugin.edge_query_source())
.with_context(|| format!("Failed to compile {lang_name} edge query"))?;
plugins.push(PluginEntry {
plugin,
symbol_query,
edge_query,
});
}
Ok(Self { parser, plugins })
}
/// Returns the first registered entry whose plugin reports `lang`, if any.
fn find_plugin(&self, lang: SupportedLanguage) -> Option<&PluginEntry> {
    for entry in &self.plugins {
        if entry.plugin.language() == lang {
            return Some(entry);
        }
    }
    None
}
pub fn detect_language(path: &Path) -> Result<SupportedLanguage> {
let ext = path
.extension()
.and_then(|e| e.to_str())
.ok_or_else(|| anyhow::anyhow!("No file extension: {}", path.display()))?;
match ext {
"ts" | "tsx" => Ok(SupportedLanguage::TypeScript),
"cs" => Ok(SupportedLanguage::CSharp),
"py" => Ok(SupportedLanguage::Python),
"go" => Ok(SupportedLanguage::Go),
"java" => Ok(SupportedLanguage::Java),
"rs" => Ok(SupportedLanguage::Rust),
other => anyhow::bail!("Unsupported file extension: .{other}"),
}
}
/// Reports whether any registered plugin lists this path's extension.
///
/// Paths without an extension, or with a non-UTF-8 extension, are unsupported.
pub fn is_supported(&self, path: &Path) -> bool {
    path.extension()
        .and_then(|e| e.to_str())
        .map_or(false, |ext| {
            self.plugins
                .iter()
                .any(|entry| entry.plugin.extensions().contains(&ext))
        })
}
/// Parses `source` as `lang` and returns one [`Symbol`] per symbol-query match.
///
/// A match produces a symbol only when it carries both a `name` and a
/// `definition` capture; any other match is skipped. Symbol ids have the form
/// `{file_path}::{name}::{kind}::{start_line}` with 1-based line numbers.
/// Symbols of kind `method`, `property` or `variant` get a `parent_id` from
/// `find_parent_class`. For Rust sources, `associate_rust_impl_methods` is run
/// over the collected symbols before returning.
///
/// # Errors
/// Fails when no plugin is registered for `lang`, when the parser rejects the
/// plugin's grammar, when parsing yields no tree, or when the plugin's
/// `extract_metadata` returns an error.
pub fn extract_symbols(
    &mut self,
    file_path: &str,
    source: &str,
    lang: SupportedLanguage,
) -> Result<Vec<Symbol>> {
    // Borrow `self.plugins` directly rather than through `find_plugin(&self)`:
    // the borrow then covers only that field, so `self.parser` can still be
    // borrowed mutably below and a single lookup is enough.
    let entry = self
        .plugins
        .iter()
        .find(|e| e.plugin.language() == lang)
        .ok_or_else(|| anyhow::anyhow!("Language {:?} not loaded", lang))?;
    let ts_lang = entry.plugin.ts_language();
    self.parser
        .set_language(&ts_lang)
        .context("Failed to set parser language")?;
    let tree = self
        .parser
        .parse(source, None)
        .ok_or_else(|| anyhow::anyhow!("Parse failed for {file_path}"))?;

    let source_bytes = source.as_bytes();
    let capture_names = entry.symbol_query.capture_names();
    let mut cursor = QueryCursor::new();
    let matches = cursor.matches(&entry.symbol_query, tree.root_node(), source_bytes);
    let mut symbols = Vec::new();

    for m in matches {
        let mut name_text: Option<String> = None;
        let mut def_node = None;
        for capture in m.captures {
            let capture_name = &capture_names[capture.index as usize];
            match &**capture_name {
                // Only the name needs its text; undecodable text becomes "".
                "name" => {
                    let text = capture.node.utf8_text(source_bytes).unwrap_or_default();
                    name_text = Some(text.to_string());
                }
                "definition" => def_node = Some(capture.node),
                // Other captures (e.g. `params`, `return_type`) are not used.
                _ => {}
            }
        }
        let (Some(name), Some(def)) = (name_text, def_node) else {
            continue;
        };

        let kind = entry.plugin.infer_symbol_kind(def.kind()).to_string();
        // tree-sitter rows are 0-based; stored line numbers are 1-based.
        let line_start = def.start_position().row as u32 + 1;
        let line_end = def.end_position().row as u32 + 1;
        let id = format!("{file_path}::{name}::{kind}::{line_start}");
        let metadata = entry.plugin.extract_metadata(&def, source, &kind)?;
        let signature = extract_signature(&def, source);
        let docstring = entry.plugin.extract_docstring(&def, source);
        let parent_id = if matches!(kind.as_str(), "method" | "property" | "variant") {
            find_parent_class(&def, source, file_path, entry.plugin.as_ref())
        } else {
            None
        };

        symbols.push(Symbol {
            id,
            name,
            kind,
            file_path: file_path.to_string(),
            line_start,
            line_end,
            signature,
            docstring,
            parent_id,
            language: lang.as_str().to_string(),
            metadata,
        });
    }

    // Rust methods live in `impl` blocks, outside the type's own declaration,
    // so they are attached to their type in a separate pass.
    if lang == SupportedLanguage::Rust {
        associate_rust_impl_methods(&mut symbols, &tree, source, file_path);
    }
    Ok(symbols)
}
pub fn extract_edges(
&mut self,
file_path: &str,
source: &str,
lang: SupportedLanguage,
) -> Result<Vec<Edge>> {
let entry = self
.find_plugin(lang)
.ok_or_else(|| anyhow::anyhow!("Language {:?} not loaded", lang))?;
let ts_lang = entry.plugin.ts_language();
self.parser
.set_language(&ts_lang)
.context("Failed to set parser language")?;
let tree = self
.parser
.parse(source, None)
.ok_or_else(|| anyhow::anyhow!("Parse failed for {file_path}"))?;
let mut cursor = QueryCursor::new();
let entry = self
.find_plugin(lang)
.ok_or_else(|| anyhow::anyhow!("Language {:?} not loaded", lang))?;
let matches = cursor.matches(&entry.edge_query, tree.root_node(), source.as_bytes());
let mut edges = Vec::new();
let capture_names = entry.edge_query.capture_names();
for m in matches {
let pattern = m.pattern_index;
let mut captures_map: HashMap<String, (String, u32)> = HashMap::new();
let mut representative_node: Option<tree_sitter::Node> = None;
for capture in m.captures {
let capture_name = capture_names[capture.index as usize].to_string();
let text = capture
.node
.utf8_text(source.as_bytes())
.unwrap_or_default()
.to_string();
let line = capture.node.start_position().row as u32 + 1;
if representative_node.is_none() {
representative_node = Some(capture.node);
}
captures_map.insert(capture_name, (text, line));
}
let enclosing_scope_id = representative_node
.as_ref()
.and_then(|n| find_enclosing_scope(n, source, file_path, entry.plugin.as_ref()));
let extracted = entry.plugin.extract_edge(
pattern,
&captures_map,
file_path,
enclosing_scope_id.as_deref(),
);
edges.extend(extracted);
}
Ok(edges)
}
pub fn extract_rust_impl_trait_edges(
&mut self,
file_path: &str,
source: &str,
symbols: &[Symbol],
) -> Result<Vec<Edge>> {
let entry = self
.find_plugin(SupportedLanguage::Rust)
.ok_or_else(|| anyhow::anyhow!("Rust language not loaded"))?;
let ts_lang = entry.plugin.ts_language();
self.parser
.set_language(&ts_lang)
.context("Failed to set parser language")?;
let tree = self
.parser
.parse(source, None)
.ok_or_else(|| anyhow::anyhow!("Parse failed for {file_path}"))?;
Ok(extract_rust_trait_impl_edges(
symbols, &tree, source, file_path,
))
}
}
/// Derives a one-line signature from the source text covered by `node`.
///
/// * `enum_variant` nodes: a single-line variant is used as-is; a multi-line
///   variant containing both `{` and `}` is collapsed onto one line (each line
///   trimmed, joined with single spaces); any other multi-line variant keeps
///   only its first line. Trailing commas are stripped.
/// * All other nodes: the text before the first `{`, or else the first line,
///   or else the whole text.
///
/// Returns `None` when the resulting text is empty after trimming.
///
/// # Panics
/// The slice into `source` panics if the node's byte range does not fall on
/// character boundaries of `source`.
fn extract_signature(node: &tree_sitter::Node, source: &str) -> Option<String> {
    let text = &source[node.start_byte()..node.end_byte()];

    if node.kind() != "enum_variant" {
        let head = match text.find('{').or_else(|| text.find('\n')) {
            Some(cut) => &text[..cut],
            None => text,
        };
        let head = head.trim();
        return (!head.is_empty()).then(|| head.to_string());
    }

    let first_line = match text.find('\n') {
        None => text,
        Some(newline) => {
            let whole = text.trim();
            if whole.contains('{') && whole.contains('}') {
                // Struct-like variant spread over several lines: flatten it.
                let mut one_line = String::new();
                for (idx, piece) in whole.lines().map(str::trim).enumerate() {
                    if idx > 0 {
                        one_line.push(' ');
                    }
                    one_line.push_str(piece);
                }
                if one_line.is_empty() {
                    return None;
                }
                return Some(one_line.trim_end_matches(',').trim().to_string());
            }
            &text[..newline]
        }
    };

    let sig = first_line.trim().trim_end_matches(',').trim();
    (!sig.is_empty()).then(|| sig.to_string())
}
/// Walks up from `node` and returns the id of the nearest enclosing named
/// scope, in the form `{file_path}::{name}::{kind}::{line}` (1-based line).
///
/// Only ancestors whose kind is listed in `plugin.scope_node_types()` are
/// considered:
/// * `arrow_function` / `function_expression`: named after the `name` field of
///   a directly enclosing `variable_declarator`, with kind `function` and the
///   declarator's line. Without such a declarator the walk continues upward.
/// * Any other scope with a `name` field: kind comes from
///   `plugin.infer_symbol_kind`, except that a Rust `function_item` sitting in
///   the `declaration_list` of an `impl_item` is reported as `method`.
///
/// Returns `None` when no ancestor yields a name.
fn find_enclosing_scope(
    node: &tree_sitter::Node,
    source: &str,
    file_path: &str,
    plugin: &dyn LanguagePlugin,
) -> Option<String> {
    let scope_kinds = plugin.scope_node_types();
    let bytes = source.as_bytes();

    for ancestor in std::iter::successors(node.parent(), |n| n.parent()) {
        let ancestor_kind = ancestor.kind();
        if !scope_kinds.contains(&ancestor_kind) {
            continue;
        }

        if matches!(ancestor_kind, "arrow_function" | "function_expression") {
            // Anonymous functions take their name from `const name = ...`.
            let bound_id = ancestor
                .parent()
                .filter(|decl| decl.kind() == "variable_declarator")
                .and_then(|decl| {
                    let name_node = decl.child_by_field_name("name")?;
                    let name = name_node.utf8_text(bytes).ok()?;
                    let line = decl.start_position().row as u32 + 1;
                    Some(format!("{file_path}::{name}::function::{line}"))
                });
            if bound_id.is_some() {
                return bound_id;
            }
            continue;
        }

        let Some(name_node) = ancestor.child_by_field_name("name") else {
            continue;
        };
        let Ok(name) = name_node.utf8_text(bytes) else {
            continue;
        };

        let inferred = plugin.infer_symbol_kind(ancestor_kind);
        let inside_rust_impl = inferred == "function"
            && plugin.language() == SupportedLanguage::Rust
            && ancestor_kind == "function_item"
            && ancestor
                .parent()
                .filter(|list| list.kind() == "declaration_list")
                .and_then(|list| list.parent())
                .map_or(false, |owner| owner.kind() == "impl_item");
        let kind = if inside_rust_impl { "method" } else { inferred };

        let line = ancestor.start_position().row as u32 + 1;
        return Some(format!("{file_path}::{name}::{kind}::{line}"));
    }
    None
}
/// Returns the id of the class-like declaration that contains `node`.
///
/// Walks up the ancestors looking for a node whose kind is in
/// `plugin.class_body_node_types()` and whose parent's kind is in
/// `plugin.class_decl_node_types()` and has a `name` field. The id has the form
/// `{file_path}::{class_name}::{kind}::{line}` with a 1-based line.
///
/// Returns `None` when no such ancestor exists, or when the class name cannot
/// be decoded as UTF-8.
fn find_parent_class(
    node: &tree_sitter::Node,
    source: &str,
    file_path: &str,
    plugin: &dyn LanguagePlugin,
) -> Option<String> {
    let body_kinds = plugin.class_body_node_types();
    let decl_kinds = plugin.class_decl_node_types();

    for body in std::iter::successors(node.parent(), |n| n.parent()) {
        if !body_kinds.contains(&body.kind()) {
            continue;
        }
        let Some(owner) = body.parent() else {
            continue;
        };
        if !decl_kinds.contains(&owner.kind()) {
            continue;
        }
        let Some(name_node) = owner.child_by_field_name("name") else {
            continue;
        };
        // An undecodable name aborts the search instead of trying outer classes.
        let class_name = name_node.utf8_text(source.as_bytes()).ok()?;
        let kind = plugin.infer_symbol_kind(owner.kind());
        let class_line = owner.start_position().row as u32 + 1;
        return Some(format!("{file_path}::{class_name}::{kind}::{class_line}"));
    }
    None
}
/// Re-labels functions defined in top-level Rust `impl` blocks as methods of
/// the implemented type.
///
/// Phase one scans the direct children of the root for `impl_item` nodes and
/// records, per impl, the target type name and the `(name, 1-based line)` of
/// each `function_item` in its `declaration_list`. Phase two finds the target
/// type among `symbols` (same file, same name, kind `struct`, `enum` or
/// `interface`) and, for each recorded function that is still a parentless
/// `function` symbol at that line, sets its `parent_id`, changes its kind to
/// `method` and rewrites its id accordingly. Impl blocks whose target type is
/// not among `symbols` are left untouched.
fn associate_rust_impl_methods(
    symbols: &mut [Symbol],
    tree: &tree_sitter::Tree,
    source: &str,
    file_path: &str,
) {
    let root = tree.root_node();
    let bytes = source.as_bytes();

    // Phase 1: gather (type name, methods) for every top-level impl block.
    let mut pending: Vec<(String, Vec<(String, u32)>)> = Vec::new();
    let mut root_walker = root.walk();
    for impl_node in root.children(&mut root_walker) {
        if impl_node.kind() != "impl_item" {
            continue;
        }
        let Some(owner_name) = extract_impl_target_type(&impl_node, source) else {
            continue;
        };

        let mut methods = Vec::new();
        let mut impl_walker = impl_node.walk();
        for body in impl_node.children(&mut impl_walker) {
            if body.kind() != "declaration_list" {
                continue;
            }
            let mut body_walker = body.walk();
            for item in body.children(&mut body_walker) {
                if item.kind() != "function_item" {
                    continue;
                }
                let Some(name_node) = item.child_by_field_name("name") else {
                    continue;
                };
                if let Ok(name) = name_node.utf8_text(bytes) {
                    let line = item.start_position().row as u32 + 1;
                    methods.push((name.to_string(), line));
                }
            }
        }

        if !methods.is_empty() {
            pending.push((owner_name, methods));
        }
    }

    // Phase 2: attach the recorded functions to their owning type symbol.
    for (owner_name, methods) in &pending {
        let owner_id = symbols
            .iter()
            .find(|s| {
                s.file_path == file_path
                    && s.name == *owner_name
                    && matches!(s.kind.as_str(), "struct" | "enum" | "interface")
            })
            .map(|s| s.id.clone());
        let Some(owner_id) = owner_id else {
            continue;
        };

        for (method_name, method_line) in methods {
            let hit = symbols.iter_mut().find(|s| {
                s.file_path == file_path
                    && s.name == *method_name
                    && s.line_start == *method_line
                    && s.kind == "function"
                    && s.parent_id.is_none()
            });
            if let Some(sym) = hit {
                sym.parent_id = Some(owner_id.clone());
                sym.kind = "method".to_string();
                // The id embeds the kind, so it must be rebuilt as well.
                sym.id = format!(
                    "{}::{}::method::{}",
                    sym.file_path, sym.name, sym.line_start
                );
            }
        }
    }
}
/// Produces one `implements` edge per top-level `impl Trait for Type` block.
///
/// Only direct children of the root with kind `impl_item` and a `trait` field
/// are considered. `from_id` is the id of the matching type symbol (same file,
/// same name, kind `struct`, `enum` or `interface`); when no such symbol
/// exists it falls back to `{file_path}::__module__::class`. `to_id` is the
/// bare trait name and `line` is the impl block's 1-based start line.
pub fn extract_rust_trait_impl_edges(
    symbols: &[Symbol],
    tree: &tree_sitter::Tree,
    source: &str,
    file_path: &str,
) -> Vec<Edge> {
    let root = tree.root_node();
    let mut walker = root.walk();
    let mut edges = Vec::new();

    for impl_node in root.children(&mut walker) {
        if impl_node.kind() != "impl_item" {
            continue;
        }
        // Inherent impls have no `trait` field and produce no edge.
        let Some(trait_node) = impl_node.child_by_field_name("trait") else {
            continue;
        };
        let Some(trait_name) = extract_base_type_name(&trait_node, source) else {
            continue;
        };
        let Some(implementor) = extract_impl_target_type(&impl_node, source) else {
            continue;
        };

        let known_type = symbols.iter().find(|s| {
            s.file_path == file_path
                && s.name == implementor
                && matches!(s.kind.as_str(), "struct" | "enum" | "interface")
        });
        let from_id = match known_type {
            Some(sym) => sym.id.clone(),
            None => format!("{file_path}::__module__::class"),
        };

        edges.push(Edge {
            from_id,
            to_id: trait_name,
            kind: "implements".to_string(),
            file_path: file_path.to_string(),
            line: Some(impl_node.start_position().row as u32 + 1),
        });
    }
    edges
}
/// Returns the base name of the type in an impl node's `type` field, or `None`
/// when the field is absent or no name can be extracted.
fn extract_impl_target_type(impl_node: &tree_sitter::Node, source: &str) -> Option<String> {
    impl_node
        .child_by_field_name("type")
        .and_then(|target| extract_base_type_name(&target, source))
}
/// Reduces a Rust type node to its bare type name.
///
/// * `type_identifier`: the identifier text (`Foo`).
/// * `generic_type`: the name of its base type with the type arguments dropped
///   (`Foo<T>` -> `Foo`). The base may itself be path-qualified
///   (`std::convert::From<T>` -> `From`); previously such a base made this
///   function return `None`, so the impl was silently skipped.
/// * `scoped_type_identifier`: the last `::`-separated segment
///   (`fmt::Display` -> `Display`).
/// * anything else: the node's full text, unchanged.
///
/// Returns `None` when the relevant text is not valid UTF-8 or a generic type
/// has no recognizable base child.
fn extract_base_type_name(type_node: &tree_sitter::Node, source: &str) -> Option<String> {
    let bytes = source.as_bytes();
    match type_node.kind() {
        "generic_type" => {
            // The base type precedes the type arguments among the direct
            // children, so the first identifier-like child is the base.
            let mut cursor = type_node.walk();
            let base = type_node.children(&mut cursor).find(|child| {
                matches!(child.kind(), "type_identifier" | "scoped_type_identifier")
            })?;
            extract_base_type_name(&base, source)
        }
        "scoped_type_identifier" => {
            let text = type_node.utf8_text(bytes).ok()?;
            text.rsplit("::").next().map(str::to_string)
        }
        // `type_identifier` and every other kind: use the text verbatim.
        _ => type_node.utf8_text(bytes).ok().map(str::to_string),
    }
}