use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};
use std::io::{BufRead, BufReader};
use std::path::{Path, PathBuf};
use std::process::Command;
use super::analyzer::{
detect_available_analyzers, detect_language_from_path, AnalyzerKind, AnalyzerResult,
};
use crate::graph::CodeGraph;
/// Options for a symbol-enrichment run.
#[derive(Debug, Clone)]
pub struct EnrichConfig {
    /// Analyzer kinds requested by the caller; `None` in the default config.
    /// NOTE(review): presumably meant to restrict which analyzers run —
    /// verify that the enrichment entry point honours it.
    pub analyzers: Option<Vec<AnalyzerKind>>,
    /// Optional list of file paths. When set, `enrich_symbols` skips every
    /// graph file whose path is not equal to one of these entries.
    pub files: Option<Vec<PathBuf>>,
    /// Timeout in seconds (30 in the default config).
    /// NOTE(review): nothing in this file reads the value — confirm whether
    /// the external tool invocations are supposed to be bounded by it.
    pub timeout_secs: u64,
}
impl Default for EnrichConfig {
fn default() -> Self {
Self {
analyzers: None,
files: None,
timeout_secs: 30,
}
}
}
/// Counters describing the outcome of one enrichment run.
///
/// `Default` yields the all-zero value, and `PartialEq`/`Eq` let callers and
/// tests compare whole results instead of field by field.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct EnrichResult {
    /// Number of files for which analyzer output was obtained and matched.
    pub files_processed: usize,
    /// Number of symbols whose name matched an analyzer signature.
    pub symbols_enriched: usize,
    /// Number of per-file failures that were reported and skipped.
    pub errors: usize,
}
/// One declaration extracted from an external analyzer's output.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LspSignature {
    /// Identifier of the declaration; this is the value compared against
    /// symbol names when matching.
    pub name: String,
    /// Display text for the declaration, as produced by the individual parser.
    pub signature: String,
    /// Type text associated with a callable, when the parser found one.
    pub return_type: Option<String>,
    /// Parameter entries. The Java parser stores parameter names here; the
    /// clang-based parsers in this file leave the list empty.
    pub parameters: Vec<String>,
    /// Documentation text; only filled by the JSON extractor in this file.
    pub documentation: Option<String>,
}
/// Runs the installed external analyzers over the file nodes of `graph` and
/// counts how many stored symbols have a matching analyzer signature.
///
/// For every file node the function:
/// 1. applies the optional `config.files` filter,
/// 2. detects the language and picks the analyzer for it, skipping the file
///    when that analyzer is not installed or is excluded by
///    `config.analyzers`,
/// 3. loads the file's symbols, runs the analyzer, and matches signatures to
///    symbols by name.
///
/// Progress and per-file problems are written to stderr. A failure while
/// loading symbols or while running an analyzer is counted in
/// [`EnrichResult::errors`] and the run continues with the next file.
///
/// `config.timeout_secs` is not consulted here.
///
/// # Errors
///
/// Returns an error if the list of file nodes cannot be read from the graph.
pub fn enrich_symbols(graph: &mut CodeGraph, config: &EnrichConfig) -> Result<EnrichResult> {
    let mut result = EnrichResult {
        files_processed: 0,
        symbols_enriched: 0,
        errors: 0,
    };

    let available_analyzers = detect_available_analyzers();
    if available_analyzers.is_empty() {
        eprintln!("No LSP analyzers found (rust-analyzer, javac)");
        eprintln!("Install rust-analyzer: rustup component add rust-analyzer");
        eprintln!("Install Java JDK for Java projects");
        return Ok(result);
    }
    eprintln!("Found {} analyzer(s):", available_analyzers.len());
    for analyzer in &available_analyzers {
        eprintln!(" - {}", analyzer.binary_name());
    }
    eprintln!();

    let files = graph.all_file_nodes()?;
    for (file_path_str, _file_node) in files {
        let file_path = Path::new(&file_path_str);

        if let Some(files_filter) = &config.files {
            // Compare as paths without allocating a PathBuf per candidate.
            if !files_filter
                .iter()
                .any(|wanted| wanted.as_path() == file_path)
            {
                continue;
            }
        }

        let language = match detect_language_from_path(file_path) {
            Some(lang) => lang,
            None => continue,
        };

        // The analyzer must be installed AND, when the caller supplied an
        // explicit list, be part of that list.
        let analyzer_kind = match super::analyzer::get_analyzer_for_language(language) {
            Some(kind)
                if available_analyzers.contains(&kind)
                    && config
                        .analyzers
                        .as_ref()
                        .map_or(true, |allowed| allowed.contains(&kind)) =>
            {
                kind
            }
            _ => continue,
        };

        eprintln!(
            "Enriching {:?} with {}",
            file_path,
            analyzer_kind.binary_name()
        );

        let symbols = match graph.symbols_in_file(&file_path_str) {
            Ok(s) => s,
            Err(e) => {
                eprintln!(" Error getting symbols: {}", e);
                result.errors += 1;
                continue;
            }
        };
        if symbols.is_empty() {
            continue;
        }

        // The child process runs with the file's directory as its working
        // directory, so a relative path would be resolved a second time
        // against that directory. Hand the tools an absolute path instead.
        let absolute_path = if file_path.is_absolute() {
            file_path.to_path_buf()
        } else {
            match std::env::current_dir() {
                Ok(cwd) => cwd.join(file_path),
                Err(_) => file_path.to_path_buf(),
            }
        };
        // `parent()` yields `Some("")` for a bare file name; an empty path is
        // not a usable working directory, so fall back to "." in that case too.
        let workspace_root = match absolute_path.parent() {
            Some(dir) if !dir.as_os_str().is_empty() => dir,
            _ => Path::new("."),
        };

        let analysis = match analyzer_kind {
            AnalyzerKind::RustAnalyzer => parse_rust_analyzer_json(&absolute_path, workspace_root),
            AnalyzerKind::Clangd => parse_clangd_json(&absolute_path, workspace_root),
            AnalyzerKind::JDTLS => parse_javac_output(&absolute_path, workspace_root),
        };
        // One failing tool invocation must not discard the work already done
        // for other files: record it and move on, like symbol lookup errors.
        let signatures = match analysis {
            Ok(found) => found,
            Err(e) => {
                eprintln!(
                    " Error running {}: {:#}",
                    analyzer_kind.binary_name(),
                    e
                );
                result.errors += 1;
                continue;
            }
        };

        let enriched_count = match_signatures_to_symbols(&symbols, &signatures)?;
        result.files_processed += 1;
        result.symbols_enriched += enriched_count;
        eprintln!(" Enriched {} symbols", enriched_count);
    }

    eprintln!();
    eprintln!("Enrichment complete:");
    eprintln!(" Files processed: {}", result.files_processed);
    eprintln!(" Symbols enriched: {}", result.symbols_enriched);
    eprintln!(" Errors: {}", result.errors);
    Ok(result)
}
fn parse_rust_analyzer_json(file_path: &Path, workspace: &Path) -> Result<Vec<LspSignature>> {
let mut signatures = Vec::new();
let output = Command::new("rust-analyzer")
.args(["analysis-stats", "--load-output-dirs"])
.arg(file_path)
.current_dir(workspace)
.output()
.context("Failed to run rust-analyzer")?;
let stdout = String::from_utf8_lossy(&output.stdout);
for line in stdout.lines() {
if line.trim().is_empty() {
continue;
}
if let Ok(json_value) = serde_json::from_str::<serde_json::Value>(line) {
if let Some(signature) = extract_signature_from_json(&json_value) {
signatures.push(signature);
}
}
}
Ok(signatures)
}
/// Collects signatures for a C/C++ file.
///
/// First tries `clangd-query --dump-ast --include-refs <file_path>` and parses
/// its stdout line by line. Only when that process cannot be started does it
/// fall back to `clang -Xclang -ast-dump=json -fsyntax-only <file_path>` and
/// walk the resulting JSON tree. Both commands run with `workspace` as their
/// working directory, and neither exit status is inspected.
///
/// Files with the extensions `cpp`, `cc`, `cxx`, `hpp` and `h` are treated as
/// C++ when formatting line-based signatures.
///
/// # Errors
///
/// Fails only when neither `clangd-query` nor `clang` can be started.
fn parse_clangd_json(file_path: &Path, workspace: &Path) -> Result<Vec<LspSignature>> {
    let is_cpp = match file_path.extension().and_then(|ext| ext.to_str()) {
        Some("cpp") | Some("cc") | Some("cxx") | Some("hpp") | Some("h") => true,
        _ => false,
    };

    let primary = Command::new("clangd-query")
        .arg("--dump-ast")
        .arg("--include-refs")
        .arg(file_path)
        .current_dir(workspace)
        .output();

    if let Ok(dump) = primary {
        let text = String::from_utf8_lossy(&dump.stdout);
        let signatures: Vec<LspSignature> = text
            .lines()
            .filter_map(|row| parse_clangd_line(row, is_cpp))
            .collect();
        return Ok(signatures);
    }

    // `clangd-query` could not be spawned: use clang's JSON AST dump instead.
    let fallback = Command::new("clang")
        .arg("-Xclang")
        .arg("-ast-dump=json")
        .arg("-fsyntax-only")
        .arg(file_path)
        .current_dir(workspace)
        .output()
        .context("Failed to run clang AST dump")?;

    let mut signatures = Vec::new();
    let text = String::from_utf8_lossy(&fallback.stdout);
    if let Ok(ast) = serde_json::from_str::<serde_json::Value>(&text) {
        extract_signatures_from_clang_ast(&ast, &mut signatures);
    }
    Ok(signatures)
}
/// Parses one whitespace-separated line of the form `<Kind> <name> [type...]`.
///
/// * `Function`, `Method`, `Constructor`, `Destructor` — require at least one
///   token after the name; the remaining tokens (joined by single spaces) are
///   stored as `return_type` and embedded in the signature text.
/// * `Class`, `Struct`, `Enum` — need only the kind and the name; any further
///   tokens are ignored.
///
/// Returns `None` for blank lines, lines with fewer tokens than required, and
/// any other kind.
fn parse_clangd_line(line: &str, is_cpp: bool) -> Option<LspSignature> {
    let mut tokens = line.split_whitespace();
    let kind = tokens.next()?;
    let name = tokens.next()?;

    match kind {
        "Function" | "Method" | "Constructor" | "Destructor" => {
            let type_info = tokens.collect::<Vec<_>>().join(" ");
            if type_info.is_empty() {
                // A callable without any type text carries nothing to report.
                return None;
            }
            let signature = if is_cpp {
                format!("{} {}", kind, type_info)
            } else {
                format!("int {}({})", name, type_info)
            };
            Some(LspSignature {
                name: name.to_string(),
                signature,
                return_type: Some(type_info),
                parameters: Vec::new(),
                documentation: None,
            })
        }
        // Type declarations never use the trailing tokens, so a bare
        // `<Kind> <name>` line is sufficient.
        "Class" | "Struct" | "Enum" => Some(LspSignature {
            name: name.to_string(),
            signature: format!("{} {}", kind, name),
            return_type: None,
            parameters: Vec::new(),
            documentation: None,
        }),
        _ => None,
    }
}
/// Walks a clang JSON AST node depth-first (node before its `inner` children)
/// and appends one [`LspSignature`] per named callable declaration.
///
/// Recognised node kinds are `FunctionDecl`, `CXXMethodDecl`,
/// `CXXConstructorDecl` and `CXXDestructorDecl` (the names clang emits), plus
/// the legacy spellings `Constructor` and `Destructor` that earlier versions
/// of this function looked for.
///
/// `return_type` is taken from the text of `type.qualType` that precedes the
/// first `(`, because clang reports the whole function type there
/// (e.g. `int (int, char **)`). For declarations returning a function pointer
/// this yields only the leading part of the return type. If there is no text
/// before the parenthesis, the full `qualType` string is kept.
///
/// Non-object values are ignored.
fn extract_signatures_from_clang_ast(json: &serde_json::Value, signatures: &mut Vec<LspSignature>) {
    let obj = match json.as_object() {
        Some(obj) => obj,
        None => return,
    };

    if let Some(kind) = obj.get("kind").and_then(|v| v.as_str()) {
        let is_callable = matches!(
            kind,
            "FunctionDecl"
                | "CXXMethodDecl"
                | "CXXConstructorDecl"
                | "CXXDestructorDecl"
                | "Constructor"
                | "Destructor"
        );
        if is_callable {
            if let Some(name) = obj.get("name").and_then(|v| v.as_str()) {
                let return_type = obj
                    .get("type")
                    .and_then(|t| t.get("qualType"))
                    .and_then(|t| t.as_str())
                    .map(|qual| {
                        let head = qual.split('(').next().unwrap_or(qual).trim();
                        if head.is_empty() {
                            qual.to_string()
                        } else {
                            head.to_string()
                        }
                    });
                signatures.push(LspSignature {
                    name: name.to_string(),
                    signature: format!("{} {}", kind, name),
                    return_type,
                    parameters: Vec::new(),
                    documentation: None,
                });
            }
        }
    }

    if let Some(children) = obj.get("inner").and_then(|v| v.as_array()) {
        for child in children {
            extract_signatures_from_clang_ast(child, signatures);
        }
    }
}
fn parse_javac_output(file_path: &Path, workspace: &Path) -> Result<Vec<LspSignature>> {
let mut signatures = Vec::new();
let output = Command::new("javac")
.args([
std::ffi::OsStr::new("-Xprint"),
std::ffi::OsStr::new("-parameters"), file_path.as_os_str(),
])
.current_dir(workspace)
.output()
.context("Failed to run javac -Xprint")?;
let stdout = String::from_utf8_lossy(&output.stdout);
for line in stdout.lines() {
if let Some(sig) = parse_javac_line(line) {
signatures.push(sig);
}
}
Ok(signatures)
}
/// Classifies one line of Java declaration text.
///
/// The line is trimmed first; blank lines and lines starting with `//` yield
/// `None`. Otherwise the type-declaration parser is tried, and the
/// method-declaration parser is consulted only if that one finds nothing.
fn parse_javac_line(line: &str) -> Option<LspSignature> {
    let text = line.trim();
    let ignorable = text.is_empty() || text.starts_with("//");
    if ignorable {
        None
    } else {
        parse_java_type_decl(text).or_else(|| parse_java_method_decl(text))
    }
}
/// Recognises a Java `class`, `interface` or `enum` declaration line.
///
/// The line is split on whitespace; the first token that is exactly one of
/// the three keywords marks the declaration, and the following token is the
/// type name. Matching on tokens (rather than on the keyword surrounded by
/// spaces) also accepts declarations without a leading modifier, such as
/// `class Foo {`.
///
/// The name is cut at the first `<` or `{`, so `Box<T>` and `Foo{` yield
/// `Box` and `Foo`. The signature is the text before the first `{`, trimmed.
///
/// Returns `None` when no keyword token is present or no non-empty name
/// follows it.
fn parse_java_type_decl(line: &str) -> Option<LspSignature> {
    let mut tokens = line.split_whitespace();
    // Advance past the keyword; the iterator is then positioned on the name.
    tokens.find(|token| matches!(*token, "class" | "interface" | "enum"))?;
    let name = tokens
        .next()?
        .split(|c: char| c == '<' || c == '{')
        .next()
        .filter(|candidate| !candidate.is_empty())?;
    let signature = line.split('{').next()?.trim().to_string();

    Some(LspSignature {
        name: name.to_string(),
        signature,
        return_type: None,
        parameters: Vec::new(),
        documentation: None,
    })
}
/// Recognises a Java method or constructor declaration line.
///
/// The parameter list is the text between the first `(` and its matching `)`;
/// parentheses nested inside it (for example annotation arguments) are
/// balanced rather than ending the list early. Lines without such a pair —
/// including lines where a `)` appears only before the first `(` — yield
/// `None` instead of panicking on an inverted slice range.
///
/// The method name is the last whitespace-separated token before `(`. The
/// token before the name becomes `return_type`, unless it is a Java modifier
/// or an annotation, which is what precedes the name of a constructor.
///
/// Lines containing ` class `, ` interface ` or ` enum ` are rejected, as are
/// lines with no token before the opening parenthesis.
fn parse_java_method_decl(line: &str) -> Option<LspSignature> {
    if line.contains(" class ") || line.contains(" interface ") || line.contains(" enum ") {
        return None;
    }

    let paren_start = line.find('(')?;
    // Scan forward from the opening parenthesis for the one that closes it.
    let mut depth = 0usize;
    let mut paren_end = None;
    for (offset, ch) in line[paren_start..].char_indices() {
        match ch {
            '(' => depth += 1,
            ')' => {
                // The scan starts on '(', so depth is at least 1 here.
                depth -= 1;
                if depth == 0 {
                    paren_end = Some(paren_start + offset);
                    break;
                }
            }
            _ => {}
        }
    }
    let paren_end = paren_end?;

    let before_parens = &line[..paren_start];
    let params_str = &line[paren_start + 1..paren_end];

    let mut head = before_parens.split_whitespace().rev();
    let name = head.next()?;
    let return_type = head
        .next()
        .filter(|token| {
            let is_modifier = matches!(
                *token,
                "public"
                    | "protected"
                    | "private"
                    | "static"
                    | "final"
                    | "abstract"
                    | "synchronized"
                    | "native"
                    | "strictfp"
                    | "default"
            );
            !is_modifier && !token.starts_with('@')
        })
        .map(String::from);

    let parameters = parse_java_parameters(params_str);
    let signature = line.split('{').next()?.trim().to_string();

    Some(LspSignature {
        name: name.to_string(),
        signature,
        return_type,
        parameters,
        documentation: None,
    })
}
/// Extracts parameter names from the text between a Java method's
/// parentheses.
///
/// The text is split on commas that are not nested inside `<...>`, `(...)` or
/// `[...]`, so a generic type such as `Map<String, Integer> m` stays one
/// parameter. For each piece the last whitespace-separated token is taken as
/// the name; a piece without any token contributes an empty string.
///
/// Returns an empty vector when the input is empty or only whitespace.
fn parse_java_parameters(params_str: &str) -> Vec<String> {
    if params_str.trim().is_empty() {
        return Vec::new();
    }

    let name_of = |piece: &str| -> String {
        let piece = piece.trim();
        piece.split_whitespace().last().unwrap_or(piece).to_string()
    };

    let mut names = Vec::new();
    let mut depth = 0usize;
    let mut piece_start = 0usize;
    for (idx, ch) in params_str.char_indices() {
        match ch {
            '<' | '(' | '[' => depth += 1,
            // Saturate so stray closers in malformed input cannot underflow.
            '>' | ')' | ']' => depth = depth.saturating_sub(1),
            ',' if depth == 0 => {
                names.push(name_of(&params_str[piece_start..idx]));
                piece_start = idx + 1;
            }
            _ => {}
        }
    }
    names.push(name_of(&params_str[piece_start..]));
    names
}
/// Builds an [`LspSignature`] from a JSON object.
///
/// Required keys: `name` (string) and either `signature` or, when that key is
/// absent, `display_name` (string). If a required key is missing or holds a
/// non-string value the function returns `None`.
///
/// Optional keys: `return_type` (string), `parameters` (array; non-string
/// entries are dropped), and `documentation` or, when that key is absent,
/// `docs` (string).
fn extract_signature_from_json(json: &serde_json::Value) -> Option<LspSignature> {
    let name = json.get("name")?.as_str()?.to_string();

    // The fallback key is consulted only when `signature` is absent, not when
    // it is present with a non-string value.
    let signature_value = match json.get("signature") {
        Some(value) => value,
        None => json.get("display_name")?,
    };
    let signature = signature_value.as_str()?.to_string();

    let return_type = json
        .get("return_type")
        .and_then(serde_json::Value::as_str)
        .map(str::to_owned);

    let parameters: Vec<String> = match json.get("parameters").and_then(|v| v.as_array()) {
        Some(items) => items
            .iter()
            .filter_map(|item| item.as_str())
            .map(str::to_owned)
            .collect(),
        None => Vec::new(),
    };

    let documentation_value = match json.get("documentation") {
        Some(value) => Some(value),
        None => json.get("docs"),
    };
    let documentation = documentation_value
        .and_then(serde_json::Value::as_str)
        .map(str::to_owned);

    Some(LspSignature {
        name,
        signature,
        return_type,
        parameters,
        documentation,
    })
}
/// Counts the symbols whose name equals the name of some analyzer signature.
///
/// Symbols without a name are ignored. For every named symbol the first
/// signature with an identical name is reported on stderr and counted once.
/// The symbols themselves are not modified; in the visible code this function
/// always returns `Ok`.
fn match_signatures_to_symbols(
    symbols: &[crate::ingest::SymbolFact],
    signatures: &[LspSignature],
) -> Result<usize> {
    let mut hits = 0;
    let named_symbols = symbols.iter().filter_map(|symbol| symbol.name.as_ref());
    for name in named_symbols {
        let found = signatures.iter().find(|candidate| candidate.name == *name);
        if let Some(sig) = found {
            eprintln!(" Matched '{}': {}", name, sig.signature);
            hits += 1;
        }
    }
    Ok(hits)
}
/// Opens the code graph stored at `db_path` and enriches it using the default
/// [`EnrichConfig`].
///
/// # Errors
///
/// Propagates failures from opening the graph and from [`enrich_symbols`].
pub fn run_enrich(db_path: &Path) -> Result<EnrichResult> {
    let defaults = EnrichConfig::default();
    let mut code_graph = CodeGraph::open(db_path)?;
    enrich_symbols(&mut code_graph, &defaults)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A JSON object carrying the recognised keys is converted field by field.
    #[test]
    fn test_extract_signature_from_json() {
        let payload = serde_json::json!({
            "name": "main",
            "signature": "fn main() -> Result<(), Error>",
            "return_type": "Result<(), Error>",
            "parameters": [],
            "documentation": "Main entry point"
        });
        let parsed = extract_signature_from_json(&payload);
        assert!(parsed.is_some());
        let LspSignature {
            name,
            signature,
            return_type,
            ..
        } = parsed.unwrap();
        assert_eq!(name, "main");
        assert_eq!(signature, "fn main() -> Result<(), Error>");
        assert_eq!(return_type, Some("Result<(), Error>".to_string()));
    }

    /// A `Function` line in C mode keeps the name in both fields.
    #[test]
    fn test_parse_clangd_line() {
        let parsed = parse_clangd_line("Function main int()", false);
        assert!(parsed.is_some());
        let LspSignature {
            name, signature, ..
        } = parsed.unwrap();
        assert_eq!(name, "main");
        assert!(signature.contains("main"));
    }

    /// A public class line yields the class name and the text before `{`.
    #[test]
    fn test_parse_javac_class() {
        let parsed = parse_java_type_decl("public class MyClass {");
        assert!(parsed.is_some());
        let LspSignature {
            name, signature, ..
        } = parsed.unwrap();
        assert_eq!(name, "MyClass");
        assert_eq!(signature, "public class MyClass");
    }

    /// A method line yields its name, return type and parameter names.
    #[test]
    fn test_parse_javac_method() {
        let parsed = parse_java_method_decl("public void myMethod(int x, String y) {");
        assert!(parsed.is_some());
        let LspSignature {
            name,
            return_type,
            parameters,
            ..
        } = parsed.unwrap();
        assert_eq!(name, "myMethod");
        assert_eq!(return_type, Some("void".to_string()));
        assert_eq!(parameters, vec!["x", "y"]);
    }

    /// Parameter names are the last token of each comma-separated piece.
    #[test]
    fn test_parse_java_parameters() {
        let names = parse_java_parameters("int x, String y, boolean z");
        assert_eq!(names, vec!["x", "y", "z"]);
    }

    /// The default configuration has no filters and a 30-second timeout.
    #[test]
    fn test_enrich_config_default() {
        let EnrichConfig {
            analyzers,
            files,
            timeout_secs,
        } = EnrichConfig::default();
        assert!(analyzers.is_none());
        assert!(files.is_none());
        assert_eq!(timeout_secs, 30);
    }
}