use std::path::{Path, PathBuf};
use std::process::Command;
use codemem_core::{CodememError, ScipConfig};
use super::{parse_scip_bytes, ScipReadResult};
/// Languages for which this module knows how to invoke a SCIP indexer.
///
/// Each variant maps to an indexer binary, default arguments, and a short
/// lowercase name via the inherent methods on this type.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ScipLanguage {
/// Indexed with `rust-analyzer`.
Rust,
/// Indexed with `scip-typescript`.
TypeScript,
/// Indexed with `scip-python`.
Python,
/// Indexed with `scip-java`.
Java,
/// Indexed with `scip-go`.
Go,
/// Indexed with `scip-dotnet`.
CSharp,
/// Indexed with `scip-ruby`.
Ruby,
/// Indexed with `scip-php`.
Php,
/// Indexed with `scip-dart`.
Dart,
}
impl ScipLanguage {
fn indexer_binary(&self) -> &'static str {
match self {
Self::Rust => "rust-analyzer",
Self::TypeScript => "scip-typescript",
Self::Python => "scip-python",
Self::Java => "scip-java",
Self::Go => "scip-go",
Self::CSharp => "scip-dotnet",
Self::Ruby => "scip-ruby",
Self::Php => "scip-php",
Self::Dart => "scip-dart",
}
}
fn default_args(&self) -> Vec<&'static str> {
match self {
Self::Rust => vec!["scip", "."],
Self::TypeScript => vec!["index"],
Self::Python => vec!["index", "."],
Self::Java => vec!["index"],
Self::Go => vec![],
Self::CSharp => vec!["index"],
Self::Ruby => vec![],
Self::Php => vec!["index"],
Self::Dart => vec![],
}
}
fn default_output_file(&self) -> &'static str {
"index.scip"
}
fn name(&self) -> &'static str {
match self {
Self::Rust => "rust",
Self::TypeScript => "typescript",
Self::Python => "python",
Self::Java => "java",
Self::Go => "go",
Self::CSharp => "csharp",
Self::Ruby => "ruby",
Self::Php => "php",
Self::Dart => "dart",
}
}
}
/// Manifest file names and the language each one indicates.
///
/// `detect_languages` compares every discovered file's name against these
/// entries by exact match. C# is not listed here because it is detected by
/// the `.csproj` suffix instead.
const MANIFEST_LANGUAGES: &[(&str, ScipLanguage)] = &[
("Cargo.toml", ScipLanguage::Rust),
("package.json", ScipLanguage::TypeScript),
("tsconfig.json", ScipLanguage::TypeScript),
("pyproject.toml", ScipLanguage::Python),
("setup.py", ScipLanguage::Python),
("setup.cfg", ScipLanguage::Python),
("go.mod", ScipLanguage::Go),
("pom.xml", ScipLanguage::Java),
("build.gradle", ScipLanguage::Java),
("build.gradle.kts", ScipLanguage::Java),
("pubspec.yaml", ScipLanguage::Dart),
("Gemfile", ScipLanguage::Ruby),
("composer.json", ScipLanguage::Php),
];
/// Outcome of a full orchestration run across all detected languages.
#[derive(Debug)]
pub struct OrchestrationResult {
/// Merged contents of every SCIP index that was read.
pub scip_result: ScipReadResult,
/// Languages for which an index was obtained, either freshly generated or
/// taken from the cache.
pub indexed_languages: Vec<ScipLanguage>,
/// Languages whose indexer failed, paired with the error message.
pub failed_languages: Vec<(ScipLanguage, String)>,
}
impl OrchestrationResult {
fn empty(project_root: &Path) -> Self {
Self {
scip_result: ScipReadResult {
project_root: project_root.to_string_lossy().to_string(),
definitions: Vec::new(),
references: Vec::new(),
externals: Vec::new(),
covered_files: Vec::new(),
},
indexed_languages: Vec::new(),
failed_languages: Vec::new(),
}
}
}
/// Detects project languages, runs the matching SCIP indexers, and merges
/// their output.
pub struct ScipOrchestrator {
// Settings read by the orchestrator: per-language indexer commands,
// indexer auto-detection, and index caching (enable flag and TTL).
config: ScipConfig,
}
impl ScipOrchestrator {
pub fn new(config: ScipConfig) -> Self {
Self { config }
}
/// Runs the full pipeline for `project_root`: detect languages, pick the
/// indexers that are available, index each language (or reuse a valid cached
/// index), and merge all resulting `.scip` files.
///
/// Returns an empty result when no language is detected or no indexer is
/// available. A failing indexer is recorded in `failed_languages` and does
/// not abort the run.
///
/// # Errors
///
/// Returns `CodememError::ScipOrchestration` when the temp directory cannot
/// be created or when merging the collected index files fails.
pub fn run(
    &self,
    project_root: &Path,
    namespace: &str,
) -> Result<OrchestrationResult, CodememError> {
    let detected = self.detect_languages(project_root);
    if detected.is_empty() {
        return Ok(OrchestrationResult::empty(project_root));
    }

    let runnable = self.detect_available_indexers(&detected);
    if runnable.is_empty() {
        tracing::info!("No SCIP indexers found on PATH for detected languages");
        return Ok(OrchestrationResult::empty(project_root));
    }

    let mut succeeded = Vec::new();
    let mut failures = Vec::new();
    let mut index_files: Vec<PathBuf> = Vec::new();

    // Scratch space for freshly generated indexes; removed when `scratch` drops.
    let scratch = tempfile::tempdir().map_err(|e| {
        CodememError::ScipOrchestration(format!("Failed to create temp dir: {e}"))
    })?;
    let cache_dir = self
        .config
        .cache_index
        .then(|| scip_cache_dir(namespace))
        .flatten();

    for &lang in &runnable {
        // Reuse a cached index when one exists and is still within its TTL.
        let cache_hit = cache_dir
            .as_deref()
            .and_then(|dir| check_cache(dir, lang, self.config.cache_ttl_hours))
            .filter(|status| status.valid);
        if let Some(status) = cache_hit {
            tracing::info!(
                "Using cached SCIP index for {} ({})",
                lang.name(),
                status.path.display()
            );
            index_files.push(status.path);
            succeeded.push(lang);
            continue;
        }

        let target = scratch.path().join(format!("index-{}.scip", lang.name()));
        if let Err(e) = self.run_indexer(lang, project_root, &target, namespace) {
            tracing::warn!("SCIP indexer for {} failed: {}", lang.name(), e);
            failures.push((lang, e.to_string()));
            continue;
        }

        // Prefer the requested output; otherwise look for the indexer's
        // default file in the project root.
        let produced = if target.exists() {
            target
        } else {
            let fallback = project_root.join(lang.default_output_file());
            if !fallback.exists() {
                failures.push((
                    lang,
                    "Indexer exited successfully but produced no .scip file".to_string(),
                ));
                continue;
            }
            fallback
        };

        if let Some(dir) = cache_dir.as_deref() {
            save_to_cache(dir, lang, &produced);
        }
        index_files.push(produced);
        succeeded.push(lang);
    }

    let scip_result = self.merge_scip_files(&index_files, project_root)?;
    Ok(OrchestrationResult {
        scip_result,
        indexed_languages: succeeded,
        failed_languages: failures,
    })
}
/// Detects the languages used under `project_root` by looking for known
/// manifest files.
///
/// Walks at most three directory levels deep with the `ignore` crate's
/// hidden-file and gitignore filters enabled. A file whose name exactly
/// matches an entry in `MANIFEST_LANGUAGES` marks that language as present;
/// any `*.csproj` file marks C#.
///
/// The returned list has no duplicates and is sorted by language name, so
/// indexing and merge order stay the same from run to run (iterating the
/// intermediate `HashSet` directly would give an arbitrary order).
pub fn detect_languages(&self, project_root: &Path) -> Vec<ScipLanguage> {
    let mut found = std::collections::HashSet::new();
    let walker = ignore::WalkBuilder::new(project_root)
        .hidden(true)
        .git_ignore(true)
        .git_global(true)
        .git_exclude(true)
        .max_depth(Some(3))
        .build();

    // `flatten` skips entries the walker could not read.
    for entry in walker.flatten() {
        if !entry.file_type().is_some_and(|ft| ft.is_file()) {
            continue;
        }
        // A non-UTF-8 file name cannot match any manifest, so skip it.
        let Some(file_name) = entry.path().file_name().and_then(|n| n.to_str()) else {
            continue;
        };

        found.extend(
            MANIFEST_LANGUAGES
                .iter()
                .filter(|(manifest, _)| *manifest == file_name)
                .map(|&(_, lang)| lang),
        );
        if file_name.ends_with(".csproj") {
            found.insert(ScipLanguage::CSharp);
        }
    }

    let mut languages: Vec<ScipLanguage> = found.into_iter().collect();
    languages.sort_unstable_by_key(|lang| lang.name());
    languages
}
/// Filters `languages` down to those that can actually be indexed.
///
/// A language qualifies when a custom command is configured for it, or when
/// indexer auto-detection is enabled and its default binary is found on
/// `PATH`. Input order is preserved.
pub fn detect_available_indexers(&self, languages: &[ScipLanguage]) -> Vec<ScipLanguage> {
    languages
        .iter()
        .copied()
        .filter(|&lang| {
            // Short-circuiting keeps the PATH lookup from running when a
            // command is configured or auto-detection is disabled.
            self.config_command_for(lang).is_some()
                || (self.config.auto_detect_indexers
                    && which_binary(lang.indexer_binary()).is_some())
        })
        .collect()
}
/// Runs the indexer for `lang` inside `project_root` and makes sure its
/// output ends up at `output_path` when the indexer wrote to its default
/// location instead.
///
/// A configured command takes precedence, with `{namespace}` replaced by
/// `namespace`. Otherwise the language's default binary and arguments are
/// used. The child process runs with an augmented `PATH`.
///
/// # Errors
///
/// Returns `CodememError::ScipOrchestration` when the command cannot be
/// parsed or spawned, when it exits unsuccessfully (stderr is included in the
/// message), or when the default output file cannot be moved to
/// `output_path`.
fn run_indexer(
    &self,
    lang: ScipLanguage,
    project_root: &Path,
    output_path: &Path,
    namespace: &str,
) -> Result<(), CodememError> {
    let (program, args) = if let Some(cmd) = self.config_command_for(lang) {
        let expanded = cmd.replace("{namespace}", namespace);
        parse_shell_command(&expanded)?
    } else {
        let binary_name = lang.indexer_binary();
        // Fall back to the bare name and let the OS resolve it at spawn time.
        let resolved = which_binary(binary_name)
            .map(|p| p.display().to_string())
            .unwrap_or_else(|| binary_name.to_string());
        (
            resolved,
            lang.default_args().into_iter().map(String::from).collect(),
        )
    };

    tracing::info!(
        "Running SCIP indexer for {}: {} {:?}",
        lang.name(),
        program,
        args
    );

    let path_env = augmented_path();
    let output = Command::new(&program)
        .args(&args)
        .current_dir(project_root)
        .env("PATH", &path_env)
        .output()
        .map_err(|e| {
            CodememError::ScipOrchestration(format!("Failed to spawn {program}: {e}"))
        })?;

    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        return Err(CodememError::ScipOrchestration(format!(
            "{} exited with {}: {}",
            program,
            output.status,
            stderr.trim()
        )));
    }

    if !output_path.exists() {
        let default_output = project_root.join(lang.default_output_file());
        if default_output.exists() {
            // `rename` cannot cross filesystem boundaries, and the target is
            // usually in the system temp dir, which is often a separate mount
            // from the project. Fall back to copy + delete in that case.
            if std::fs::rename(&default_output, output_path).is_err() {
                std::fs::copy(&default_output, output_path).map_err(|e| {
                    CodememError::ScipOrchestration(format!(
                        "Failed to move {}: {e}",
                        default_output.display()
                    ))
                })?;
                // The index is already in place; a leftover source file is
                // only worth a warning.
                if let Err(e) = std::fs::remove_file(&default_output) {
                    tracing::warn!(
                        "Failed to remove {} after copying: {e}",
                        default_output.display()
                    );
                }
            }
        }
    }
    Ok(())
}
/// Returns the user-configured indexer command for `lang`, if any.
///
/// Only Rust, TypeScript, Python, Java and Go have a configuration slot; an
/// empty string in that slot counts as "not configured".
fn config_command_for(&self, lang: ScipLanguage) -> Option<&String> {
    let indexers = &self.config.indexers;
    let configured = match lang {
        ScipLanguage::Rust => Some(&indexers.rust),
        ScipLanguage::TypeScript => Some(&indexers.typescript),
        ScipLanguage::Python => Some(&indexers.python),
        ScipLanguage::Java => Some(&indexers.java),
        ScipLanguage::Go => Some(&indexers.go),
        ScipLanguage::CSharp | ScipLanguage::Ruby | ScipLanguage::Php | ScipLanguage::Dart => {
            None
        }
    }?;
    (!configured.is_empty()).then_some(configured)
}
/// Reads and parses every SCIP file in `paths` and concatenates the results
/// into one `ScipReadResult` rooted at `project_root`.
///
/// Definitions, references and externals are appended in input order.
/// `covered_files` is sorted and de-duplicated.
///
/// # Errors
///
/// Returns `CodememError::ScipOrchestration` on the first file that cannot
/// be read or parsed.
fn merge_scip_files(
    &self,
    paths: &[PathBuf],
    project_root: &Path,
) -> Result<ScipReadResult, CodememError> {
    let mut combined = ScipReadResult {
        project_root: project_root.to_string_lossy().into_owned(),
        definitions: Vec::new(),
        references: Vec::new(),
        externals: Vec::new(),
        covered_files: Vec::new(),
    };

    for scip_path in paths {
        let raw = match std::fs::read(scip_path) {
            Ok(raw) => raw,
            Err(e) => {
                return Err(CodememError::ScipOrchestration(format!(
                    "Failed to read {}: {e}",
                    scip_path.display()
                )));
            }
        };
        let ScipReadResult {
            definitions,
            references,
            externals,
            covered_files,
            ..
        } = parse_scip_bytes(&raw).map_err(CodememError::ScipOrchestration)?;

        combined.definitions.extend(definitions);
        combined.references.extend(references);
        combined.externals.extend(externals);
        combined.covered_files.extend(covered_files);
    }

    // Several indexes may cover the same file; keep each entry once.
    combined.covered_files.sort();
    combined.covered_files.dedup();
    Ok(combined)
}
}
/// Looks up `name` with the `which` crate and returns the resolved path, or
/// `None` when the lookup fails.
fn which_binary(name: &str) -> Option<PathBuf> {
    match which::which(name) {
        Ok(resolved) => Some(resolved),
        Err(_) => None,
    }
}
/// Builds the `PATH` value handed to indexer processes: the current `PATH`
/// followed by common tool install directories that exist on this machine.
///
/// Entries are split and joined with `std::env::split_paths` and
/// `std::env::join_paths`, so the platform's separator is used (`:` on Unix,
/// `;` on Windows). An unset or empty `PATH` contributes no entries rather
/// than an empty one, which Unix shells treat as the current directory.
/// Directories already present are not appended again.
///
/// A non-UTF-8 `PATH` is converted lossily to fit the `String` return type.
fn augmented_path() -> String {
    let home = dirs::home_dir().unwrap_or_else(|| PathBuf::from("/tmp"));
    let extra_dirs = [
        home.join(".cargo/bin"),
        home.join(".local/bin"),
        home.join(".nvm/current/bin"),
        PathBuf::from("/usr/local/bin"),
        PathBuf::from("/opt/homebrew/bin"),
    ];

    let mut entries: Vec<PathBuf> = match std::env::var_os("PATH") {
        Some(current) if !current.is_empty() => std::env::split_paths(&current).collect(),
        _ => Vec::new(),
    };
    for dir in extra_dirs {
        if dir.is_dir() && !entries.contains(&dir) {
            entries.push(dir);
        }
    }

    match std::env::join_paths(&entries) {
        Ok(joined) => joined.to_string_lossy().into_owned(),
        // Joining fails only when an entry contains the separator itself;
        // in that case hand the child the unmodified PATH.
        Err(_) => std::env::var("PATH").unwrap_or_default(),
    }
}
/// Splits a command line into a program name and its arguments.
///
/// Tokens are separated by whitespace. Text inside matching single or double
/// quotes is kept together with the quotes removed, so an argument may
/// contain spaces (`--output "my dir/index.scip"`). Backslashes are ordinary
/// characters, which keeps Windows-style paths intact. No other shell
/// features (variables, globbing, pipes) are interpreted.
///
/// # Errors
///
/// Returns `CodememError::ScipOrchestration` when the command contains no
/// tokens or a quote is left unterminated.
fn parse_shell_command(cmd: &str) -> Result<(String, Vec<String>), CodememError> {
    let mut tokens: Vec<String> = Vec::new();
    let mut current = String::new();
    // Tracks whether a token has started; needed so `""` yields an empty arg.
    let mut in_token = false;
    let mut quote: Option<char> = None;

    for ch in cmd.chars() {
        match quote {
            // Closing quote: leave quoted mode, stay in the same token.
            Some(q) if ch == q => quote = None,
            Some(_) => current.push(ch),
            None if ch == '"' || ch == '\'' => {
                quote = Some(ch);
                in_token = true;
            }
            None if ch.is_whitespace() => {
                if in_token {
                    tokens.push(std::mem::take(&mut current));
                    in_token = false;
                }
            }
            None => {
                current.push(ch);
                in_token = true;
            }
        }
    }

    if quote.is_some() {
        return Err(CodememError::ScipOrchestration(format!(
            "Unterminated quote in command: {cmd}"
        )));
    }
    if in_token {
        tokens.push(current);
    }

    let mut tokens = tokens.into_iter();
    let Some(program) = tokens.next() else {
        return Err(CodememError::ScipOrchestration(
            "Empty command string".to_string(),
        ));
    };
    Ok((program, tokens.collect()))
}
/// Result of looking up a cached SCIP index, as produced by `check_cache`.
pub struct CacheStatus {
/// Location of the cached index file.
pub path: PathBuf,
/// `true` when the file's age is still below the configured TTL.
pub valid: bool,
}
/// Returns `~/.codemem/scip-cache/<namespace>`, creating it if necessary.
///
/// Yields `None` when the home directory is unknown or the directory cannot
/// be created.
fn scip_cache_dir(namespace: &str) -> Option<PathBuf> {
    let mut dir = dirs::home_dir()?;
    dir.push(".codemem");
    dir.push("scip-cache");
    // NOTE(review): `namespace` is used verbatim as a path component; a value
    // that is an absolute path or contains `..` would resolve outside the
    // cache root. Confirm what callers pass here.
    dir.push(namespace);
    match std::fs::create_dir_all(&dir) {
        Ok(()) => Some(dir),
        Err(_) => None,
    }
}
/// Looks for a cached index for `lang` in `cache_dir` and reports whether it
/// is still fresh.
///
/// Returns `None` when the file does not exist, its metadata or modification
/// time cannot be read, or the modification time lies in the future.
/// Otherwise returns the file's path with `valid` set to `true` if its age in
/// whole seconds is below `ttl_hours` hours.
///
/// The TTL is converted to seconds with a saturating multiply, so very large
/// values such as `u64::MAX` mean "never expires" instead of overflowing.
pub fn check_cache(cache_dir: &Path, lang: ScipLanguage, ttl_hours: u64) -> Option<CacheStatus> {
    let cache_path = cache_dir.join(format!("index-{}.scip", lang.name()));
    // `metadata` already fails for a missing file, so no separate existence
    // check is needed.
    let metadata = std::fs::metadata(&cache_path).ok()?;
    let modified = metadata.modified().ok()?;
    let age = modified.elapsed().ok()?;
    let ttl_secs = ttl_hours.saturating_mul(3600);
    Some(CacheStatus {
        path: cache_path,
        valid: age.as_secs() < ttl_secs,
    })
}
/// Copies the index at `source_path` into `cache_dir` under the per-language
/// cache file name.
///
/// Best effort: a failed copy is logged as a warning and otherwise ignored.
fn save_to_cache(cache_dir: &Path, lang: ScipLanguage, source_path: &Path) {
    let file_name = format!("index-{}.scip", lang.name());
    let destination = cache_dir.join(file_name);
    match std::fs::copy(source_path, &destination) {
        Ok(_) => {}
        Err(e) => {
            tracing::warn!("Failed to cache SCIP index for {}: {e}", lang.name());
        }
    }
}
// Unit tests live in a separate file and are compiled only for test builds.
#[cfg(test)]
#[path = "../tests/scip_orchestrate_tests.rs"]
mod tests;