codemem_engine/index/scip/
orchestrate.rs1use std::path::{Path, PathBuf};
5use std::process::Command;
6
7use codemem_core::{CodememError, ScipConfig};
8
9use super::{parse_scip_bytes, ScipReadResult};
10
/// Languages for which this module knows how to drive a SCIP indexer.
///
/// The enum is `Copy` and hashable, so it can be passed by value and
/// collected into sets while detecting project languages.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ScipLanguage {
    /// Rust projects.
    Rust,
    /// TypeScript projects.
    TypeScript,
    /// Python projects.
    Python,
    /// Java projects.
    Java,
    /// Go projects.
    Go,
    /// C# projects.
    CSharp,
    /// Ruby projects.
    Ruby,
    /// PHP projects.
    Php,
    /// Dart projects.
    Dart,
}
24
25impl ScipLanguage {
26 fn indexer_binary(&self) -> &'static str {
28 match self {
29 Self::Rust => "rust-analyzer",
30 Self::TypeScript => "scip-typescript",
31 Self::Python => "scip-python",
32 Self::Java => "scip-java",
33 Self::Go => "scip-go",
34 Self::CSharp => "scip-dotnet",
35 Self::Ruby => "scip-ruby",
36 Self::Php => "scip-php",
37 Self::Dart => "scip-dart",
38 }
39 }
40
41 fn default_args(&self) -> Vec<&'static str> {
43 match self {
44 Self::Rust => vec!["scip", "."],
45 Self::TypeScript => vec!["index"],
46 Self::Python => vec!["index", "."],
47 Self::Java => vec!["index"],
48 Self::Go => vec![],
49 Self::CSharp => vec!["index"],
50 Self::Ruby => vec![],
51 Self::Php => vec!["index"],
52 Self::Dart => vec![],
53 }
54 }
55
56 fn default_output_file(&self) -> &'static str {
58 "index.scip"
60 }
61
62 fn name(&self) -> &'static str {
63 match self {
64 Self::Rust => "rust",
65 Self::TypeScript => "typescript",
66 Self::Python => "python",
67 Self::Java => "java",
68 Self::Go => "go",
69 Self::CSharp => "csharp",
70 Self::Ruby => "ruby",
71 Self::Php => "php",
72 Self::Dart => "dart",
73 }
74 }
75}
76
/// Manifest file names paired with the language they indicate.
///
/// Entries are compared against a file's exact name. A language may appear
/// more than once because several manifests can identify it. C# is not listed
/// here: it is detected separately by the `.csproj` file-name suffix.
const MANIFEST_LANGUAGES: &[(&str, ScipLanguage)] = &[
    ("Cargo.toml", ScipLanguage::Rust),
    ("package.json", ScipLanguage::TypeScript),
    ("tsconfig.json", ScipLanguage::TypeScript),
    ("pyproject.toml", ScipLanguage::Python),
    ("setup.py", ScipLanguage::Python),
    ("setup.cfg", ScipLanguage::Python),
    ("go.mod", ScipLanguage::Go),
    ("pom.xml", ScipLanguage::Java),
    ("build.gradle", ScipLanguage::Java),
    ("build.gradle.kts", ScipLanguage::Java),
    ("pubspec.yaml", ScipLanguage::Dart),
    ("Gemfile", ScipLanguage::Ruby),
    ("composer.json", ScipLanguage::Php),
];
93
/// Outcome of one orchestration run across all detected languages.
#[derive(Debug)]
pub struct OrchestrationResult {
    /// Merged contents of every SCIP index that was read.
    pub scip_result: ScipReadResult,
    /// Languages for which an index was obtained, either freshly generated
    /// or taken from the cache.
    pub indexed_languages: Vec<ScipLanguage>,
    /// Languages whose indexer failed, paired with the error message.
    pub failed_languages: Vec<(ScipLanguage, String)>,
}
104
105impl OrchestrationResult {
106 fn empty(project_root: &Path) -> Self {
108 Self {
109 scip_result: ScipReadResult {
110 project_root: project_root.to_string_lossy().to_string(),
111 definitions: Vec::new(),
112 references: Vec::new(),
113 externals: Vec::new(),
114 covered_files: Vec::new(),
115 },
116 indexed_languages: Vec::new(),
117 failed_languages: Vec::new(),
118 }
119 }
120}
121
/// Detects project languages, runs the matching SCIP indexers and merges
/// their output, driven by a [`ScipConfig`].
pub struct ScipOrchestrator {
    /// Settings read by the orchestrator: per-language indexer commands,
    /// indexer auto-detection, and index caching.
    config: ScipConfig,
}
126
127impl ScipOrchestrator {
    /// Creates an orchestrator that uses `config` for all subsequent runs.
    pub fn new(config: ScipConfig) -> Self {
        Self { config }
    }
131
132 pub fn run(
134 &self,
135 project_root: &Path,
136 namespace: &str,
137 ) -> Result<OrchestrationResult, CodememError> {
138 let detected_languages = self.detect_languages(project_root);
140 if detected_languages.is_empty() {
141 return Ok(OrchestrationResult::empty(project_root));
142 }
143
144 let available = self.detect_available_indexers(&detected_languages);
146 if available.is_empty() {
147 tracing::info!("No SCIP indexers found on PATH for detected languages");
148 return Ok(OrchestrationResult::empty(project_root));
149 }
150
151 let mut indexed_languages = Vec::new();
153 let mut failed_languages = Vec::new();
154 let mut scip_files: Vec<PathBuf> = Vec::new();
155
156 let temp_dir = tempfile::tempdir().map_err(|e| {
157 CodememError::ScipOrchestration(format!("Failed to create temp dir: {e}"))
158 })?;
159
160 let cache_dir = if self.config.cache_index {
162 scip_cache_dir(namespace)
163 } else {
164 None
165 };
166
167 for lang in &available {
168 if let Some(ref cache) = cache_dir {
170 if let Some(status) = check_cache(cache, *lang, self.config.cache_ttl_hours) {
171 if status.valid {
172 tracing::info!(
173 "Using cached SCIP index for {} ({})",
174 lang.name(),
175 status.path.display()
176 );
177 scip_files.push(status.path);
178 indexed_languages.push(*lang);
179 continue;
180 }
181 }
182 }
183
184 let output_path = temp_dir.path().join(format!("index-{}.scip", lang.name()));
185
186 match self.run_indexer(*lang, project_root, &output_path, namespace) {
187 Ok(()) => {
188 let scip_path = if output_path.exists() {
190 output_path
191 } else {
192 let default_path = project_root.join(lang.default_output_file());
193 if default_path.exists() {
194 default_path
195 } else {
196 failed_languages.push((
197 *lang,
198 "Indexer exited successfully but produced no .scip file"
199 .to_string(),
200 ));
201 continue;
202 }
203 };
204
205 if let Some(ref cache) = cache_dir {
207 save_to_cache(cache, *lang, &scip_path);
208 }
209
210 scip_files.push(scip_path);
211 indexed_languages.push(*lang);
212 }
213 Err(e) => {
214 tracing::warn!("SCIP indexer for {} failed: {}", lang.name(), e);
215 failed_languages.push((*lang, e.to_string()));
216 }
217 }
218 }
219
220 let scip_result = self.merge_scip_files(&scip_files, project_root)?;
222
223 Ok(OrchestrationResult {
224 scip_result,
225 indexed_languages,
226 failed_languages,
227 })
228 }
229
230 pub fn detect_languages(&self, project_root: &Path) -> Vec<ScipLanguage> {
232 let mut found = std::collections::HashSet::new();
233
234 let walker = ignore::WalkBuilder::new(project_root)
235 .hidden(true)
236 .git_ignore(true)
237 .git_global(true)
238 .git_exclude(true)
239 .max_depth(Some(3)) .build();
241
242 for entry in walker.flatten() {
243 if !entry.file_type().is_some_and(|ft| ft.is_file()) {
244 continue;
245 }
246 let file_name = entry
247 .path()
248 .file_name()
249 .and_then(|n| n.to_str())
250 .unwrap_or("");
251
252 for &(manifest, lang) in MANIFEST_LANGUAGES {
253 if file_name == manifest {
254 found.insert(lang);
255 }
256 }
257
258 if file_name.ends_with(".csproj") {
260 found.insert(ScipLanguage::CSharp);
261 }
262 }
263
264 found.into_iter().collect()
265 }
266
267 pub fn detect_available_indexers(&self, languages: &[ScipLanguage]) -> Vec<ScipLanguage> {
269 let mut available = Vec::new();
270
271 for &lang in languages {
272 if self.config_command_for(lang).is_some() {
274 available.push(lang);
275 continue;
276 }
277
278 if !self.config.auto_detect_indexers {
280 continue;
281 }
282 if which_binary(lang.indexer_binary()).is_some() {
283 available.push(lang);
284 }
285 }
286
287 available
288 }
289
290 fn run_indexer(
292 &self,
293 lang: ScipLanguage,
294 project_root: &Path,
295 output_path: &Path,
296 namespace: &str,
297 ) -> Result<(), CodememError> {
298 let (program, args) = if let Some(cmd) = self.config_command_for(lang) {
299 let expanded = cmd.replace("{namespace}", namespace);
301 parse_shell_command(&expanded)?
302 } else {
303 (
304 lang.indexer_binary().to_string(),
305 lang.default_args().iter().map(|s| s.to_string()).collect(),
306 )
307 };
308
309 tracing::info!(
310 "Running SCIP indexer for {}: {} {:?}",
311 lang.name(),
312 program,
313 args
314 );
315
316 let output = Command::new(&program)
317 .args(&args)
318 .current_dir(project_root)
319 .output()
320 .map_err(|e| {
321 CodememError::ScipOrchestration(format!("Failed to spawn {program}: {e}"))
322 })?;
323
324 if !output.status.success() {
325 let stderr = String::from_utf8_lossy(&output.stderr);
326 return Err(CodememError::ScipOrchestration(format!(
327 "{} exited with {}: {}",
328 program,
329 output.status,
330 stderr.trim()
331 )));
332 }
333
334 if !output_path.exists() {
337 let default_output = project_root.join(lang.default_output_file());
338 if default_output.exists() {
339 std::fs::rename(&default_output, output_path).map_err(|e| {
340 CodememError::ScipOrchestration(format!(
341 "Failed to move {}: {e}",
342 default_output.display()
343 ))
344 })?;
345 }
346 }
347
348 Ok(())
349 }
350
351 fn config_command_for(&self, lang: ScipLanguage) -> Option<&String> {
353 let cmd = match lang {
354 ScipLanguage::Rust => &self.config.indexers.rust,
355 ScipLanguage::TypeScript => &self.config.indexers.typescript,
356 ScipLanguage::Python => &self.config.indexers.python,
357 ScipLanguage::Java => &self.config.indexers.java,
358 ScipLanguage::Go => &self.config.indexers.go,
359 ScipLanguage::CSharp | ScipLanguage::Ruby | ScipLanguage::Php | ScipLanguage::Dart => {
361 return None;
362 }
363 };
364 if cmd.is_empty() {
365 None
366 } else {
367 Some(cmd)
368 }
369 }
370
371 fn merge_scip_files(
373 &self,
374 paths: &[PathBuf],
375 project_root: &Path,
376 ) -> Result<ScipReadResult, CodememError> {
377 let mut merged = ScipReadResult {
378 project_root: project_root.to_string_lossy().to_string(),
379 definitions: Vec::new(),
380 references: Vec::new(),
381 externals: Vec::new(),
382 covered_files: Vec::new(),
383 };
384
385 for path in paths {
386 let bytes = std::fs::read(path).map_err(|e| {
387 CodememError::ScipOrchestration(format!("Failed to read {}: {e}", path.display()))
388 })?;
389 let result = parse_scip_bytes(&bytes).map_err(CodememError::ScipOrchestration)?;
390 merged.definitions.extend(result.definitions);
391 merged.references.extend(result.references);
392 merged.externals.extend(result.externals);
393 merged.covered_files.extend(result.covered_files);
394 }
395
396 merged.covered_files.sort();
398 merged.covered_files.dedup();
399
400 Ok(merged)
401 }
402}
403
/// Looks up `name` with `which::which` and returns the resolved path, or
/// `None` if the lookup fails for any reason.
fn which_binary(name: &str) -> Option<PathBuf> {
    which::which(name).ok()
}
408
409fn parse_shell_command(cmd: &str) -> Result<(String, Vec<String>), CodememError> {
413 let parts: Vec<&str> = cmd.split_whitespace().collect();
414 if parts.is_empty() {
415 return Err(CodememError::ScipOrchestration(
416 "Empty command string".to_string(),
417 ));
418 }
419 let program = parts[0].to_string();
420 let args = parts[1..].iter().map(|s| s.to_string()).collect();
421 Ok((program, args))
422}
423
/// Result of looking up a cached SCIP index for one language.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so the status can be
/// logged, copied and compared in assertions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CacheStatus {
    /// Location of the cached index file.
    pub path: PathBuf,
    /// `true` while the file is younger than the configured time-to-live.
    pub valid: bool,
}
431
432fn scip_cache_dir(namespace: &str) -> Option<PathBuf> {
435 let home = dirs::home_dir()?;
436 let dir = home.join(".codemem").join("scip-cache").join(namespace);
437 std::fs::create_dir_all(&dir).ok()?;
438 Some(dir)
439}
440
441pub fn check_cache(cache_dir: &Path, lang: ScipLanguage, ttl_hours: u64) -> Option<CacheStatus> {
443 let cache_path = cache_dir.join(format!("index-{}.scip", lang.name()));
444 if !cache_path.exists() {
445 return None;
446 }
447
448 let metadata = std::fs::metadata(&cache_path).ok()?;
449 let modified = metadata.modified().ok()?;
450 let age = modified.elapsed().ok()?;
451 let valid = age.as_secs() < ttl_hours * 3600;
452
453 Some(CacheStatus {
454 path: cache_path,
455 valid,
456 })
457}
458
459fn save_to_cache(cache_dir: &Path, lang: ScipLanguage, source_path: &Path) {
461 let cache_path = cache_dir.join(format!("index-{}.scip", lang.name()));
462 if let Err(e) = std::fs::copy(source_path, &cache_path) {
463 tracing::warn!("Failed to cache SCIP index for {}: {e}", lang.name());
464 }
465}
466
// Unit tests live in a separate file and are compiled only for test builds.
#[cfg(test)]
#[path = "../tests/scip_orchestrate_tests.rs"]
mod tests;