Skip to main content

codemem_engine/index/scip/
orchestrate.rs

1//! SCIP indexer orchestration: auto-detect project languages and available SCIP indexers,
2//! run them, and merge the resulting `.scip` files.
3
4use std::path::{Path, PathBuf};
5use std::process::Command;
6
7use codemem_core::{CodememError, ScipConfig};
8
9use super::{parse_scip_bytes, ScipReadResult};
10
/// A project language for which a SCIP indexer is known.
///
/// Values are produced by manifest detection and are cheap to copy, compare and hash.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ScipLanguage {
    /// Rust (indexed with `rust-analyzer`).
    Rust,
    /// TypeScript (indexed with `scip-typescript`).
    TypeScript,
    /// Python (indexed with `scip-python`).
    Python,
    /// Java (indexed with `scip-java`).
    Java,
    /// Go (indexed with `scip-go`).
    Go,
    /// C# (indexed with `scip-dotnet`).
    CSharp,
    /// Ruby (indexed with `scip-ruby`).
    Ruby,
    /// PHP (indexed with `scip-php`).
    Php,
    /// Dart (indexed with `scip-dart`).
    Dart,
}
24
25impl ScipLanguage {
26    /// The binary name to search for on PATH.
27    fn indexer_binary(&self) -> &'static str {
28        match self {
29            Self::Rust => "rust-analyzer",
30            Self::TypeScript => "scip-typescript",
31            Self::Python => "scip-python",
32            Self::Java => "scip-java",
33            Self::Go => "scip-go",
34            Self::CSharp => "scip-dotnet",
35            Self::Ruby => "scip-ruby",
36            Self::Php => "scip-php",
37            Self::Dart => "scip-dart",
38        }
39    }
40
41    /// Default arguments for the indexer when no config override is provided.
42    fn default_args(&self) -> Vec<&'static str> {
43        match self {
44            Self::Rust => vec!["scip", "."],
45            Self::TypeScript => vec!["index"],
46            Self::Python => vec!["index", "."],
47            Self::Java => vec!["index"],
48            Self::Go => vec![],
49            Self::CSharp => vec!["index"],
50            Self::Ruby => vec![],
51            Self::Php => vec!["index"],
52            Self::Dart => vec![],
53        }
54    }
55
56    /// Default output filename for this language's indexer.
57    fn default_output_file(&self) -> &'static str {
58        // All SCIP indexers write to the same default filename.
59        "index.scip"
60    }
61
62    fn name(&self) -> &'static str {
63        match self {
64            Self::Rust => "rust",
65            Self::TypeScript => "typescript",
66            Self::Python => "python",
67            Self::Java => "java",
68            Self::Go => "go",
69            Self::CSharp => "csharp",
70            Self::Ruby => "ruby",
71            Self::Php => "php",
72            Self::Dart => "dart",
73        }
74    }
75}
76
77/// Manifest file patterns that indicate a project language.
78const MANIFEST_LANGUAGES: &[(&str, ScipLanguage)] = &[
79    ("Cargo.toml", ScipLanguage::Rust),
80    ("package.json", ScipLanguage::TypeScript),
81    ("tsconfig.json", ScipLanguage::TypeScript),
82    ("pyproject.toml", ScipLanguage::Python),
83    ("setup.py", ScipLanguage::Python),
84    ("setup.cfg", ScipLanguage::Python),
85    ("go.mod", ScipLanguage::Go),
86    ("pom.xml", ScipLanguage::Java),
87    ("build.gradle", ScipLanguage::Java),
88    ("build.gradle.kts", ScipLanguage::Java),
89    ("pubspec.yaml", ScipLanguage::Dart),
90    ("Gemfile", ScipLanguage::Ruby),
91    ("composer.json", ScipLanguage::Php),
92];
93
94/// Result of running SCIP indexers.
95#[derive(Debug)]
96pub struct OrchestrationResult {
97    /// The merged SCIP read result (definitions, references, externals, covered files).
98    pub scip_result: ScipReadResult,
99    /// Languages for which indexers ran successfully.
100    pub indexed_languages: Vec<ScipLanguage>,
101    /// Languages for which indexers were available but failed.
102    pub failed_languages: Vec<(ScipLanguage, String)>,
103}
104
105impl OrchestrationResult {
106    /// Create an empty result with no definitions, references, or indexed languages.
107    fn empty(project_root: &Path) -> Self {
108        Self {
109            scip_result: ScipReadResult {
110                project_root: project_root.to_string_lossy().to_string(),
111                definitions: Vec::new(),
112                references: Vec::new(),
113                externals: Vec::new(),
114                covered_files: Vec::new(),
115            },
116            indexed_languages: Vec::new(),
117            failed_languages: Vec::new(),
118        }
119    }
120}
121
122/// Orchestrates SCIP indexer detection and execution.
123pub struct ScipOrchestrator {
124    config: ScipConfig,
125}
126
127impl ScipOrchestrator {
128    pub fn new(config: ScipConfig) -> Self {
129        Self { config }
130    }
131
132    /// Run the full orchestration pipeline: detect → run → merge.
133    pub fn run(
134        &self,
135        project_root: &Path,
136        namespace: &str,
137    ) -> Result<OrchestrationResult, CodememError> {
138        // Phase 1: Detect languages from manifests.
139        let detected_languages = self.detect_languages(project_root);
140        if detected_languages.is_empty() {
141            return Ok(OrchestrationResult::empty(project_root));
142        }
143
144        // Phase 2: Determine which indexers are available.
145        let available = self.detect_available_indexers(&detected_languages);
146        if available.is_empty() {
147            tracing::info!("No SCIP indexers found on PATH for detected languages");
148            return Ok(OrchestrationResult::empty(project_root));
149        }
150
151        // Phase 3: Run indexers and collect .scip files.
152        let mut indexed_languages = Vec::new();
153        let mut failed_languages = Vec::new();
154        let mut scip_files: Vec<PathBuf> = Vec::new();
155
156        let temp_dir = tempfile::tempdir().map_err(|e| {
157            CodememError::ScipOrchestration(format!("Failed to create temp dir: {e}"))
158        })?;
159
160        // Resolve cache dir once (None if caching disabled or home dir unavailable).
161        let cache_dir = if self.config.cache_index {
162            scip_cache_dir(namespace)
163        } else {
164            None
165        };
166
167        for lang in &available {
168            // Check cache first if enabled.
169            if let Some(ref cache) = cache_dir {
170                if let Some(status) = check_cache(cache, *lang, self.config.cache_ttl_hours) {
171                    if status.valid {
172                        tracing::info!(
173                            "Using cached SCIP index for {} ({})",
174                            lang.name(),
175                            status.path.display()
176                        );
177                        scip_files.push(status.path);
178                        indexed_languages.push(*lang);
179                        continue;
180                    }
181                }
182            }
183
184            let output_path = temp_dir.path().join(format!("index-{}.scip", lang.name()));
185
186            match self.run_indexer(*lang, project_root, &output_path, namespace) {
187                Ok(()) => {
188                    // Find the actual .scip file (either at output_path or default location).
189                    let scip_path = if output_path.exists() {
190                        output_path
191                    } else {
192                        let default_path = project_root.join(lang.default_output_file());
193                        if default_path.exists() {
194                            default_path
195                        } else {
196                            failed_languages.push((
197                                *lang,
198                                "Indexer exited successfully but produced no .scip file"
199                                    .to_string(),
200                            ));
201                            continue;
202                        }
203                    };
204
205                    // Save to cache for future runs.
206                    if let Some(ref cache) = cache_dir {
207                        save_to_cache(cache, *lang, &scip_path);
208                    }
209
210                    scip_files.push(scip_path);
211                    indexed_languages.push(*lang);
212                }
213                Err(e) => {
214                    tracing::warn!("SCIP indexer for {} failed: {}", lang.name(), e);
215                    failed_languages.push((*lang, e.to_string()));
216                }
217            }
218        }
219
220        // Phase 4: Parse and merge all .scip files.
221        let scip_result = self.merge_scip_files(&scip_files, project_root)?;
222
223        Ok(OrchestrationResult {
224            scip_result,
225            indexed_languages,
226            failed_languages,
227        })
228    }
229
230    /// Detect which languages are used in the project by scanning for manifest files.
231    pub fn detect_languages(&self, project_root: &Path) -> Vec<ScipLanguage> {
232        let mut found = std::collections::HashSet::new();
233
234        let walker = ignore::WalkBuilder::new(project_root)
235            .hidden(true)
236            .git_ignore(true)
237            .git_global(true)
238            .git_exclude(true)
239            .max_depth(Some(3)) // Don't recurse too deep for manifest detection
240            .build();
241
242        for entry in walker.flatten() {
243            if !entry.file_type().is_some_and(|ft| ft.is_file()) {
244                continue;
245            }
246            let file_name = entry
247                .path()
248                .file_name()
249                .and_then(|n| n.to_str())
250                .unwrap_or("");
251
252            for &(manifest, lang) in MANIFEST_LANGUAGES {
253                if file_name == manifest {
254                    found.insert(lang);
255                }
256            }
257
258            // .csproj files by extension
259            if file_name.ends_with(".csproj") {
260                found.insert(ScipLanguage::CSharp);
261            }
262        }
263
264        found.into_iter().collect()
265    }
266
267    /// Check which indexers are available on PATH or configured with explicit commands.
268    pub fn detect_available_indexers(&self, languages: &[ScipLanguage]) -> Vec<ScipLanguage> {
269        let mut available = Vec::new();
270
271        for &lang in languages {
272            // Check if there's a config override for this language.
273            if self.config_command_for(lang).is_some() {
274                available.push(lang);
275                continue;
276            }
277
278            // Auto-detect from PATH.
279            if !self.config.auto_detect_indexers {
280                continue;
281            }
282            if which_binary(lang.indexer_binary()).is_some() {
283                available.push(lang);
284            }
285        }
286
287        available
288    }
289
290    /// Run a single SCIP indexer for the given language.
291    fn run_indexer(
292        &self,
293        lang: ScipLanguage,
294        project_root: &Path,
295        output_path: &Path,
296        namespace: &str,
297    ) -> Result<(), CodememError> {
298        let (program, args) = if let Some(cmd) = self.config_command_for(lang) {
299            // Parse the config override command, substituting {namespace}.
300            let expanded = cmd.replace("{namespace}", namespace);
301            parse_shell_command(&expanded)?
302        } else {
303            (
304                lang.indexer_binary().to_string(),
305                lang.default_args().iter().map(|s| s.to_string()).collect(),
306            )
307        };
308
309        tracing::info!(
310            "Running SCIP indexer for {}: {} {:?}",
311            lang.name(),
312            program,
313            args
314        );
315
316        let output = Command::new(&program)
317            .args(&args)
318            .current_dir(project_root)
319            .output()
320            .map_err(|e| {
321                CodememError::ScipOrchestration(format!("Failed to spawn {program}: {e}"))
322            })?;
323
324        if !output.status.success() {
325            let stderr = String::from_utf8_lossy(&output.stderr);
326            return Err(CodememError::ScipOrchestration(format!(
327                "{} exited with {}: {}",
328                program,
329                output.status,
330                stderr.trim()
331            )));
332        }
333
334        // Many indexers write to index.scip in the project root by default.
335        // If the output file doesn't exist yet, try to move the default output.
336        if !output_path.exists() {
337            let default_output = project_root.join(lang.default_output_file());
338            if default_output.exists() {
339                std::fs::rename(&default_output, output_path).map_err(|e| {
340                    CodememError::ScipOrchestration(format!(
341                        "Failed to move {}: {e}",
342                        default_output.display()
343                    ))
344                })?;
345            }
346        }
347
348        Ok(())
349    }
350
351    /// Get the config override command for a language, if any.
352    fn config_command_for(&self, lang: ScipLanguage) -> Option<&String> {
353        let cmd = match lang {
354            ScipLanguage::Rust => &self.config.indexers.rust,
355            ScipLanguage::TypeScript => &self.config.indexers.typescript,
356            ScipLanguage::Python => &self.config.indexers.python,
357            ScipLanguage::Java => &self.config.indexers.java,
358            ScipLanguage::Go => &self.config.indexers.go,
359            // These languages don't have config overrides in ScipIndexersConfig yet.
360            ScipLanguage::CSharp | ScipLanguage::Ruby | ScipLanguage::Php | ScipLanguage::Dart => {
361                return None;
362            }
363        };
364        if cmd.is_empty() {
365            None
366        } else {
367            Some(cmd)
368        }
369    }
370
371    /// Parse and merge multiple .scip files into a single ScipReadResult.
372    fn merge_scip_files(
373        &self,
374        paths: &[PathBuf],
375        project_root: &Path,
376    ) -> Result<ScipReadResult, CodememError> {
377        let mut merged = ScipReadResult {
378            project_root: project_root.to_string_lossy().to_string(),
379            definitions: Vec::new(),
380            references: Vec::new(),
381            externals: Vec::new(),
382            covered_files: Vec::new(),
383        };
384
385        for path in paths {
386            let bytes = std::fs::read(path).map_err(|e| {
387                CodememError::ScipOrchestration(format!("Failed to read {}: {e}", path.display()))
388            })?;
389            let result = parse_scip_bytes(&bytes).map_err(CodememError::ScipOrchestration)?;
390            merged.definitions.extend(result.definitions);
391            merged.references.extend(result.references);
392            merged.externals.extend(result.externals);
393            merged.covered_files.extend(result.covered_files);
394        }
395
396        // Dedup covered files (multiple indexers might cover overlapping files).
397        merged.covered_files.sort();
398        merged.covered_files.dedup();
399
400        Ok(merged)
401    }
402}
403
404/// Check if a binary is available on PATH.
405fn which_binary(name: &str) -> Option<PathBuf> {
406    which::which(name).ok()
407}
408
409/// Parse a shell command string into (program, args).
410///
411/// Simple whitespace splitting — does not handle quoted strings.
412fn parse_shell_command(cmd: &str) -> Result<(String, Vec<String>), CodememError> {
413    let parts: Vec<&str> = cmd.split_whitespace().collect();
414    if parts.is_empty() {
415        return Err(CodememError::ScipOrchestration(
416            "Empty command string".to_string(),
417        ));
418    }
419    let program = parts[0].to_string();
420    let args = parts[1..].iter().map(|s| s.to_string()).collect();
421    Ok((program, args))
422}
423
/// Result of checking SCIP cache validity.
///
/// A value of this type means a cached index file was found; `valid` says
/// whether it is still fresh enough to use.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CacheStatus {
    /// Path to the cached .scip file.
    pub path: PathBuf,
    /// Whether the cache is still valid (within TTL).
    pub valid: bool,
}
431
432/// Resolve the SCIP cache directory for a given namespace.
433/// Returns `~/.codemem/scip-cache/{namespace}/`, creating it if needed.
434fn scip_cache_dir(namespace: &str) -> Option<PathBuf> {
435    let home = dirs::home_dir()?;
436    let dir = home.join(".codemem").join("scip-cache").join(namespace);
437    std::fs::create_dir_all(&dir).ok()?;
438    Some(dir)
439}
440
441/// Check if a cached SCIP index exists in `cache_dir` and is within the TTL.
442pub fn check_cache(cache_dir: &Path, lang: ScipLanguage, ttl_hours: u64) -> Option<CacheStatus> {
443    let cache_path = cache_dir.join(format!("index-{}.scip", lang.name()));
444    if !cache_path.exists() {
445        return None;
446    }
447
448    let metadata = std::fs::metadata(&cache_path).ok()?;
449    let modified = metadata.modified().ok()?;
450    let age = modified.elapsed().ok()?;
451    let valid = age.as_secs() < ttl_hours * 3600;
452
453    Some(CacheStatus {
454        path: cache_path,
455        valid,
456    })
457}
458
459/// Save a .scip file to the given cache directory for future runs.
460fn save_to_cache(cache_dir: &Path, lang: ScipLanguage, source_path: &Path) {
461    let cache_path = cache_dir.join(format!("index-{}.scip", lang.name()));
462    if let Err(e) = std::fs::copy(source_path, &cache_path) {
463        tracing::warn!("Failed to cache SCIP index for {}: {e}", lang.name());
464    }
465}
466
467#[cfg(test)]
468#[path = "../tests/scip_orchestrate_tests.rs"]
469mod tests;