// codemem_engine/index/scip/orchestrate.rs

//! SCIP indexer orchestration: auto-detect project languages and available SCIP indexers,
//! run them, and merge the resulting `.scip` files.
3
use std::path::{Path, PathBuf};
use std::process::Command;

use codemem_core::{CodememError, ScipConfig};

use super::{parse_scip_bytes, ScipReadResult};
10
/// A project language for which a SCIP indexer can be run.
///
/// Languages are detected from manifest files found in the project
/// (for example `Cargo.toml` or `go.mod`); C# is detected from `*.csproj` files.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ScipLanguage {
    /// Rust, indexed with `rust-analyzer`.
    Rust,
    /// TypeScript (also selected by `package.json`), indexed with `scip-typescript`.
    TypeScript,
    /// Python, indexed with `scip-python`.
    Python,
    /// Java (Maven or Gradle manifests), indexed with `scip-java`.
    Java,
    /// Go, indexed with `scip-go`.
    Go,
    /// C#, indexed with `scip-dotnet`.
    CSharp,
    /// Ruby, indexed with `scip-ruby`.
    Ruby,
    /// PHP, indexed with `scip-php`.
    Php,
    /// Dart, indexed with `scip-dart`.
    Dart,
}
24
25impl ScipLanguage {
26    /// The binary name to search for on PATH.
27    fn indexer_binary(&self) -> &'static str {
28        match self {
29            Self::Rust => "rust-analyzer",
30            Self::TypeScript => "scip-typescript",
31            Self::Python => "scip-python",
32            Self::Java => "scip-java",
33            Self::Go => "scip-go",
34            Self::CSharp => "scip-dotnet",
35            Self::Ruby => "scip-ruby",
36            Self::Php => "scip-php",
37            Self::Dart => "scip-dart",
38        }
39    }
40
41    /// Default arguments for the indexer when no config override is provided.
42    fn default_args(&self) -> Vec<&'static str> {
43        match self {
44            Self::Rust => vec!["scip", "."],
45            Self::TypeScript => vec!["index"],
46            Self::Python => vec!["index", "."],
47            Self::Java => vec!["index"],
48            Self::Go => vec![],
49            Self::CSharp => vec!["index"],
50            Self::Ruby => vec![],
51            Self::Php => vec!["index"],
52            Self::Dart => vec![],
53        }
54    }
55
56    /// Default output filename for this language's indexer.
57    fn default_output_file(&self) -> &'static str {
58        // All SCIP indexers write to the same default filename.
59        "index.scip"
60    }
61
62    fn name(&self) -> &'static str {
63        match self {
64            Self::Rust => "rust",
65            Self::TypeScript => "typescript",
66            Self::Python => "python",
67            Self::Java => "java",
68            Self::Go => "go",
69            Self::CSharp => "csharp",
70            Self::Ruby => "ruby",
71            Self::Php => "php",
72            Self::Dart => "dart",
73        }
74    }
75}
76
77/// Manifest file patterns that indicate a project language.
78const MANIFEST_LANGUAGES: &[(&str, ScipLanguage)] = &[
79    ("Cargo.toml", ScipLanguage::Rust),
80    ("package.json", ScipLanguage::TypeScript),
81    ("tsconfig.json", ScipLanguage::TypeScript),
82    ("pyproject.toml", ScipLanguage::Python),
83    ("setup.py", ScipLanguage::Python),
84    ("setup.cfg", ScipLanguage::Python),
85    ("go.mod", ScipLanguage::Go),
86    ("pom.xml", ScipLanguage::Java),
87    ("build.gradle", ScipLanguage::Java),
88    ("build.gradle.kts", ScipLanguage::Java),
89    ("pubspec.yaml", ScipLanguage::Dart),
90    ("Gemfile", ScipLanguage::Ruby),
91    ("composer.json", ScipLanguage::Php),
92];
93
94/// Result of running SCIP indexers.
95#[derive(Debug)]
96pub struct OrchestrationResult {
97    /// The merged SCIP read result (definitions, references, externals, covered files).
98    pub scip_result: ScipReadResult,
99    /// Languages for which indexers ran successfully.
100    pub indexed_languages: Vec<ScipLanguage>,
101    /// Languages for which indexers were available but failed.
102    pub failed_languages: Vec<(ScipLanguage, String)>,
103}
104
105impl OrchestrationResult {
106    /// Create an empty result with no definitions, references, or indexed languages.
107    fn empty(project_root: &Path) -> Self {
108        Self {
109            scip_result: ScipReadResult {
110                project_root: project_root.to_string_lossy().to_string(),
111                definitions: Vec::new(),
112                references: Vec::new(),
113                externals: Vec::new(),
114                covered_files: Vec::new(),
115            },
116            indexed_languages: Vec::new(),
117            failed_languages: Vec::new(),
118        }
119    }
120}
121
122/// Orchestrates SCIP indexer detection and execution.
123pub struct ScipOrchestrator {
124    config: ScipConfig,
125}
126
127impl ScipOrchestrator {
128    pub fn new(config: ScipConfig) -> Self {
129        Self { config }
130    }
131
132    /// Run the full orchestration pipeline: detect → run → merge.
133    pub fn run(
134        &self,
135        project_root: &Path,
136        namespace: &str,
137    ) -> Result<OrchestrationResult, CodememError> {
138        // Phase 1: Detect languages from manifests.
139        let detected_languages = self.detect_languages(project_root);
140        if detected_languages.is_empty() {
141            return Ok(OrchestrationResult::empty(project_root));
142        }
143
144        // Phase 2: Determine which indexers are available.
145        let available = self.detect_available_indexers(&detected_languages);
146        if available.is_empty() {
147            tracing::info!("No SCIP indexers found on PATH for detected languages");
148            return Ok(OrchestrationResult::empty(project_root));
149        }
150
151        // Phase 3: Run indexers and collect .scip files.
152        let mut indexed_languages = Vec::new();
153        let mut failed_languages = Vec::new();
154        let mut scip_files: Vec<PathBuf> = Vec::new();
155
156        let temp_dir = tempfile::tempdir().map_err(|e| {
157            CodememError::ScipOrchestration(format!("Failed to create temp dir: {e}"))
158        })?;
159
160        // Resolve cache dir once (None if caching disabled or home dir unavailable).
161        let cache_dir = if self.config.cache_index {
162            scip_cache_dir(namespace)
163        } else {
164            None
165        };
166
167        for lang in &available {
168            // Check cache first if enabled.
169            if let Some(ref cache) = cache_dir {
170                if let Some(status) = check_cache(cache, *lang, self.config.cache_ttl_hours) {
171                    if status.valid {
172                        tracing::info!(
173                            "Using cached SCIP index for {} ({})",
174                            lang.name(),
175                            status.path.display()
176                        );
177                        scip_files.push(status.path);
178                        indexed_languages.push(*lang);
179                        continue;
180                    }
181                }
182            }
183
184            let output_path = temp_dir.path().join(format!("index-{}.scip", lang.name()));
185
186            match self.run_indexer(*lang, project_root, &output_path, namespace) {
187                Ok(()) => {
188                    // Find the actual .scip file (either at output_path or default location).
189                    let scip_path = if output_path.exists() {
190                        output_path
191                    } else {
192                        let default_path = project_root.join(lang.default_output_file());
193                        if default_path.exists() {
194                            default_path
195                        } else {
196                            failed_languages.push((
197                                *lang,
198                                "Indexer exited successfully but produced no .scip file"
199                                    .to_string(),
200                            ));
201                            continue;
202                        }
203                    };
204
205                    // Save to cache for future runs.
206                    if let Some(ref cache) = cache_dir {
207                        save_to_cache(cache, *lang, &scip_path);
208                    }
209
210                    scip_files.push(scip_path);
211                    indexed_languages.push(*lang);
212                }
213                Err(e) => {
214                    tracing::warn!("SCIP indexer for {} failed: {}", lang.name(), e);
215                    failed_languages.push((*lang, e.to_string()));
216                }
217            }
218        }
219
220        // Phase 4: Parse and merge all .scip files.
221        let scip_result = self.merge_scip_files(&scip_files, project_root)?;
222
223        Ok(OrchestrationResult {
224            scip_result,
225            indexed_languages,
226            failed_languages,
227        })
228    }
229
230    /// Detect which languages are used in the project by scanning for manifest files.
231    pub fn detect_languages(&self, project_root: &Path) -> Vec<ScipLanguage> {
232        let mut found = std::collections::HashSet::new();
233
234        let walker = ignore::WalkBuilder::new(project_root)
235            .hidden(true)
236            .git_ignore(true)
237            .git_global(true)
238            .git_exclude(true)
239            .max_depth(Some(3)) // Don't recurse too deep for manifest detection
240            .build();
241
242        for entry in walker.flatten() {
243            if !entry.file_type().is_some_and(|ft| ft.is_file()) {
244                continue;
245            }
246            let file_name = entry
247                .path()
248                .file_name()
249                .and_then(|n| n.to_str())
250                .unwrap_or("");
251
252            for &(manifest, lang) in MANIFEST_LANGUAGES {
253                if file_name == manifest {
254                    found.insert(lang);
255                }
256            }
257
258            // .csproj files by extension
259            if file_name.ends_with(".csproj") {
260                found.insert(ScipLanguage::CSharp);
261            }
262        }
263
264        found.into_iter().collect()
265    }
266
267    /// Check which indexers are available on PATH or configured with explicit commands.
268    pub fn detect_available_indexers(&self, languages: &[ScipLanguage]) -> Vec<ScipLanguage> {
269        let mut available = Vec::new();
270
271        for &lang in languages {
272            // Check if there's a config override for this language.
273            if self.config_command_for(lang).is_some() {
274                available.push(lang);
275                continue;
276            }
277
278            // Auto-detect from PATH.
279            if !self.config.auto_detect_indexers {
280                continue;
281            }
282            if which_binary(lang.indexer_binary()).is_some() {
283                available.push(lang);
284            }
285        }
286
287        available
288    }
289
290    /// Run a single SCIP indexer for the given language.
291    fn run_indexer(
292        &self,
293        lang: ScipLanguage,
294        project_root: &Path,
295        output_path: &Path,
296        namespace: &str,
297    ) -> Result<(), CodememError> {
298        let (program, args) = if let Some(cmd) = self.config_command_for(lang) {
299            // Parse the config override command, substituting {namespace}.
300            let expanded = cmd.replace("{namespace}", namespace);
301            parse_shell_command(&expanded)?
302        } else {
303            // Resolve the absolute path to the indexer binary so child processes
304            // work even when PATH doesn't include the user's shell additions
305            // (e.g. when invoked from hooks running under /bin/sh).
306            let binary_name = lang.indexer_binary();
307            let resolved = which_binary(binary_name)
308                .map(|p| p.display().to_string())
309                .unwrap_or_else(|| binary_name.to_string());
310            (
311                resolved,
312                lang.default_args().iter().map(|s| s.to_string()).collect(),
313            )
314        };
315
316        tracing::info!(
317            "Running SCIP indexer for {}: {} {:?}",
318            lang.name(),
319            program,
320            args
321        );
322
323        // Ensure the child process inherits a PATH that includes common
324        // tool locations (e.g. ~/.cargo/bin, homebrew paths, nvm paths).
325        // The parent process PATH may be minimal when run from hooks.
326        let path_env = augmented_path();
327
328        let output = Command::new(&program)
329            .args(&args)
330            .current_dir(project_root)
331            .env("PATH", &path_env)
332            .output()
333            .map_err(|e| {
334                CodememError::ScipOrchestration(format!("Failed to spawn {program}: {e}"))
335            })?;
336
337        if !output.status.success() {
338            let stderr = String::from_utf8_lossy(&output.stderr);
339            return Err(CodememError::ScipOrchestration(format!(
340                "{} exited with {}: {}",
341                program,
342                output.status,
343                stderr.trim()
344            )));
345        }
346
347        // Many indexers write to index.scip in the project root by default.
348        // If the output file doesn't exist yet, try to move the default output.
349        if !output_path.exists() {
350            let default_output = project_root.join(lang.default_output_file());
351            if default_output.exists() {
352                std::fs::rename(&default_output, output_path).map_err(|e| {
353                    CodememError::ScipOrchestration(format!(
354                        "Failed to move {}: {e}",
355                        default_output.display()
356                    ))
357                })?;
358            }
359        }
360
361        Ok(())
362    }
363
364    /// Get the config override command for a language, if any.
365    fn config_command_for(&self, lang: ScipLanguage) -> Option<&String> {
366        let cmd = match lang {
367            ScipLanguage::Rust => &self.config.indexers.rust,
368            ScipLanguage::TypeScript => &self.config.indexers.typescript,
369            ScipLanguage::Python => &self.config.indexers.python,
370            ScipLanguage::Java => &self.config.indexers.java,
371            ScipLanguage::Go => &self.config.indexers.go,
372            // These languages don't have config overrides in ScipIndexersConfig yet.
373            ScipLanguage::CSharp | ScipLanguage::Ruby | ScipLanguage::Php | ScipLanguage::Dart => {
374                return None;
375            }
376        };
377        if cmd.is_empty() {
378            None
379        } else {
380            Some(cmd)
381        }
382    }
383
384    /// Parse and merge multiple .scip files into a single ScipReadResult.
385    fn merge_scip_files(
386        &self,
387        paths: &[PathBuf],
388        project_root: &Path,
389    ) -> Result<ScipReadResult, CodememError> {
390        let mut merged = ScipReadResult {
391            project_root: project_root.to_string_lossy().to_string(),
392            definitions: Vec::new(),
393            references: Vec::new(),
394            externals: Vec::new(),
395            covered_files: Vec::new(),
396        };
397
398        for path in paths {
399            let bytes = std::fs::read(path).map_err(|e| {
400                CodememError::ScipOrchestration(format!("Failed to read {}: {e}", path.display()))
401            })?;
402            let result = parse_scip_bytes(&bytes).map_err(CodememError::ScipOrchestration)?;
403            merged.definitions.extend(result.definitions);
404            merged.references.extend(result.references);
405            merged.externals.extend(result.externals);
406            merged.covered_files.extend(result.covered_files);
407        }
408
409        // Dedup covered files (multiple indexers might cover overlapping files).
410        merged.covered_files.sort();
411        merged.covered_files.dedup();
412
413        Ok(merged)
414    }
415}
416
417/// Check if a binary is available on PATH.
418fn which_binary(name: &str) -> Option<PathBuf> {
419    which::which(name).ok()
420}
421
422/// Build an augmented PATH that includes common tool directories.
423/// Useful when the current process was spawned by /bin/sh which
424/// doesn't source shell profiles (~/.zshrc, ~/.bashrc).
425fn augmented_path() -> String {
426    let current = std::env::var("PATH").unwrap_or_default();
427    let home = dirs::home_dir().unwrap_or_else(|| PathBuf::from("/tmp"));
428
429    let extra_dirs = [
430        home.join(".cargo/bin"),
431        home.join(".local/bin"),
432        home.join(".nvm/current/bin"),
433        PathBuf::from("/usr/local/bin"),
434        PathBuf::from("/opt/homebrew/bin"),
435    ];
436
437    let mut parts: Vec<String> = vec![current];
438    for dir in &extra_dirs {
439        if dir.is_dir() {
440            parts.push(dir.display().to_string());
441        }
442    }
443    parts.join(":")
444}
445
446/// Parse a shell command string into (program, args).
447///
448/// Simple whitespace splitting — does not handle quoted strings.
449fn parse_shell_command(cmd: &str) -> Result<(String, Vec<String>), CodememError> {
450    let parts: Vec<&str> = cmd.split_whitespace().collect();
451    if parts.is_empty() {
452        return Err(CodememError::ScipOrchestration(
453            "Empty command string".to_string(),
454        ));
455    }
456    let program = parts[0].to_string();
457    let args = parts[1..].iter().map(|s| s.to_string()).collect();
458    Ok((program, args))
459}
460
/// Result of checking SCIP cache validity.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CacheStatus {
    /// Path to the cached .scip file.
    pub path: PathBuf,
    /// Whether the cache is still valid (within TTL).
    pub valid: bool,
}
468
469/// Resolve the SCIP cache directory for a given namespace.
470/// Returns `~/.codemem/scip-cache/{namespace}/`, creating it if needed.
471fn scip_cache_dir(namespace: &str) -> Option<PathBuf> {
472    let home = dirs::home_dir()?;
473    let dir = home.join(".codemem").join("scip-cache").join(namespace);
474    std::fs::create_dir_all(&dir).ok()?;
475    Some(dir)
476}
477
478/// Check if a cached SCIP index exists in `cache_dir` and is within the TTL.
479pub fn check_cache(cache_dir: &Path, lang: ScipLanguage, ttl_hours: u64) -> Option<CacheStatus> {
480    let cache_path = cache_dir.join(format!("index-{}.scip", lang.name()));
481    if !cache_path.exists() {
482        return None;
483    }
484
485    let metadata = std::fs::metadata(&cache_path).ok()?;
486    let modified = metadata.modified().ok()?;
487    let age = modified.elapsed().ok()?;
488    let valid = age.as_secs() < ttl_hours * 3600;
489
490    Some(CacheStatus {
491        path: cache_path,
492        valid,
493    })
494}
495
496/// Save a .scip file to the given cache directory for future runs.
497fn save_to_cache(cache_dir: &Path, lang: ScipLanguage, source_path: &Path) {
498    let cache_path = cache_dir.join(format!("index-{}.scip", lang.name()));
499    if let Err(e) = std::fs::copy(source_path, &cache_path) {
500        tracing::warn!("Failed to cache SCIP index for {}: {e}", lang.name());
501    }
502}
503
// Unit tests are kept in a separate file and compiled only for test builds.
#[cfg(test)]
#[path = "../tests/scip_orchestrate_tests.rs"]
mod tests;