//! lean_ctx/core/multi_repo.rs
//!
//! Multi-repository roots and cross-repo search merged with Reciprocal Rank Fusion (RRF).

1use std::collections::HashMap;
2use std::path::{Path, PathBuf};
3
4use serde::{Deserialize, Serialize};
5
6use crate::core::bm25_index::{BM25Index, SearchResult};
7
/// Default `k` constant for Reciprocal Rank Fusion.
///
/// Every hit contributes `1 / (k + rank + 1)` to its fused score, so a larger `k` flattens
/// the difference between neighbouring ranks.
const DEFAULT_RRF_K: f64 = 60.0;

/// Upper bound on how many repo roots a manager accepts at the same time.
const MAX_ROOTS: usize = 16;
13
/// One hit produced by searching a single repo root.
#[derive(Debug, Clone)]
pub struct RepoSearchResult {
    /// Alias of the root that produced the hit.
    pub repo_alias: String,
    /// Canonical path of that root, rendered lossily as a string.
    pub repo_path: String,
    /// Path of the matching file as reported by the BM25 index.
    pub file_path: String,
    /// Symbol name reported by the index for the matching chunk.
    pub symbol_name: String,
    /// Snippet text reported by the index.
    pub content: String,
    /// First line of the matching span.
    pub start_line: usize,
    /// Last line of the matching span.
    pub end_line: usize,
    /// Rank-derived score: `1 / (rank + 1)` within the producing root (not a raw BM25 score).
    pub score: f64,
}
26
/// One entry of the cross-repo result list produced by RRF fusion.
#[derive(Debug, Clone)]
pub struct FusedSearchResult {
    /// Alias of the root the hit came from.
    pub repo_alias: String,
    /// Canonical path of that root, rendered lossily as a string.
    pub repo_path: String,
    /// Path of the matching file as reported by the per-repo search.
    pub file_path: String,
    /// Symbol name carried over from the per-repo hit.
    pub symbol_name: String,
    /// Snippet text carried over from the per-repo hit.
    pub content: String,
    /// First line of the matching span.
    pub start_line: usize,
    /// Last line of the matching span.
    pub end_line: usize,
    /// Accumulated Reciprocal Rank Fusion score; higher ranks first.
    pub rrf_score: f64,
}
39
40/// Configuration for a single repository root in multi-repo mode.
41#[derive(Debug, Clone, Serialize, Deserialize)]
42pub struct RepoRootConfig {
43    pub path: String,
44    #[serde(default)]
45    pub alias: Option<String>,
46}
47
48impl RepoRootConfig {
49    pub fn effective_alias(&self) -> String {
50        self.alias.clone().unwrap_or_else(|| {
51            Path::new(&self.path)
52                .file_name()
53                .and_then(|n| n.to_str())
54                .unwrap_or("unknown")
55                .to_string()
56        })
57    }
58}
59
60/// Multi-repo configuration loaded from `~/.config/lean-ctx/multi-repo.toml`.
61#[derive(Debug, Clone, Serialize, Deserialize, Default)]
62pub struct MultiRepoConfig {
63    #[serde(default)]
64    pub repos: Vec<RepoRootConfig>,
65    #[serde(default)]
66    pub rrf_k: Option<f64>,
67}
68
69impl MultiRepoConfig {
70    pub fn load() -> Self {
71        let config_path = config_file_path();
72        if !config_path.exists() {
73            return Self::default();
74        }
75        match std::fs::read_to_string(&config_path) {
76            Ok(content) => toml::from_str(&content).unwrap_or_default(),
77            Err(_) => Self::default(),
78        }
79    }
80
81    pub fn save(&self) -> Result<(), String> {
82        let config_path = config_file_path();
83        if let Some(parent) = config_path.parent() {
84            std::fs::create_dir_all(parent)
85                .map_err(|e| format!("Failed to create config dir: {e}"))?;
86        }
87        let content =
88            toml::to_string_pretty(self).map_err(|e| format!("Failed to serialize config: {e}"))?;
89        let defaults = toml::to_string_pretty(&Self::default())
90            .map_err(|e| format!("Failed to serialize defaults: {e}"))?;
91        crate::config_io::write_toml_preserving_minimal(&config_path, &content, &defaults)
92            .map_err(|e| format!("Failed to write config: {e}"))?;
93        Ok(())
94    }
95}
96
97/// An active repo root with its loaded BM25 index.
98pub struct ActiveRepoRoot {
99    pub config: RepoRootConfig,
100    pub path: PathBuf,
101    index: Option<BM25Index>,
102}
103
104impl std::fmt::Debug for ActiveRepoRoot {
105    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
106        f.debug_struct("ActiveRepoRoot")
107            .field("config", &self.config)
108            .field("path", &self.path)
109            .field("has_index", &self.index.is_some())
110            .finish()
111    }
112}
113
114impl ActiveRepoRoot {
115    fn new(config: RepoRootConfig) -> Result<Self, String> {
116        let path = PathBuf::from(&config.path);
117        if !path.is_dir() {
118            return Err(format!(
119                "Path does not exist or is not a directory: {}",
120                config.path
121            ));
122        }
123        let path = path
124            .canonicalize()
125            .map_err(|e| format!("Cannot canonicalize {}: {e}", config.path))?;
126        Ok(Self {
127            config,
128            path,
129            index: None,
130        })
131    }
132
133    fn ensure_index(&mut self) {
134        if self.index.is_some() {
135            return;
136        }
137        self.index = Some(BM25Index::load_or_build(&self.path));
138    }
139
140    pub fn alias(&self) -> String {
141        self.config.effective_alias()
142    }
143
144    pub fn search(&mut self, query: &str, max_results: usize) -> Vec<RepoSearchResult> {
145        self.ensure_index();
146        let Some(ref index) = self.index else {
147            return Vec::new();
148        };
149
150        let results: Vec<SearchResult> = index.search(query, max_results);
151        let alias = self.alias();
152        let repo_path = self.path.to_string_lossy().to_string();
153
154        results
155            .into_iter()
156            .enumerate()
157            .map(|(rank, sr)| RepoSearchResult {
158                repo_alias: alias.clone(),
159                repo_path: repo_path.clone(),
160                file_path: sr.file_path,
161                symbol_name: sr.symbol_name,
162                content: sr.snippet,
163                start_line: sr.start_line,
164                end_line: sr.end_line,
165                score: 1.0 / (rank as f64 + 1.0),
166            })
167            .collect()
168    }
169}
170
171/// Manages multiple repository roots and performs cross-repo search with RRF fusion.
172pub struct MultiRepoManager {
173    roots: Vec<ActiveRepoRoot>,
174    rrf_k: f64,
175}
176
177impl MultiRepoManager {
178    pub fn new() -> Self {
179        Self {
180            roots: Vec::new(),
181            rrf_k: DEFAULT_RRF_K,
182        }
183    }
184
185    pub fn with_rrf_k(mut self, k: f64) -> Self {
186        self.rrf_k = k;
187        self
188    }
189
190    pub fn from_config(config: &MultiRepoConfig) -> Result<Self, String> {
191        let mut manager = Self::new();
192        if let Some(k) = config.rrf_k {
193            manager.rrf_k = k;
194        }
195        for repo_config in &config.repos {
196            manager.add_root_config(repo_config.clone())?;
197        }
198        Ok(manager)
199    }
200
201    pub fn add_root(&mut self, path: &str, alias: Option<&str>) -> Result<(), String> {
202        if self.roots.len() >= MAX_ROOTS {
203            return Err(format!("Maximum number of roots ({MAX_ROOTS}) reached"));
204        }
205        let config = RepoRootConfig {
206            path: path.to_string(),
207            alias: alias.map(String::from),
208        };
209        let root = ActiveRepoRoot::new(config)?;
210        if self.roots.iter().any(|r| r.path == root.path) {
211            return Err(format!("Root already exists: {path}"));
212        }
213        self.roots.push(root);
214        Ok(())
215    }
216
217    fn add_root_config(&mut self, config: RepoRootConfig) -> Result<(), String> {
218        if self.roots.len() >= MAX_ROOTS {
219            return Err(format!("Maximum number of roots ({MAX_ROOTS}) reached"));
220        }
221        let root = ActiveRepoRoot::new(config)?;
222        if self.roots.iter().any(|r| r.path == root.path) {
223            return Err(format!(
224                "Root already exists: {}",
225                root.path.to_string_lossy()
226            ));
227        }
228        self.roots.push(root);
229        Ok(())
230    }
231
232    pub fn remove_root(&mut self, path: &str) -> Result<(), String> {
233        let normalized = PathBuf::from(path)
234            .canonicalize()
235            .unwrap_or_else(|_| PathBuf::from(path));
236        let before = self.roots.len();
237        self.roots
238            .retain(|r| r.path != normalized && r.config.path != path);
239        if self.roots.len() == before {
240            return Err(format!("Root not found: {path}"));
241        }
242        Ok(())
243    }
244
245    pub fn list_roots(&self) -> Vec<RootInfo> {
246        self.roots
247            .iter()
248            .map(|r| RootInfo {
249                path: r.path.to_string_lossy().to_string(),
250                alias: r.alias(),
251                has_index: r.index.is_some(),
252            })
253            .collect()
254    }
255
256    pub fn root_count(&self) -> usize {
257        self.roots.len()
258    }
259
260    pub fn is_active(&self) -> bool {
261        self.roots.len() > 1
262    }
263
264    /// Resolve a repo alias or path to the corresponding root index.
265    pub fn resolve_root(&self, repo: &str) -> Option<usize> {
266        self.roots.iter().position(|r| {
267            r.alias() == repo || r.config.path == repo || r.path.to_string_lossy() == repo
268        })
269    }
270
271    /// Search across all roots (or a subset) and merge with Reciprocal Rank Fusion.
272    pub fn search(
273        &mut self,
274        query: &str,
275        max_results: usize,
276        filter_roots: Option<&[String]>,
277    ) -> Vec<FusedSearchResult> {
278        let per_root_max = (max_results * 2).max(20);
279
280        let mut all_results: HashMap<String, FusedSearchResult> = HashMap::new();
281
282        for root in &mut self.roots {
283            if let Some(filter) = filter_roots {
284                let alias = root.alias();
285                let path = root.path.to_string_lossy().to_string();
286                if !filter.iter().any(|f| f == &alias || f == &path) {
287                    continue;
288                }
289            }
290
291            let results = root.search(query, per_root_max);
292
293            for (rank, result) in results.iter().enumerate() {
294                let rrf_contribution = 1.0 / (self.rrf_k + rank as f64 + 1.0);
295                let key = format!(
296                    "{}:{}:{}",
297                    result.repo_alias, result.file_path, result.start_line
298                );
299
300                all_results
301                    .entry(key)
302                    .and_modify(|existing| {
303                        existing.rrf_score += rrf_contribution;
304                    })
305                    .or_insert_with(|| FusedSearchResult {
306                        repo_alias: result.repo_alias.clone(),
307                        repo_path: result.repo_path.clone(),
308                        file_path: result.file_path.clone(),
309                        symbol_name: result.symbol_name.clone(),
310                        content: result.content.clone(),
311                        start_line: result.start_line,
312                        end_line: result.end_line,
313                        rrf_score: rrf_contribution,
314                    });
315            }
316        }
317
318        let mut fused: Vec<FusedSearchResult> = all_results.into_values().collect();
319        fused.sort_by(|a, b| {
320            b.rrf_score
321                .partial_cmp(&a.rrf_score)
322                .unwrap_or(std::cmp::Ordering::Equal)
323        });
324        fused.truncate(max_results);
325        fused
326    }
327
328    /// Search within a specific repo root (no RRF, single-repo query).
329    pub fn search_single_repo(
330        &mut self,
331        repo: &str,
332        query: &str,
333        max_results: usize,
334    ) -> Result<Vec<RepoSearchResult>, String> {
335        let idx = self
336            .resolve_root(repo)
337            .ok_or_else(|| format!("Unknown repo: {repo}"))?;
338        Ok(self.roots[idx].search(query, max_results))
339    }
340}
341
342impl Default for MultiRepoManager {
343    fn default() -> Self {
344        Self::new()
345    }
346}
347
348/// Summary info about a registered root.
349#[derive(Debug, Clone, Serialize)]
350pub struct RootInfo {
351    pub path: String,
352    pub alias: String,
353    pub has_index: bool,
354}
355
356/// Returns the path to the multi-repo config file.
357pub fn config_file_path() -> PathBuf {
358    dirs::config_dir()
359        .unwrap_or_else(|| PathBuf::from("~/.config"))
360        .join("lean-ctx")
361        .join("multi-repo.toml")
362}
363
364/// Global multi-repo manager instance (lazily initialized).
365static GLOBAL_MANAGER: std::sync::OnceLock<std::sync::Mutex<MultiRepoManager>> =
366    std::sync::OnceLock::new();
367
368pub fn global_manager() -> &'static std::sync::Mutex<MultiRepoManager> {
369    GLOBAL_MANAGER.get_or_init(|| {
370        let config = MultiRepoConfig::load();
371        let manager = MultiRepoManager::from_config(&config).unwrap_or_default();
372        std::sync::Mutex::new(manager)
373    })
374}
375
376/// Initialize the global manager with explicit roots (e.g. from CLI `--root` flags).
377pub fn init_with_roots(
378    roots: &[(String, Option<String>)],
379    rrf_k: Option<f64>,
380) -> Result<(), String> {
381    let mut manager = MultiRepoManager::new();
382    if let Some(k) = rrf_k {
383        manager.rrf_k = k;
384    }
385    for (path, alias) in roots {
386        manager.add_root(path, alias.as_deref())?;
387    }
388    GLOBAL_MANAGER
389        .set(std::sync::Mutex::new(manager))
390        .map_err(|_| "Multi-repo manager already initialized".to_string())
391}
392
393/// Resolve a `repo` alias/path to the actual filesystem root.
394/// Used by existing tools (ctx_read, ctx_search, etc.) when a `repo` param is provided.
395/// Returns the absolute path to the repo root, or None if multi-repo is inactive or repo not found.
396pub fn resolve_repo_root(repo: &str) -> Option<String> {
397    let manager = global_manager();
398    let mgr = manager.lock().ok()?;
399    let idx = mgr.resolve_root(repo)?;
400    Some(mgr.roots[idx].path.to_string_lossy().to_string())
401}
402
403/// Check if multi-repo mode is active (more than 1 root configured).
404pub fn is_multi_repo_active() -> bool {
405    let manager = global_manager();
406    manager.lock().ok().is_some_and(|mgr| mgr.is_active())
407}
408
409/// Get all configured repo root paths (for tools that need to iterate).
410pub fn all_root_paths() -> Vec<String> {
411    let manager = global_manager();
412    let Ok(mgr) = manager.lock() else {
413        return Vec::new();
414    };
415    mgr.roots
416        .iter()
417        .map(|r| r.path.to_string_lossy().to_string())
418        .collect()
419}
420
421/// Format search results for MCP output.
422pub fn format_fused_results(results: &[FusedSearchResult]) -> String {
423    if results.is_empty() {
424        return "No results found across repos.".to_string();
425    }
426
427    let mut out = String::with_capacity(results.len() * 200);
428    out.push_str(&format!(
429        "Cross-repo results ({} matches):\n\n",
430        results.len()
431    ));
432
433    for (i, result) in results.iter().enumerate() {
434        out.push_str(&format!(
435            "{}. [{}] {}:{}-{} ({})\n   RRF: {:.4}\n",
436            i + 1,
437            result.repo_alias,
438            result.file_path,
439            result.start_line,
440            result.end_line,
441            result.symbol_name,
442            result.rrf_score,
443        ));
444        let preview: String = result
445            .content
446            .lines()
447            .take(3)
448            .collect::<Vec<_>>()
449            .join("\n");
450        if !preview.is_empty() {
451            out.push_str(&format!("   {}\n", preview.replace('\n', "\n   ")));
452        }
453        out.push('\n');
454    }
455    out
456}
457
#[cfg(test)]
mod tests {
    use super::*;

    /// Creates a scratch directory under the system temp dir.
    ///
    /// The process id is part of the name so that concurrent test processes do not share
    /// (and delete) each other's directories. Creation failures abort the test immediately
    /// instead of surfacing later as a misleading assertion failure.
    fn scratch_dir(name: &str) -> std::path::PathBuf {
        let dir = std::env::temp_dir().join(format!("{name}_{}", std::process::id()));
        std::fs::create_dir_all(&dir).expect("create scratch directory");
        dir
    }

    #[test]
    fn repo_root_config_effective_alias() {
        let cfg = RepoRootConfig {
            path: "/home/user/projects/backend".to_string(),
            alias: None,
        };
        assert_eq!(cfg.effective_alias(), "backend");

        let cfg_with_alias = RepoRootConfig {
            path: "/home/user/projects/backend".to_string(),
            alias: Some("api".to_string()),
        };
        assert_eq!(cfg_with_alias.effective_alias(), "api");
    }

    #[test]
    fn multi_repo_config_default_is_empty() {
        let cfg = MultiRepoConfig::default();
        assert!(cfg.repos.is_empty());
        assert!(cfg.rrf_k.is_none());
    }

    #[test]
    fn multi_repo_config_deserialize() {
        let toml_str = r#"
rrf_k = 45.0

[[repos]]
path = "/home/user/backend"
alias = "backend"

[[repos]]
path = "/home/user/frontend"
"#;
        let cfg: MultiRepoConfig = toml::from_str(toml_str).unwrap();
        assert_eq!(cfg.repos.len(), 2);
        assert_eq!(cfg.rrf_k, Some(45.0));
        assert_eq!(cfg.repos[0].alias, Some("backend".to_string()));
        assert_eq!(cfg.repos[1].alias, None);
    }

    #[test]
    fn manager_max_roots_enforced() {
        let roots: Vec<_> = (0..MAX_ROOTS)
            .map(|i| scratch_dir(&format!("multi_repo_test_{i}")))
            .collect();
        let extra = scratch_dir("multi_repo_test_extra");

        let mut manager = MultiRepoManager::new();
        for (i, dir) in roots.iter().enumerate() {
            // Every root below the cap must be accepted; otherwise the rejection checked
            // below could not be attributed to the cap.
            manager
                .add_root(&dir.to_string_lossy(), Some(&format!("repo{i}")))
                .expect("root below the cap is accepted");
        }
        assert_eq!(manager.root_count(), MAX_ROOTS);

        let result = manager.add_root(&extra.to_string_lossy(), None);

        // Cleanup
        for dir in roots.iter().chain(std::iter::once(&extra)) {
            let _ = std::fs::remove_dir_all(dir);
        }

        assert!(result.is_err());
    }

    #[test]
    fn manager_duplicate_root_rejected() {
        let dir = scratch_dir("multi_repo_dup_test");
        let mut manager = MultiRepoManager::new();
        assert!(manager
            .add_root(&dir.to_string_lossy(), Some("first"))
            .is_ok());
        assert!(manager
            .add_root(&dir.to_string_lossy(), Some("second"))
            .is_err());
        let _ = std::fs::remove_dir_all(&dir);
    }

    #[test]
    fn rrf_fusion_basic() {
        let manager = MultiRepoManager::new().with_rrf_k(60.0);

        // Verify RRF parameter is set
        assert_eq!(manager.rrf_k, 60.0);

        // RRF score for rank 0: 1/(60+0+1) = 1/61 ≈ 0.01639
        let score: f64 = 1.0 / (manager.rrf_k + 0.0 + 1.0);
        assert!((score - 0.01639).abs() < 0.001);
    }

    #[test]
    fn remove_root_works() {
        let dir = scratch_dir("multi_repo_remove_test");
        let mut manager = MultiRepoManager::new();
        manager
            .add_root(&dir.to_string_lossy(), Some("removable"))
            .unwrap();
        assert_eq!(manager.root_count(), 1);
        manager.remove_root(&dir.to_string_lossy()).unwrap();
        assert_eq!(manager.root_count(), 0);
        let _ = std::fs::remove_dir_all(&dir);
    }

    #[test]
    fn list_roots_returns_info() {
        let dir = scratch_dir("multi_repo_list_test");
        let mut manager = MultiRepoManager::new();
        manager
            .add_root(&dir.to_string_lossy(), Some("myrepo"))
            .unwrap();
        let roots = manager.list_roots();
        assert_eq!(roots.len(), 1);
        assert_eq!(roots[0].alias, "myrepo");
        let _ = std::fs::remove_dir_all(&dir);
    }

    #[test]
    fn format_empty_results() {
        let results: Vec<FusedSearchResult> = Vec::new();
        let output = format_fused_results(&results);
        assert!(output.contains("No results"));
    }
}