infiniloom_engine/analysis/
multi_repo.rs

//! Multi-repository index for cross-repo symbol linking
//!
//! Enables indexing multiple repositories and tracking cross-repository
//! references, dependencies, and symbol relationships.
5
6use crate::analysis::types::{
7    CrossRepoLink, CrossRepoLinkType, MultiRepoIndex, RepoEntry, UnifiedSymbolRef,
8};
9use crate::types::{Symbol, SymbolKind};
10use std::collections::HashMap;
11use std::time::{SystemTime, UNIX_EPOCH};
12
13/// Builds and maintains a multi-repository index
14pub struct MultiRepoIndexBuilder {
15    /// Index being built
16    index: MultiRepoIndex,
17    /// Map of import paths to repo IDs
18    import_path_to_repo: HashMap<String, String>,
19    /// Package/module names to repo IDs
20    package_to_repo: HashMap<String, String>,
21}
22
23impl MultiRepoIndexBuilder {
24    /// Create a new builder
25    pub fn new() -> Self {
26        Self {
27            index: MultiRepoIndex::default(),
28            import_path_to_repo: HashMap::new(),
29            package_to_repo: HashMap::new(),
30        }
31    }
32
33    /// Add a repository to the index
34    pub fn add_repository(
35        &mut self,
36        id: impl Into<String>,
37        name: impl Into<String>,
38        path: impl Into<String>,
39        commit: Option<String>,
40    ) -> &mut Self {
41        let id = id.into();
42        let name = name.into();
43        let path = path.into();
44
45        let entry = RepoEntry {
46            id: id.clone(),
47            name: name.clone(),
48            path: path.clone(),
49            commit,
50            file_count: 0,
51            symbol_count: 0,
52            indexed_at: Some(
53                SystemTime::now()
54                    .duration_since(UNIX_EPOCH)
55                    .unwrap()
56                    .as_secs(),
57            ),
58        };
59
60        self.index.repositories.push(entry);
61
62        // Register common package patterns
63        self.package_to_repo.insert(name, id.clone());
64        self.package_to_repo.insert(path, id);
65
66        self
67    }
68
69    /// Register an import path pattern for a repository
70    pub fn register_import_path(&mut self, pattern: impl Into<String>, repo_id: impl Into<String>) {
71        self.import_path_to_repo
72            .insert(pattern.into(), repo_id.into());
73    }
74
75    /// Add symbols from a file
76    pub fn add_file_symbols(
77        &mut self,
78        repo_id: &str,
79        file_path: &str,
80        symbols: &[Symbol],
81    ) -> &mut Self {
82        // Update repo stats
83        if let Some(repo) = self.index.repositories.iter_mut().find(|r| r.id == repo_id) {
84            repo.file_count += 1;
85            repo.symbol_count += symbols.len() as u32;
86        }
87
88        // Add symbols to unified index
89        for symbol in symbols {
90            let qualified_name = self.get_qualified_name(symbol, file_path);
91
92            let symbol_ref = UnifiedSymbolRef {
93                repo_id: repo_id.to_owned(),
94                file_path: file_path.to_owned(),
95                line: symbol.start_line,
96                kind: format!("{:?}", symbol.kind),
97                qualified_name: Some(qualified_name.clone()),
98            };
99
100            self.index
101                .unified_symbols
102                .entry(symbol.name.clone())
103                .or_default()
104                .push(symbol_ref);
105
106            // Also index by qualified name
107            if qualified_name != symbol.name {
108                let symbol_ref = UnifiedSymbolRef {
109                    repo_id: repo_id.to_owned(),
110                    file_path: file_path.to_owned(),
111                    line: symbol.start_line,
112                    kind: format!("{:?}", symbol.kind),
113                    qualified_name: Some(qualified_name.clone()),
114                };
115
116                self.index
117                    .unified_symbols
118                    .entry(qualified_name)
119                    .or_default()
120                    .push(symbol_ref);
121            }
122
123            // Detect cross-repo links from this symbol
124            self.detect_cross_repo_links(repo_id, file_path, symbol);
125        }
126
127        self
128    }
129
130    /// Get qualified name for a symbol
131    fn get_qualified_name(&self, symbol: &Symbol, file_path: &str) -> String {
132        // Build qualified name from parent and file path
133        let module = self.file_to_module(file_path);
134
135        if let Some(ref parent) = symbol.parent {
136            format!("{}::{}::{}", module, parent, symbol.name)
137        } else {
138            format!("{}::{}", module, symbol.name)
139        }
140    }
141
142    /// Convert file path to module path
143    fn file_to_module(&self, file_path: &str) -> String {
144        // Remove extension and convert slashes to ::
145        let without_ext = file_path
146            .trim_end_matches(".rs")
147            .trim_end_matches(".py")
148            .trim_end_matches(".ts")
149            .trim_end_matches(".js")
150            .trim_end_matches(".java")
151            .trim_end_matches(".go")
152            .trim_end_matches(".rb")
153            .trim_end_matches(".php");
154
155        // Handle mod.rs, index.ts, __init__.py patterns
156        let normalized = without_ext
157            .trim_end_matches("/mod")
158            .trim_end_matches("/index")
159            .trim_end_matches("/__init__");
160
161        // Convert path separators to ::
162        normalized
163            .replace(['/', '\\'], "::")
164            .trim_start_matches("::")
165            .to_owned()
166    }
167
168    /// Detect cross-repository links from a symbol
169    fn detect_cross_repo_links(&mut self, source_repo: &str, source_file: &str, symbol: &Symbol) {
170        // Check extends
171        if let Some(ref extends) = symbol.extends {
172            if let Some(target_repo) = self.find_symbol_repo(extends) {
173                if target_repo != source_repo {
174                    self.add_cross_repo_link(
175                        source_repo,
176                        source_file,
177                        Some(&symbol.name),
178                        symbol.start_line,
179                        &target_repo,
180                        extends,
181                        CrossRepoLinkType::Extends,
182                    );
183                }
184            }
185        }
186
187        // Check implements
188        for implements in &symbol.implements {
189            if let Some(target_repo) = self.find_symbol_repo(implements) {
190                if target_repo != source_repo {
191                    self.add_cross_repo_link(
192                        source_repo,
193                        source_file,
194                        Some(&symbol.name),
195                        symbol.start_line,
196                        &target_repo,
197                        implements,
198                        CrossRepoLinkType::Implements,
199                    );
200                }
201            }
202        }
203
204        // Check calls
205        for call in &symbol.calls {
206            if let Some(target_repo) = self.find_symbol_repo(call) {
207                if target_repo != source_repo {
208                    self.add_cross_repo_link(
209                        source_repo,
210                        source_file,
211                        Some(&symbol.name),
212                        symbol.start_line,
213                        &target_repo,
214                        call,
215                        CrossRepoLinkType::Call,
216                    );
217                }
218            }
219        }
220    }
221
222    /// Find which repo a symbol belongs to
223    fn find_symbol_repo(&self, symbol_name: &str) -> Option<String> {
224        // Check if we have this symbol in unified index
225        if let Some(refs) = self.index.unified_symbols.get(symbol_name) {
226            if let Some(first) = refs.first() {
227                return Some(first.repo_id.clone());
228            }
229        }
230
231        // Check import path patterns
232        for (pattern, repo_id) in &self.import_path_to_repo {
233            if symbol_name.starts_with(pattern) || symbol_name.contains(pattern) {
234                return Some(repo_id.clone());
235            }
236        }
237
238        // Check package names
239        let parts: Vec<&str> = symbol_name.split("::").collect();
240        if let Some(first) = parts.first() {
241            if let Some(repo_id) = self.package_to_repo.get(*first) {
242                return Some(repo_id.clone());
243            }
244        }
245
246        None
247    }
248
249    /// Add a cross-repository link
250    fn add_cross_repo_link(
251        &mut self,
252        source_repo: &str,
253        source_file: &str,
254        source_symbol: Option<&str>,
255        source_line: u32,
256        target_repo: &str,
257        target_symbol: &str,
258        link_type: CrossRepoLinkType,
259    ) {
260        self.index.cross_repo_links.push(CrossRepoLink {
261            source_repo: source_repo.to_owned(),
262            source_file: source_file.to_owned(),
263            source_symbol: source_symbol.map(String::from),
264            source_line,
265            target_repo: target_repo.to_owned(),
266            target_symbol: target_symbol.to_owned(),
267            link_type,
268        });
269    }
270
271    /// Build the final index
272    pub fn build(self) -> MultiRepoIndex {
273        self.index
274    }
275
276    /// Get current state of the index (for incremental building)
277    pub fn current_index(&self) -> &MultiRepoIndex {
278        &self.index
279    }
280}
281
282impl Default for MultiRepoIndexBuilder {
283    fn default() -> Self {
284        Self::new()
285    }
286}
287
288/// Query interface for the multi-repository index
289pub struct MultiRepoQuery<'a> {
290    index: &'a MultiRepoIndex,
291}
292
293impl<'a> MultiRepoQuery<'a> {
294    /// Create a new query interface
295    pub fn new(index: &'a MultiRepoIndex) -> Self {
296        Self { index }
297    }
298
299    /// Find all definitions of a symbol across all repos
300    pub fn find_symbol(&self, name: &str) -> Vec<&UnifiedSymbolRef> {
301        self.index
302            .unified_symbols
303            .get(name)
304            .map(|refs| refs.iter().collect())
305            .unwrap_or_default()
306    }
307
308    /// Find symbols by prefix (namespace search)
309    pub fn find_by_prefix(&self, prefix: &str) -> Vec<(&String, &Vec<UnifiedSymbolRef>)> {
310        self.index
311            .unified_symbols
312            .iter()
313            .filter(|(name, _)| name.starts_with(prefix))
314            .collect()
315    }
316
317    /// Find symbols by kind
318    pub fn find_by_kind(&self, kind: SymbolKind) -> Vec<&UnifiedSymbolRef> {
319        let kind_str = format!("{:?}", kind);
320        self.index
321            .unified_symbols
322            .values()
323            .flatten()
324            .filter(|r| r.kind == kind_str)
325            .collect()
326    }
327
328    /// Find cross-repo dependencies of a repository
329    pub fn get_repo_dependencies(&self, repo_id: &str) -> Vec<&CrossRepoLink> {
330        self.index
331            .cross_repo_links
332            .iter()
333            .filter(|link| link.source_repo == repo_id)
334            .collect()
335    }
336
337    /// Find repos that depend on a given repo
338    pub fn get_repo_dependents(&self, repo_id: &str) -> Vec<&CrossRepoLink> {
339        self.index
340            .cross_repo_links
341            .iter()
342            .filter(|link| link.target_repo == repo_id)
343            .collect()
344    }
345
346    /// Get cross-repo links by type
347    pub fn get_links_by_type(&self, link_type: CrossRepoLinkType) -> Vec<&CrossRepoLink> {
348        self.index
349            .cross_repo_links
350            .iter()
351            .filter(|link| link.link_type == link_type)
352            .collect()
353    }
354
355    /// Find all symbols in a file across all repos
356    pub fn find_symbols_in_file(&self, file_path: &str) -> Vec<&UnifiedSymbolRef> {
357        self.index
358            .unified_symbols
359            .values()
360            .flatten()
361            .filter(|r| r.file_path == file_path)
362            .collect()
363    }
364
365    /// Find all symbols in a repo
366    pub fn find_symbols_in_repo(&self, repo_id: &str) -> Vec<&UnifiedSymbolRef> {
367        self.index
368            .unified_symbols
369            .values()
370            .flatten()
371            .filter(|r| r.repo_id == repo_id)
372            .collect()
373    }
374
375    /// Get repository info
376    pub fn get_repo(&self, repo_id: &str) -> Option<&RepoEntry> {
377        self.index.repositories.iter().find(|r| r.id == repo_id)
378    }
379
380    /// Get all repositories
381    pub fn get_all_repos(&self) -> &[RepoEntry] {
382        &self.index.repositories
383    }
384
385    /// Get dependency graph as adjacency list
386    pub fn get_dependency_graph(&self) -> HashMap<String, Vec<String>> {
387        let mut graph: HashMap<String, Vec<String>> = HashMap::new();
388
389        for link in &self.index.cross_repo_links {
390            graph
391                .entry(link.source_repo.clone())
392                .or_default()
393                .push(link.target_repo.clone());
394        }
395
396        // Deduplicate
397        for deps in graph.values_mut() {
398            deps.sort();
399            deps.dedup();
400        }
401
402        graph
403    }
404
405    /// Find common symbols between repos
406    pub fn find_common_symbols(&self, repo1: &str, repo2: &str) -> Vec<&String> {
407        let repo1_symbols: std::collections::HashSet<_> = self
408            .index
409            .unified_symbols
410            .iter()
411            .filter(|(_, refs)| refs.iter().any(|r| r.repo_id == repo1))
412            .map(|(name, _)| name)
413            .collect();
414
415        let repo2_symbols: std::collections::HashSet<_> = self
416            .index
417            .unified_symbols
418            .iter()
419            .filter(|(_, refs)| refs.iter().any(|r| r.repo_id == repo2))
420            .map(|(name, _)| name)
421            .collect();
422
423        repo1_symbols
424            .intersection(&repo2_symbols)
425            .copied()
426            .collect()
427    }
428
429    /// Get statistics for the index
430    pub fn get_stats(&self) -> MultiRepoStats {
431        let mut symbols_per_repo: HashMap<String, u32> = HashMap::new();
432
433        for refs in self.index.unified_symbols.values() {
434            for r in refs {
435                *symbols_per_repo.entry(r.repo_id.clone()).or_default() += 1;
436            }
437        }
438
439        MultiRepoStats {
440            total_repos: self.index.repositories.len(),
441            total_symbols: self.index.unified_symbols.len(),
442            total_cross_repo_links: self.index.cross_repo_links.len(),
443            symbols_per_repo,
444        }
445    }
446}
447
/// Summary figures describing a multi-repo index
#[derive(Debug, Clone)]
pub struct MultiRepoStats {
    /// Number of repositories in the index
    pub total_repos: usize,
    /// Number of distinct keys in the unified symbol index
    pub total_symbols: usize,
    /// Number of recorded cross-repo links
    pub total_cross_repo_links: usize,
    /// Number of stored symbol references, keyed by repository ID
    pub symbols_per_repo: HashMap<String, u32>,
}
460
461/// Convenience function to build an index from multiple repositories
462pub fn build_multi_repo_index(
463    repos: &[(String, String, String, Option<String>, Vec<(String, Vec<Symbol>)>)],
464) -> MultiRepoIndex {
465    let mut builder = MultiRepoIndexBuilder::new();
466
467    for (id, name, path, commit, files) in repos {
468        builder.add_repository(id.clone(), name.clone(), path.clone(), commit.clone());
469
470        for (file_path, symbols) in files {
471            builder.add_file_symbols(id, file_path, symbols);
472        }
473    }
474
475    builder.build()
476}
477
478#[cfg(test)]
479mod tests {
480    use super::*;
481    use crate::types::Visibility;
482
483    fn make_symbol(name: &str, kind: SymbolKind, calls: Vec<&str>) -> Symbol {
484        Symbol {
485            name: name.to_owned(),
486            kind,
487            visibility: Visibility::Public,
488            calls: calls.into_iter().map(String::from).collect(),
489            start_line: 1,
490            end_line: 10,
491            ..Default::default()
492        }
493    }
494
495    #[test]
496    fn test_add_repository() {
497        let mut builder = MultiRepoIndexBuilder::new();
498
499        builder.add_repository("repo1", "MyLib", "/path/to/mylib", Some("abc123".to_owned()));
500
501        let index = builder.build();
502
503        assert_eq!(index.repositories.len(), 1);
504        assert_eq!(index.repositories[0].id, "repo1");
505        assert_eq!(index.repositories[0].name, "MyLib");
506    }
507
508    #[test]
509    fn test_unified_symbol_index() {
510        let mut builder = MultiRepoIndexBuilder::new();
511
512        builder.add_repository("repo1", "Lib1", "/lib1", None);
513        builder.add_repository("repo2", "Lib2", "/lib2", None);
514
515        let symbols1 = vec![make_symbol("common_func", SymbolKind::Function, vec![])];
516        let symbols2 = vec![make_symbol("common_func", SymbolKind::Function, vec![])];
517
518        builder.add_file_symbols("repo1", "src/utils.rs", &symbols1);
519        builder.add_file_symbols("repo2", "src/helpers.rs", &symbols2);
520
521        let index = builder.build();
522        let query = MultiRepoQuery::new(&index);
523
524        let refs = query.find_symbol("common_func");
525        assert_eq!(refs.len(), 2);
526    }
527
528    #[test]
529    fn test_cross_repo_detection() {
530        let mut builder = MultiRepoIndexBuilder::new();
531
532        builder.add_repository("core", "Core", "/core", None);
533        builder.add_repository("app", "App", "/app", None);
534
535        // Add core symbol first
536        let core_symbols = vec![make_symbol("CoreUtil", SymbolKind::Class, vec![])];
537        builder.add_file_symbols("core", "src/util.rs", &core_symbols);
538
539        // Register import pattern
540        builder.register_import_path("core::", "core");
541
542        // App calls core
543        let app_symbols = vec![make_symbol("AppMain", SymbolKind::Class, vec!["core::CoreUtil"])];
544        builder.add_file_symbols("app", "src/main.rs", &app_symbols);
545
546        let index = builder.build();
547        let query = MultiRepoQuery::new(&index);
548
549        let deps = query.get_repo_dependencies("app");
550        // Should have detected a cross-repo call
551        assert!(deps.iter().any(|d| d.target_repo == "core"));
552    }
553
554    #[test]
555    fn test_dependency_graph() {
556        let mut builder = MultiRepoIndexBuilder::new();
557
558        builder.add_repository("a", "A", "/a", None);
559        builder.add_repository("b", "B", "/b", None);
560        builder.add_repository("c", "C", "/c", None);
561
562        // Add symbols
563        let a_symbols = vec![make_symbol("AClass", SymbolKind::Class, vec![])];
564        let b_symbols = vec![make_symbol("BClass", SymbolKind::Class, vec![])];
565        let c_symbols = vec![make_symbol("CClass", SymbolKind::Class, vec!["AClass", "BClass"])];
566
567        builder.add_file_symbols("a", "a.rs", &a_symbols);
568        builder.register_import_path("AClass", "a");
569
570        builder.add_file_symbols("b", "b.rs", &b_symbols);
571        builder.register_import_path("BClass", "b");
572
573        builder.add_file_symbols("c", "c.rs", &c_symbols);
574
575        let index = builder.build();
576        let query = MultiRepoQuery::new(&index);
577
578        let graph = query.get_dependency_graph();
579
580        // C depends on A and B
581        if let Some(c_deps) = graph.get("c") {
582            assert!(c_deps.contains(&"a".to_owned()) || c_deps.contains(&"b".to_owned()));
583        }
584    }
585
586    #[test]
587    fn test_stats() {
588        let mut builder = MultiRepoIndexBuilder::new();
589
590        builder.add_repository("repo1", "R1", "/r1", None);
591        builder.add_repository("repo2", "R2", "/r2", None);
592
593        let symbols = vec![
594            make_symbol("func1", SymbolKind::Function, vec![]),
595            make_symbol("func2", SymbolKind::Function, vec![]),
596        ];
597
598        builder.add_file_symbols("repo1", "file.rs", &symbols);
599        builder.add_file_symbols(
600            "repo2",
601            "file.rs",
602            &[make_symbol("func3", SymbolKind::Function, vec![])],
603        );
604
605        let index = builder.build();
606        let query = MultiRepoQuery::new(&index);
607
608        let stats = query.get_stats();
609
610        assert_eq!(stats.total_repos, 2);
611        assert!(stats.total_symbols >= 3);
612    }
613
614    #[test]
615    fn test_find_by_prefix() {
616        let mut builder = MultiRepoIndexBuilder::new();
617
618        builder.add_repository("repo", "Repo", "/repo", None);
619
620        let symbols = vec![
621            make_symbol("http_get", SymbolKind::Function, vec![]),
622            make_symbol("http_post", SymbolKind::Function, vec![]),
623            make_symbol("db_query", SymbolKind::Function, vec![]),
624        ];
625
626        builder.add_file_symbols("repo", "api.rs", &symbols);
627
628        let index = builder.build();
629        let query = MultiRepoQuery::new(&index);
630
631        let http_funcs = query.find_by_prefix("http_");
632        assert_eq!(http_funcs.len(), 2);
633    }
634}