rag_plusplus_core/index/
registry.rs

//! Index Registry
//!
//! Manages multiple named indexes for multi-index retrieval scenarios.
//!
//! # Overview
//!
//! The registry provides a centralized way to manage multiple vector indexes,
//! enabling multi-modal retrieval (e.g., separate indexes for text embeddings,
//! code embeddings, and image embeddings).
//!
//! # Architecture
//!
//! ```text
//! ┌─────────────────────────────────────────────────────────────┐
//! │                    IndexRegistry                             │
//! ├─────────────────────────────────────────────────────────────┤
//! │  indexes: AHashMap<String, Box<dyn VectorIndex>>             │
//! ├─────────────────────────────────────────────────────────────┤
//! │  + register(name, index)                                     │
//! │  + get(name) -> &dyn VectorIndex                             │
//! │  + get_mut(name) -> &mut dyn VectorIndex                     │
//! │  + remove(name) -> Option<Box<dyn VectorIndex>>              │
//! │  + list() -> Vec<&str>                                       │
//! │  + search_all(query, k) -> MultiIndexResults                 │
//! └─────────────────────────────────────────────────────────────┘
//! ```
27
28use crate::error::{Error, Result};
29use crate::index::traits::{DistanceType, SearchResult, VectorIndex};
30use ahash::AHashMap;
31use parking_lot::RwLock;
32use std::sync::Arc;
33
34/// Information about a registered index.
35#[derive(Debug, Clone)]
36pub struct IndexInfo {
37    /// Index name
38    pub name: String,
39    /// Vector dimension
40    pub dimension: usize,
41    /// Distance metric
42    pub distance_type: DistanceType,
43    /// Number of vectors
44    pub size: usize,
45    /// Memory usage in bytes
46    pub memory_bytes: usize,
47}
48
49/// Results from a multi-index search.
50#[derive(Debug, Clone)]
51pub struct MultiIndexResult {
52    /// Index name this result came from
53    pub index_name: String,
54    /// Search results from this index
55    pub results: Vec<SearchResult>,
56}
57
58/// Results aggregated from multiple indexes.
59#[derive(Debug, Clone, Default)]
60pub struct MultiIndexResults {
61    /// Results per index
62    pub by_index: Vec<MultiIndexResult>,
63    /// Total results across all indexes
64    pub total_count: usize,
65}
66
67impl MultiIndexResults {
68    /// Create empty results.
69    #[must_use]
70    pub fn new() -> Self {
71        Self::default()
72    }
73
74    /// Add results from an index.
75    pub fn add(&mut self, index_name: String, results: Vec<SearchResult>) {
76        self.total_count += results.len();
77        self.by_index.push(MultiIndexResult {
78            index_name,
79            results,
80        });
81    }
82
83    /// Flatten all results into a single vector.
84    ///
85    /// Note: Results are not re-ranked; use fusion for proper merging.
86    #[must_use]
87    pub fn flatten(&self) -> Vec<(String, SearchResult)> {
88        self.by_index
89            .iter()
90            .flat_map(|mir| {
91                mir.results
92                    .iter()
93                    .cloned()
94                    .map(|r| (mir.index_name.clone(), r))
95            })
96            .collect()
97    }
98}
99
100/// Thread-safe registry for managing multiple named indexes.
101///
102/// # Example
103///
104/// ```ignore
105/// use rag_plusplus_core::index::{IndexRegistry, FlatIndex, IndexConfig};
106///
107/// let mut registry = IndexRegistry::new();
108///
109/// // Register indexes for different modalities
110/// let text_index = FlatIndex::new(IndexConfig::new(768));
111/// let code_index = FlatIndex::new(IndexConfig::new(512));
112///
113/// registry.register("text_embeddings", text_index)?;
114/// registry.register("code_embeddings", code_index)?;
115///
116/// // Search specific index
117/// let results = registry.search("text_embeddings", &query, 10)?;
118///
119/// // Search all indexes
120/// let all_results = registry.search_all(&query, 10)?;
121/// ```
122#[derive(Debug, Default)]
123pub struct IndexRegistry {
124    /// Named indexes
125    indexes: AHashMap<String, Box<dyn VectorIndex>>,
126}
127
128impl IndexRegistry {
129    /// Create a new empty registry.
130    #[must_use]
131    pub fn new() -> Self {
132        Self {
133            indexes: AHashMap::new(),
134        }
135    }
136
137    /// Create registry with pre-allocated capacity.
138    #[must_use]
139    pub fn with_capacity(capacity: usize) -> Self {
140        Self {
141            indexes: AHashMap::with_capacity(capacity),
142        }
143    }
144
145    /// Register a new index with the given name.
146    ///
147    /// # Errors
148    ///
149    /// Returns error if an index with the same name already exists.
150    pub fn register<I: VectorIndex + 'static>(
151        &mut self,
152        name: impl Into<String>,
153        index: I,
154    ) -> Result<()> {
155        let name = name.into();
156        if self.indexes.contains_key(&name) {
157            return Err(Error::DuplicateIndex { name });
158        }
159        self.indexes.insert(name, Box::new(index));
160        Ok(())
161    }
162
163    /// Register or replace an index.
164    ///
165    /// Returns the previous index if one existed.
166    pub fn register_or_replace<I: VectorIndex + 'static>(
167        &mut self,
168        name: impl Into<String>,
169        index: I,
170    ) -> Option<Box<dyn VectorIndex>> {
171        self.indexes.insert(name.into(), Box::new(index))
172    }
173
174    /// Get a reference to an index by name.
175    #[must_use]
176    pub fn get(&self, name: &str) -> Option<&dyn VectorIndex> {
177        self.indexes.get(name).map(AsRef::as_ref)
178    }
179
180    /// Remove an index by name.
181    ///
182    /// Returns the removed index if it existed.
183    pub fn remove(&mut self, name: &str) -> Option<Box<dyn VectorIndex>> {
184        self.indexes.remove(name)
185    }
186
187    /// Check if an index exists.
188    #[must_use]
189    pub fn contains(&self, name: &str) -> bool {
190        self.indexes.contains_key(name)
191    }
192
193    /// List all registered index names.
194    #[must_use]
195    pub fn list(&self) -> Vec<&str> {
196        self.indexes.keys().map(String::as_str).collect()
197    }
198
199    /// Get information about all registered indexes.
200    #[must_use]
201    pub fn info(&self) -> Vec<IndexInfo> {
202        self.indexes
203            .iter()
204            .map(|(name, index)| IndexInfo {
205                name: name.clone(),
206                dimension: index.dimension(),
207                distance_type: index.distance_type(),
208                size: index.len(),
209                memory_bytes: index.memory_usage(),
210            })
211            .collect()
212    }
213
214    /// Number of registered indexes.
215    #[must_use]
216    pub fn len(&self) -> usize {
217        self.indexes.len()
218    }
219
220    /// Check if registry is empty.
221    #[must_use]
222    pub fn is_empty(&self) -> bool {
223        self.indexes.is_empty()
224    }
225
226    /// Total number of vectors across all indexes.
227    #[must_use]
228    pub fn total_vectors(&self) -> usize {
229        self.indexes.values().map(|i| i.len()).sum()
230    }
231
232    /// Total memory usage across all indexes.
233    #[must_use]
234    pub fn total_memory(&self) -> usize {
235        self.indexes.values().map(|i| i.memory_usage()).sum()
236    }
237
238    /// Search a specific index by name.
239    ///
240    /// # Errors
241    ///
242    /// Returns error if index doesn't exist or search fails.
243    pub fn search(&self, name: &str, query: &[f32], k: usize) -> Result<Vec<SearchResult>> {
244        let index = self.indexes.get(name).ok_or_else(|| Error::IndexNotFound {
245            name: name.to_string(),
246        })?;
247        index.search(query, k)
248    }
249
250    /// Search all indexes with the same query.
251    ///
252    /// Note: This is a sequential search. For parallel search, use `parallel_search_all`.
253    ///
254    /// # Errors
255    ///
256    /// Returns error if any search fails.
257    pub fn search_all(&self, query: &[f32], k: usize) -> Result<MultiIndexResults> {
258        let mut results = MultiIndexResults::new();
259
260        for (name, index) in &self.indexes {
261            // Skip indexes with incompatible dimensions
262            if index.dimension() != query.len() {
263                continue;
264            }
265
266            let index_results = index.search(query, k)?;
267            results.add(name.clone(), index_results);
268        }
269
270        Ok(results)
271    }
272
273    /// Search multiple specific indexes.
274    ///
275    /// # Arguments
276    ///
277    /// * `names` - Index names to search
278    /// * `query` - Query vector
279    /// * `k` - Number of results per index
280    ///
281    /// # Errors
282    ///
283    /// Returns error if any index doesn't exist or search fails.
284    pub fn search_indexes(
285        &self,
286        names: &[&str],
287        query: &[f32],
288        k: usize,
289    ) -> Result<MultiIndexResults> {
290        let mut results = MultiIndexResults::new();
291
292        for name in names {
293            let index = self.indexes.get(*name).ok_or_else(|| Error::IndexNotFound {
294                name: (*name).to_string(),
295            })?;
296
297            // Check dimension compatibility
298            if index.dimension() != query.len() {
299                return Err(Error::DimensionMismatch {
300                    expected: index.dimension(),
301                    got: query.len(),
302                });
303            }
304
305            let index_results = index.search(query, k)?;
306            results.add((*name).to_string(), index_results);
307        }
308
309        Ok(results)
310    }
311
312    /// Add a vector to a specific index.
313    ///
314    /// # Errors
315    ///
316    /// Returns error if index doesn't exist or add fails.
317    pub fn add(&mut self, index_name: &str, id: String, vector: &[f32]) -> Result<()> {
318        let index = self
319            .indexes
320            .get_mut(index_name)
321            .ok_or_else(|| Error::IndexNotFound {
322                name: index_name.to_string(),
323            })?;
324        index.add(id, vector)
325    }
326
327    /// Remove a vector from a specific index.
328    ///
329    /// # Errors
330    ///
331    /// Returns error if index doesn't exist.
332    pub fn remove_vector(&mut self, index_name: &str, id: &str) -> Result<bool> {
333        let index = self
334            .indexes
335            .get_mut(index_name)
336            .ok_or_else(|| Error::IndexNotFound {
337                name: index_name.to_string(),
338            })?;
339        index.remove(id)
340    }
341
342    /// Clear all vectors from all indexes.
343    pub fn clear_all(&mut self) {
344        for index in self.indexes.values_mut() {
345            index.clear();
346        }
347    }
348}
349
350/// Thread-safe shared registry using Arc<RwLock>.
351pub type SharedRegistry = Arc<RwLock<IndexRegistry>>;
352
353/// Create a new shared registry.
354#[must_use]
355pub fn shared_registry() -> SharedRegistry {
356    Arc::new(RwLock::new(IndexRegistry::new()))
357}
358
#[cfg(test)]
mod tests {
    use super::*;
    use crate::index::{FlatIndex, IndexConfig};

    /// Empty flat index of dimension `dim`.
    fn create_test_index(dim: usize) -> FlatIndex {
        let config = IndexConfig::new(dim);
        FlatIndex::new(config)
    }

    /// Vector of length `dim` holding 1.0 at `axis` and 0.0 everywhere else.
    fn one_hot(dim: usize, axis: usize) -> Vec<f32> {
        let mut vector = vec![0.0; dim];
        vector[axis] = 1.0;
        vector
    }

    /// Flat index of dimension `dim` loaded with one-hot vectors, given as
    /// `(id, axis)` pairs and inserted in slice order.
    fn index_with(dim: usize, entries: &[(&str, usize)]) -> FlatIndex {
        let mut index = create_test_index(dim);
        for &(id, axis) in entries {
            index.add(id.to_string(), &one_hot(dim, axis)).unwrap();
        }
        index
    }

    #[test]
    fn test_register_and_get() {
        let mut registry = IndexRegistry::new();
        registry.register("test", create_test_index(128)).unwrap();

        assert!(registry.contains("test"));
        assert!(!registry.contains("other"));
        assert_eq!(registry.len(), 1);

        let found = registry.get("test").unwrap();
        assert_eq!(found.dimension(), 128);
    }

    #[test]
    fn test_duplicate_register_error() {
        let mut registry = IndexRegistry::new();
        registry.register("test", create_test_index(128)).unwrap();

        // Same name again, different index: must be rejected.
        let second_attempt = registry.register("test", create_test_index(256));
        assert!(second_attempt.is_err());
    }

    #[test]
    fn test_register_or_replace() {
        let mut registry = IndexRegistry::new();

        // Nothing stored under the name yet, so nothing is handed back.
        let previous = registry.register_or_replace("test", create_test_index(128));
        assert!(previous.is_none());

        // Replacing hands back the index that was stored before.
        let previous = registry.register_or_replace("test", create_test_index(256));
        assert!(previous.is_some());
        assert_eq!(previous.unwrap().dimension(), 128);

        // The registry now serves the replacement.
        assert_eq!(registry.get("test").unwrap().dimension(), 256);
    }

    #[test]
    fn test_remove() {
        let mut registry = IndexRegistry::new();
        registry.register("test", create_test_index(128)).unwrap();

        let taken = registry.remove("test");
        assert!(taken.is_some());
        assert_eq!(taken.unwrap().dimension(), 128);
        assert!(registry.is_empty());
    }

    #[test]
    fn test_list_and_info() {
        let mut registry = IndexRegistry::new();
        registry.register("a", create_test_index(128)).unwrap();
        registry.register("b", create_test_index(256)).unwrap();

        let listed = registry.list();
        assert_eq!(listed.len(), 2);
        assert!(listed.contains(&"a"));
        assert!(listed.contains(&"b"));

        assert_eq!(registry.info().len(), 2);
    }

    #[test]
    fn test_search_specific_index() {
        let mut registry = IndexRegistry::new();
        registry
            .register("test", index_with(4, &[("v1", 0), ("v2", 1)]))
            .unwrap();

        // The query coincides with "v1".
        let hits = registry.search("test", &one_hot(4, 0), 2).unwrap();

        assert_eq!(hits.len(), 2);
        assert_eq!(hits[0].id, "v1"); // Closest to query
    }

    #[test]
    fn test_search_nonexistent_index() {
        let registry = IndexRegistry::new();

        assert!(registry.search("nonexistent", &[1.0], 1).is_err());
    }

    #[test]
    fn test_search_all() {
        let mut registry = IndexRegistry::new();

        // Two indexes sharing the query's dimension.
        registry
            .register("index1", index_with(4, &[("a1", 0)]))
            .unwrap();
        registry
            .register("index2", index_with(4, &[("b1", 1)]))
            .unwrap();

        let outcome = registry.search_all(&[0.5, 0.5, 0.0, 0.0], 10).unwrap();

        assert_eq!(outcome.by_index.len(), 2);
        assert_eq!(outcome.total_count, 2);
    }

    #[test]
    fn test_search_all_skips_incompatible_dimensions() {
        let mut registry = IndexRegistry::new();

        registry
            .register("index1", index_with(4, &[("a1", 0)]))
            .unwrap();
        // Dimension 8 cannot serve a 4-d query.
        registry
            .register("index2", index_with(8, &[("b1", 0)]))
            .unwrap();

        // Query with dimension 4 - should only search index1
        let outcome = registry.search_all(&[0.5, 0.5, 0.0, 0.0], 10).unwrap();

        assert_eq!(outcome.by_index.len(), 1);
        assert_eq!(outcome.by_index[0].index_name, "index1");
    }

    #[test]
    fn test_search_indexes() {
        let mut registry = IndexRegistry::new();

        registry
            .register("idx1", index_with(4, &[("a1", 0)]))
            .unwrap();
        registry
            .register("idx2", index_with(4, &[("b1", 1)]))
            .unwrap();
        registry
            .register("idx3", index_with(4, &[("c1", 2)]))
            .unwrap();

        // Only idx1 and idx2 are requested; idx3 must not contribute.
        let selected = ["idx1", "idx2"];
        let outcome = registry
            .search_indexes(&selected, &[0.5, 0.5, 0.0, 0.0], 10)
            .unwrap();

        assert_eq!(outcome.by_index.len(), 2);
        assert_eq!(outcome.total_count, 2);
    }

    #[test]
    fn test_add_to_index() {
        let mut registry = IndexRegistry::new();
        registry.register("test", create_test_index(4)).unwrap();

        registry
            .add("test", "v1".to_string(), &one_hot(4, 0))
            .unwrap();

        assert_eq!(registry.get("test").unwrap().len(), 1);
    }

    #[test]
    fn test_multi_index_results_flatten() {
        let mut collected = MultiIndexResults::new();

        let from_first = vec![SearchResult::new("a".to_string(), 0.5, DistanceType::L2)];
        collected.add("idx1".to_string(), from_first);

        let from_second = vec![SearchResult::new("b".to_string(), 0.3, DistanceType::L2)];
        collected.add("idx2".to_string(), from_second);

        let flat = collected.flatten();
        assert_eq!(flat.len(), 2);

        let (first_index, first_hit) = &flat[0];
        assert_eq!(first_index, "idx1");
        assert_eq!(first_hit.id, "a");

        let (second_index, second_hit) = &flat[1];
        assert_eq!(second_index, "idx2");
        assert_eq!(second_hit.id, "b");
    }

    #[test]
    fn test_total_vectors_and_memory() {
        let mut registry = IndexRegistry::new();

        registry
            .register("idx1", index_with(4, &[("a", 0), ("b", 1)]))
            .unwrap();
        registry
            .register("idx2", index_with(4, &[("c", 2)]))
            .unwrap();

        assert_eq!(registry.total_vectors(), 3);
        assert!(registry.total_memory() > 0);
    }

    #[test]
    fn test_clear_all() {
        let mut registry = IndexRegistry::new();

        registry
            .register("idx1", index_with(4, &[("a", 0)]))
            .unwrap();
        registry
            .register("idx2", index_with(4, &[("b", 1)]))
            .unwrap();

        assert_eq!(registry.total_vectors(), 2);

        registry.clear_all();

        assert_eq!(registry.total_vectors(), 0);
        assert_eq!(registry.len(), 2); // Indexes still exist, just empty
    }

    #[test]
    fn test_shared_registry() {
        let shared = shared_registry();

        // Write access: the guard is a temporary released at the end of the statement.
        shared
            .write()
            .register("test", create_test_index(128))
            .unwrap();

        // Read access
        assert!(shared.read().contains("test"));
    }
}