oxirs_vec/
dynamic_index_selector.rs

1//! Dynamic index selection for optimal query performance
2//!
3//! This module provides runtime index selection based on query characteristics,
4//! automatically choosing the best index type (HNSW, NSG, IVF, PQ, etc.) for each query.
5//!
6//! # Features
7//!
8//! - **Automatic Strategy Selection**: Uses cost-based query planning
9//! - **Multiple Index Support**: Maintains multiple indices for different use cases
10//! - **Performance Learning**: Tracks actual performance to improve future selections
11//! - **Adaptive Parameters**: Automatically tunes parameters based on query requirements
12//!
13//! # Example
14//!
15//! ```rust,ignore
16//! use oxirs_vec::dynamic_index_selector::{DynamicIndexSelector, IndexSelectorConfig};
17//! use oxirs_vec::{Vector, VectorIndex};
18//!
19//! let config = IndexSelectorConfig::default();
20//! let mut selector = DynamicIndexSelector::new(config).unwrap();
21//!
22//! // Add vectors - they'll be indexed in all configured indices
23//! for i in 0..1000 {
24//!     let vec = Vector::new(vec![i as f32, (i * 2) as f32]);
25//!     selector.add(format!("vec_{}", i), vec).unwrap();
26//! }
27//!
28//! // Build all indices
29//! selector.build().unwrap();
30//!
31//! // Search - automatically selects best index
32//! let query = Vector::new(vec![500.0, 1000.0]);
33//! let results = selector.search_knn(&query, 10).unwrap();
34//! ```
35
36use crate::query_planning::*;
37use crate::{hnsw::HnswIndex, ivf::IvfIndex, lsh::LshIndex, nsg::NsgIndex};
38use crate::{Vector, VectorIndex};
39use anyhow::Result;
40use std::collections::HashMap;
41use std::sync::{Arc, RwLock};
42use tracing::{debug, info};
43
/// Settings that control which indices a [`DynamicIndexSelector`] maintains
/// and what requirements it hands to the query planner.
#[derive(Debug, Clone)]
pub struct IndexSelectorConfig {
    /// Build an HNSW index and offer it to the query planner.
    pub enable_hnsw: bool,
    /// Build an NSG index and offer it to the query planner.
    pub enable_nsg: bool,
    /// Build an IVF index and offer it to the query planner.
    pub enable_ivf: bool,
    /// Build an LSH index and offer it to the query planner.
    pub enable_lsh: bool,
    /// Recall requirement forwarded to the planner with every query
    /// (intended range 0.0 to 1.0; the value is not validated here).
    pub min_recall: f32,
    /// Latency budget in milliseconds forwarded to the planner with every query.
    pub max_latency_ms: f64,
    /// When `true`, each search records its measured latency and feeds the
    /// running average back into the query planner.
    pub enable_learning: bool,
    /// When `true`, vectors can no longer be added once the indices have
    /// been built.
    pub eager_build: bool,
}

impl Default for IndexSelectorConfig {
    /// HNSW, NSG and IVF enabled, LSH disabled, 90% minimum recall, a 100 ms
    /// latency budget, learning on and eager mode on.
    fn default() -> Self {
        IndexSelectorConfig {
            enable_hnsw: true,
            enable_nsg: true,
            enable_ivf: true,
            // LSH is less commonly used, so it is opt-in.
            enable_lsh: false,
            min_recall: 0.90,
            max_latency_ms: 100.0,
            enable_learning: true,
            eager_build: true,
        }
    }
}
79
80/// Dynamic index selector with multiple index backends
81pub struct DynamicIndexSelector {
82    config: IndexSelectorConfig,
83    hnsw_index: Option<HnswIndex>,
84    nsg_index: Option<NsgIndex>,
85    ivf_index: Option<IvfIndex>,
86    lsh_index: Option<LshIndex>,
87    query_planner: Arc<RwLock<QueryPlanner>>,
88    data: Vec<(String, Vector)>,
89    is_built: bool,
90    performance_stats: Arc<RwLock<PerformanceStats>>,
91}
92
93/// Performance statistics for learning
94#[derive(Debug, Clone, Default)]
95struct PerformanceStats {
96    strategy_latencies: HashMap<QueryStrategy, Vec<f64>>,
97    strategy_recalls: HashMap<QueryStrategy, Vec<f32>>,
98    total_queries: usize,
99}
100
101impl PerformanceStats {
102    fn record(&mut self, strategy: QueryStrategy, latency_ms: f64, recall: f32) {
103        self.strategy_latencies
104            .entry(strategy)
105            .or_default()
106            .push(latency_ms);
107
108        self.strategy_recalls
109            .entry(strategy)
110            .or_default()
111            .push(recall);
112
113        self.total_queries += 1;
114    }
115
116    fn avg_latency(&self, strategy: QueryStrategy) -> Option<f64> {
117        self.strategy_latencies
118            .get(&strategy)
119            .and_then(|latencies| {
120                if latencies.is_empty() {
121                    None
122                } else {
123                    Some(latencies.iter().sum::<f64>() / latencies.len() as f64)
124                }
125            })
126    }
127
128    fn avg_recall(&self, strategy: QueryStrategy) -> Option<f32> {
129        self.strategy_recalls.get(&strategy).and_then(|recalls| {
130            if recalls.is_empty() {
131                None
132            } else {
133                Some(recalls.iter().sum::<f32>() / recalls.len() as f32)
134            }
135        })
136    }
137}
138
139impl DynamicIndexSelector {
140    /// Create a new dynamic index selector
141    pub fn new(config: IndexSelectorConfig) -> Result<Self> {
142        // Determine available indices based on config
143        let mut available_indices = Vec::new();
144        if config.enable_hnsw {
145            available_indices.push(QueryStrategy::HnswApproximate);
146        }
147        if config.enable_nsg {
148            available_indices.push(QueryStrategy::NsgApproximate);
149        }
150        if config.enable_ivf {
151            available_indices.push(QueryStrategy::IvfCoarse);
152        }
153        if config.enable_lsh {
154            available_indices.push(QueryStrategy::LocalitySensitiveHashing);
155        }
156
157        if available_indices.is_empty() {
158            return Err(anyhow::anyhow!("At least one index type must be enabled"));
159        }
160
161        // Create initial index statistics
162        let index_stats = IndexStatistics {
163            vector_count: 0,
164            dimensions: 0,
165            available_indices,
166            avg_latencies: HashMap::new(),
167            avg_recalls: HashMap::new(),
168        };
169
170        let cost_model = CostModel::default();
171        let query_planner = Arc::new(RwLock::new(QueryPlanner::new(cost_model, index_stats)));
172
173        Ok(Self {
174            config,
175            hnsw_index: None,
176            nsg_index: None,
177            ivf_index: None,
178            lsh_index: None,
179            query_planner,
180            data: Vec::new(),
181            is_built: false,
182            performance_stats: Arc::new(RwLock::new(PerformanceStats::default())),
183        })
184    }
185
186    /// Add a vector to all enabled indices
187    pub fn add(&mut self, uri: String, vector: Vector) -> Result<()> {
188        if self.is_built && self.config.eager_build {
189            return Err(anyhow::anyhow!(
190                "Cannot add vectors after indices are built in eager mode"
191            ));
192        }
193
194        self.data.push((uri, vector));
195        Ok(())
196    }
197
198    /// Build all enabled indices
199    pub fn build(&mut self) -> Result<()> {
200        if self.data.is_empty() {
201            return Err(anyhow::anyhow!("No vectors to index"));
202        }
203
204        let dimensions = self.data[0].1.dimensions;
205        let vector_count = self.data.len();
206
207        info!(
208            "Building dynamic index selector with {} vectors, {} dimensions",
209            vector_count, dimensions
210        );
211
212        // Build HNSW index
213        if self.config.enable_hnsw {
214            debug!("Building HNSW index");
215            let mut hnsw = HnswIndex::new(Default::default())?;
216            for (uri, vec) in &self.data {
217                hnsw.insert(uri.clone(), vec.clone())?;
218            }
219            self.hnsw_index = Some(hnsw);
220        }
221
222        // Build NSG index
223        if self.config.enable_nsg {
224            debug!("Building NSG index");
225            let mut nsg = NsgIndex::new(Default::default())?;
226            for (uri, vec) in &self.data {
227                nsg.insert(uri.clone(), vec.clone())?;
228            }
229            nsg.build()?;
230            self.nsg_index = Some(nsg);
231        }
232
233        // Build IVF index
234        if self.config.enable_ivf {
235            debug!("Building IVF index");
236            let mut ivf = IvfIndex::new(Default::default())?;
237            for (uri, vec) in &self.data {
238                ivf.insert(uri.clone(), vec.clone())?;
239            }
240            // IVF trains clusters automatically during insertion
241            self.ivf_index = Some(ivf);
242        }
243
244        // Build LSH index
245        if self.config.enable_lsh {
246            debug!("Building LSH index");
247            let lsh = LshIndex::new(Default::default());
248            let mut lsh_mut = lsh;
249            for (uri, vec) in &self.data {
250                lsh_mut.insert(uri.clone(), vec.clone())?;
251            }
252            self.lsh_index = Some(lsh_mut);
253        }
254
255        // Update query planner statistics
256        let mut planner = self.query_planner.write().unwrap();
257        planner.update_index_metadata(vector_count, dimensions);
258
259        self.is_built = true;
260
261        info!("Dynamic index selector built successfully");
262
263        Ok(())
264    }
265
266    /// Search with automatic index selection
267    pub fn search_knn(&self, query: &Vector, k: usize) -> Result<Vec<(String, f32)>> {
268        if !self.is_built {
269            return Err(anyhow::anyhow!("Indices not built. Call build() first."));
270        }
271
272        // Create query characteristics
273        let query_chars = QueryCharacteristics {
274            k,
275            dimensions: query.dimensions,
276            min_recall: self.config.min_recall,
277            max_latency_ms: self.config.max_latency_ms,
278            query_type: VectorQueryType::Single,
279        };
280
281        // Get query plan
282        let planner = self.query_planner.read().unwrap();
283        let plan = planner.plan(&query_chars)?;
284        drop(planner); // Release read lock
285
286        debug!(
287            "Selected strategy: {:?} (estimated cost: {:.2} µs, recall: {:.2})",
288            plan.strategy, plan.estimated_cost_us, plan.estimated_recall
289        );
290
291        // Execute query using selected strategy
292        let start = std::time::Instant::now();
293        let results = self.execute_strategy(plan.strategy, query, k)?;
294        let elapsed = start.elapsed().as_secs_f64() * 1000.0; // Convert to ms
295
296        // Record performance if learning is enabled
297        if self.config.enable_learning {
298            let mut stats = self.performance_stats.write().unwrap();
299            stats.record(plan.strategy, elapsed, plan.estimated_recall);
300            drop(stats);
301
302            // Update query planner with actual performance
303            let mut planner = self.query_planner.write().unwrap();
304            if let Some(avg_latency) = self
305                .performance_stats
306                .read()
307                .unwrap()
308                .avg_latency(plan.strategy)
309            {
310                planner.update_statistics(plan.strategy, avg_latency, plan.estimated_recall);
311            }
312        }
313
314        Ok(results)
315    }
316
317    /// Execute query using specific strategy
318    fn execute_strategy(
319        &self,
320        strategy: QueryStrategy,
321        query: &Vector,
322        k: usize,
323    ) -> Result<Vec<(String, f32)>> {
324        match strategy {
325            QueryStrategy::HnswApproximate => {
326                if let Some(ref index) = self.hnsw_index {
327                    index.search_knn(query, k)
328                } else {
329                    Err(anyhow::anyhow!("HNSW index not available"))
330                }
331            }
332            QueryStrategy::NsgApproximate => {
333                if let Some(ref index) = self.nsg_index {
334                    index.search_knn(query, k)
335                } else {
336                    Err(anyhow::anyhow!("NSG index not available"))
337                }
338            }
339            QueryStrategy::IvfCoarse => {
340                if let Some(ref index) = self.ivf_index {
341                    index.search_knn(query, k)
342                } else {
343                    Err(anyhow::anyhow!("IVF index not available"))
344                }
345            }
346            QueryStrategy::LocalitySensitiveHashing => {
347                if let Some(ref index) = self.lsh_index {
348                    index.search_knn(query, k)
349                } else {
350                    Err(anyhow::anyhow!("LSH index not available"))
351                }
352            }
353            _ => Err(anyhow::anyhow!(
354                "Strategy {:?} not supported by dynamic selector",
355                strategy
356            )),
357        }
358    }
359
360    /// Get performance statistics
361    pub fn get_stats(&self) -> HashMap<String, String> {
362        let mut stats = HashMap::new();
363        let perf_stats = self.performance_stats.read().unwrap();
364
365        stats.insert(
366            "total_queries".to_string(),
367            perf_stats.total_queries.to_string(),
368        );
369        stats.insert("vector_count".to_string(), self.data.len().to_string());
370        stats.insert("is_built".to_string(), self.is_built.to_string());
371
372        // Add per-strategy stats
373        for strategy in &[
374            QueryStrategy::HnswApproximate,
375            QueryStrategy::NsgApproximate,
376            QueryStrategy::IvfCoarse,
377            QueryStrategy::LocalitySensitiveHashing,
378        ] {
379            if let Some(avg_lat) = perf_stats.avg_latency(*strategy) {
380                stats.insert(
381                    format!("{:?}_avg_latency_ms", strategy),
382                    format!("{:.2}", avg_lat),
383                );
384            }
385            if let Some(avg_rec) = perf_stats.avg_recall(*strategy) {
386                stats.insert(
387                    format!("{:?}_avg_recall", strategy),
388                    format!("{:.2}", avg_rec),
389                );
390            }
391        }
392
393        stats
394    }
395
396    /// Check if indices are built
397    pub fn is_built(&self) -> bool {
398        self.is_built
399    }
400
401    /// Get number of vectors
402    pub fn len(&self) -> usize {
403        self.data.len()
404    }
405
406    /// Check if empty
407    pub fn is_empty(&self) -> bool {
408        self.data.is_empty()
409    }
410}
411
#[cfg(test)]
mod tests {
    use super::*;

    /// Configuration with only the HNSW and NSG backends enabled; IVF is left
    /// out to keep the tests fast and to avoid its training requirement.
    fn graph_only_config() -> IndexSelectorConfig {
        IndexSelectorConfig {
            enable_hnsw: true,
            enable_nsg: true,
            enable_ivf: false,
            enable_lsh: false,
            ..Default::default()
        }
    }

    #[test]
    fn test_dynamic_selector_creation() {
        let config = IndexSelectorConfig::default();
        let selector = DynamicIndexSelector::new(config);
        assert!(selector.is_ok());
    }

    #[test]
    fn test_requires_at_least_one_index() {
        let config = IndexSelectorConfig {
            enable_hnsw: false,
            enable_nsg: false,
            enable_ivf: false,
            enable_lsh: false,
            ..Default::default()
        };
        assert!(DynamicIndexSelector::new(config).is_err());
    }

    #[test]
    fn test_add_vectors() {
        let config = IndexSelectorConfig::default();
        let mut selector = DynamicIndexSelector::new(config).unwrap();
        assert!(selector.is_empty());

        for i in 0..10 {
            let vec = Vector::new(vec![i as f32, (i * 2) as f32]);
            selector.add(format!("vec_{}", i), vec).unwrap();
        }

        assert_eq!(selector.len(), 10);
        assert!(!selector.is_empty());
        assert!(!selector.is_built());
    }

    #[test]
    fn test_build_without_vectors_fails() {
        let mut selector = DynamicIndexSelector::new(graph_only_config()).unwrap();
        assert!(selector.build().is_err());
        assert!(!selector.is_built());
    }

    #[test]
    fn test_search_before_build_fails() {
        let mut selector = DynamicIndexSelector::new(graph_only_config()).unwrap();
        selector
            .add("vec_0".to_string(), Vector::new(vec![1.0, 2.0]))
            .unwrap();

        let query = Vector::new(vec![1.0, 2.0]);
        assert!(selector.search_knn(&query, 1).is_err());
    }

    #[test]
    fn test_build_and_search() {
        let mut selector = DynamicIndexSelector::new(graph_only_config()).unwrap();

        // Add test vectors
        for i in 0..50 {
            let vec = Vector::new(vec![i as f32, (i * 2) as f32, (i * 3) as f32]);
            selector.add(format!("vec_{}", i), vec).unwrap();
        }

        // Build indices
        selector.build().unwrap();
        assert!(selector.is_built());

        // Search
        let query = Vector::new(vec![25.0, 50.0, 75.0]);
        let results = selector.search_knn(&query, 5).unwrap();

        assert_eq!(results.len(), 5);
        // Results should be sorted by similarity
        for i in 1..results.len() {
            assert!(results[i - 1].1 >= results[i].1);
        }
    }

    #[test]
    fn test_add_after_build_rejected_in_eager_mode() {
        // `eager_build` defaults to true, which freezes the buffer after build.
        let mut selector = DynamicIndexSelector::new(graph_only_config()).unwrap();

        for i in 0..50 {
            let vec = Vector::new(vec![i as f32, (i * 2) as f32, (i * 3) as f32]);
            selector.add(format!("vec_{}", i), vec).unwrap();
        }
        selector.build().unwrap();

        let late = Vector::new(vec![1.0, 2.0, 3.0]);
        assert!(selector.add("late".to_string(), late).is_err());
        assert_eq!(selector.len(), 50);
    }

    #[test]
    fn test_performance_learning() {
        let config = IndexSelectorConfig {
            enable_learning: true,
            ..graph_only_config()
        };
        let mut selector = DynamicIndexSelector::new(config).unwrap();

        // Add vectors
        for i in 0..30 {
            let vec = Vector::new(vec![i as f32, (i * 2) as f32]);
            selector.add(format!("vec_{}", i), vec).unwrap();
        }

        selector.build().unwrap();

        // Perform multiple searches to build up statistics. A failing search
        // is surfaced here rather than silently skewing the count below.
        for _ in 0..5 {
            let query = Vector::new(vec![15.0, 30.0]);
            selector.search_knn(&query, 5).unwrap();
        }

        // Every successful search is recorded exactly once.
        let stats = selector.get_stats();
        let total_queries: usize = stats
            .get("total_queries")
            .expect("total_queries is always reported")
            .parse()
            .unwrap();
        assert_eq!(total_queries, 5);
    }
}