1use crate::query_planning::*;
37use crate::{hnsw::HnswIndex, ivf::IvfIndex, lsh::LshIndex, nsg::NsgIndex};
38use crate::{Vector, VectorIndex};
39use anyhow::Result;
40use std::collections::HashMap;
41use std::sync::{Arc, RwLock};
42use tracing::{debug, info};
43
/// Settings that control which indices a [`DynamicIndexSelector`] maintains
/// and which constraints it hands to the query planner.
#[derive(Debug, Clone)]
pub struct IndexSelectorConfig {
    /// Build an HNSW index and offer it to the planner.
    pub enable_hnsw: bool,
    /// Build an NSG index and offer it to the planner.
    pub enable_nsg: bool,
    /// Build an IVF index and offer it to the planner.
    pub enable_ivf: bool,
    /// Build an LSH index and offer it to the planner.
    pub enable_lsh: bool,
    /// Minimum recall passed to the planner with every query.
    pub min_recall: f32,
    /// Maximum latency, in milliseconds, passed to the planner with every query.
    pub max_latency_ms: f64,
    /// When `true`, each search records its latency and feeds the running
    /// average back into the planner.
    pub enable_learning: bool,
    /// When `true`, vectors can no longer be added once the indices are built.
    pub eager_build: bool,
}
64
65impl Default for IndexSelectorConfig {
66 fn default() -> Self {
67 Self {
68 enable_hnsw: true,
69 enable_nsg: true,
70 enable_ivf: true,
71 enable_lsh: false, min_recall: 0.90,
73 max_latency_ms: 100.0,
74 enable_learning: true,
75 eager_build: true,
76 }
77 }
78}
79
80pub struct DynamicIndexSelector {
82 config: IndexSelectorConfig,
83 hnsw_index: Option<HnswIndex>,
84 nsg_index: Option<NsgIndex>,
85 ivf_index: Option<IvfIndex>,
86 lsh_index: Option<LshIndex>,
87 query_planner: Arc<RwLock<QueryPlanner>>,
88 data: Vec<(String, Vector)>,
89 is_built: bool,
90 performance_stats: Arc<RwLock<PerformanceStats>>,
91}
92
93#[derive(Debug, Clone, Default)]
95struct PerformanceStats {
96 strategy_latencies: HashMap<QueryStrategy, Vec<f64>>,
97 strategy_recalls: HashMap<QueryStrategy, Vec<f32>>,
98 total_queries: usize,
99}
100
101impl PerformanceStats {
102 fn record(&mut self, strategy: QueryStrategy, latency_ms: f64, recall: f32) {
103 self.strategy_latencies
104 .entry(strategy)
105 .or_default()
106 .push(latency_ms);
107
108 self.strategy_recalls
109 .entry(strategy)
110 .or_default()
111 .push(recall);
112
113 self.total_queries += 1;
114 }
115
116 fn avg_latency(&self, strategy: QueryStrategy) -> Option<f64> {
117 self.strategy_latencies
118 .get(&strategy)
119 .and_then(|latencies| {
120 if latencies.is_empty() {
121 None
122 } else {
123 Some(latencies.iter().sum::<f64>() / latencies.len() as f64)
124 }
125 })
126 }
127
128 fn avg_recall(&self, strategy: QueryStrategy) -> Option<f32> {
129 self.strategy_recalls.get(&strategy).and_then(|recalls| {
130 if recalls.is_empty() {
131 None
132 } else {
133 Some(recalls.iter().sum::<f32>() / recalls.len() as f32)
134 }
135 })
136 }
137}
138
139impl DynamicIndexSelector {
140 pub fn new(config: IndexSelectorConfig) -> Result<Self> {
142 let mut available_indices = Vec::new();
144 if config.enable_hnsw {
145 available_indices.push(QueryStrategy::HnswApproximate);
146 }
147 if config.enable_nsg {
148 available_indices.push(QueryStrategy::NsgApproximate);
149 }
150 if config.enable_ivf {
151 available_indices.push(QueryStrategy::IvfCoarse);
152 }
153 if config.enable_lsh {
154 available_indices.push(QueryStrategy::LocalitySensitiveHashing);
155 }
156
157 if available_indices.is_empty() {
158 return Err(anyhow::anyhow!("At least one index type must be enabled"));
159 }
160
161 let index_stats = IndexStatistics {
163 vector_count: 0,
164 dimensions: 0,
165 available_indices,
166 avg_latencies: HashMap::new(),
167 avg_recalls: HashMap::new(),
168 };
169
170 let cost_model = CostModel::default();
171 let query_planner = Arc::new(RwLock::new(QueryPlanner::new(cost_model, index_stats)));
172
173 Ok(Self {
174 config,
175 hnsw_index: None,
176 nsg_index: None,
177 ivf_index: None,
178 lsh_index: None,
179 query_planner,
180 data: Vec::new(),
181 is_built: false,
182 performance_stats: Arc::new(RwLock::new(PerformanceStats::default())),
183 })
184 }
185
186 pub fn add(&mut self, uri: String, vector: Vector) -> Result<()> {
188 if self.is_built && self.config.eager_build {
189 return Err(anyhow::anyhow!(
190 "Cannot add vectors after indices are built in eager mode"
191 ));
192 }
193
194 self.data.push((uri, vector));
195 Ok(())
196 }
197
198 pub fn build(&mut self) -> Result<()> {
200 if self.data.is_empty() {
201 return Err(anyhow::anyhow!("No vectors to index"));
202 }
203
204 let dimensions = self.data[0].1.dimensions;
205 let vector_count = self.data.len();
206
207 info!(
208 "Building dynamic index selector with {} vectors, {} dimensions",
209 vector_count, dimensions
210 );
211
212 if self.config.enable_hnsw {
214 debug!("Building HNSW index");
215 let mut hnsw = HnswIndex::new(Default::default())?;
216 for (uri, vec) in &self.data {
217 hnsw.insert(uri.clone(), vec.clone())?;
218 }
219 self.hnsw_index = Some(hnsw);
220 }
221
222 if self.config.enable_nsg {
224 debug!("Building NSG index");
225 let mut nsg = NsgIndex::new(Default::default())?;
226 for (uri, vec) in &self.data {
227 nsg.insert(uri.clone(), vec.clone())?;
228 }
229 nsg.build()?;
230 self.nsg_index = Some(nsg);
231 }
232
233 if self.config.enable_ivf {
235 debug!("Building IVF index");
236 let mut ivf = IvfIndex::new(Default::default())?;
237 for (uri, vec) in &self.data {
238 ivf.insert(uri.clone(), vec.clone())?;
239 }
240 self.ivf_index = Some(ivf);
242 }
243
244 if self.config.enable_lsh {
246 debug!("Building LSH index");
247 let lsh = LshIndex::new(Default::default());
248 let mut lsh_mut = lsh;
249 for (uri, vec) in &self.data {
250 lsh_mut.insert(uri.clone(), vec.clone())?;
251 }
252 self.lsh_index = Some(lsh_mut);
253 }
254
255 let mut planner = self.query_planner.write().unwrap();
257 planner.update_index_metadata(vector_count, dimensions);
258
259 self.is_built = true;
260
261 info!("Dynamic index selector built successfully");
262
263 Ok(())
264 }
265
266 pub fn search_knn(&self, query: &Vector, k: usize) -> Result<Vec<(String, f32)>> {
268 if !self.is_built {
269 return Err(anyhow::anyhow!("Indices not built. Call build() first."));
270 }
271
272 let query_chars = QueryCharacteristics {
274 k,
275 dimensions: query.dimensions,
276 min_recall: self.config.min_recall,
277 max_latency_ms: self.config.max_latency_ms,
278 query_type: VectorQueryType::Single,
279 };
280
281 let planner = self.query_planner.read().unwrap();
283 let plan = planner.plan(&query_chars)?;
284 drop(planner); debug!(
287 "Selected strategy: {:?} (estimated cost: {:.2} µs, recall: {:.2})",
288 plan.strategy, plan.estimated_cost_us, plan.estimated_recall
289 );
290
291 let start = std::time::Instant::now();
293 let results = self.execute_strategy(plan.strategy, query, k)?;
294 let elapsed = start.elapsed().as_secs_f64() * 1000.0; if self.config.enable_learning {
298 let mut stats = self.performance_stats.write().unwrap();
299 stats.record(plan.strategy, elapsed, plan.estimated_recall);
300 drop(stats);
301
302 let mut planner = self.query_planner.write().unwrap();
304 if let Some(avg_latency) = self
305 .performance_stats
306 .read()
307 .unwrap()
308 .avg_latency(plan.strategy)
309 {
310 planner.update_statistics(plan.strategy, avg_latency, plan.estimated_recall);
311 }
312 }
313
314 Ok(results)
315 }
316
317 fn execute_strategy(
319 &self,
320 strategy: QueryStrategy,
321 query: &Vector,
322 k: usize,
323 ) -> Result<Vec<(String, f32)>> {
324 match strategy {
325 QueryStrategy::HnswApproximate => {
326 if let Some(ref index) = self.hnsw_index {
327 index.search_knn(query, k)
328 } else {
329 Err(anyhow::anyhow!("HNSW index not available"))
330 }
331 }
332 QueryStrategy::NsgApproximate => {
333 if let Some(ref index) = self.nsg_index {
334 index.search_knn(query, k)
335 } else {
336 Err(anyhow::anyhow!("NSG index not available"))
337 }
338 }
339 QueryStrategy::IvfCoarse => {
340 if let Some(ref index) = self.ivf_index {
341 index.search_knn(query, k)
342 } else {
343 Err(anyhow::anyhow!("IVF index not available"))
344 }
345 }
346 QueryStrategy::LocalitySensitiveHashing => {
347 if let Some(ref index) = self.lsh_index {
348 index.search_knn(query, k)
349 } else {
350 Err(anyhow::anyhow!("LSH index not available"))
351 }
352 }
353 _ => Err(anyhow::anyhow!(
354 "Strategy {:?} not supported by dynamic selector",
355 strategy
356 )),
357 }
358 }
359
360 pub fn get_stats(&self) -> HashMap<String, String> {
362 let mut stats = HashMap::new();
363 let perf_stats = self.performance_stats.read().unwrap();
364
365 stats.insert(
366 "total_queries".to_string(),
367 perf_stats.total_queries.to_string(),
368 );
369 stats.insert("vector_count".to_string(), self.data.len().to_string());
370 stats.insert("is_built".to_string(), self.is_built.to_string());
371
372 for strategy in &[
374 QueryStrategy::HnswApproximate,
375 QueryStrategy::NsgApproximate,
376 QueryStrategy::IvfCoarse,
377 QueryStrategy::LocalitySensitiveHashing,
378 ] {
379 if let Some(avg_lat) = perf_stats.avg_latency(*strategy) {
380 stats.insert(
381 format!("{:?}_avg_latency_ms", strategy),
382 format!("{:.2}", avg_lat),
383 );
384 }
385 if let Some(avg_rec) = perf_stats.avg_recall(*strategy) {
386 stats.insert(
387 format!("{:?}_avg_recall", strategy),
388 format!("{:.2}", avg_rec),
389 );
390 }
391 }
392
393 stats
394 }
395
396 pub fn is_built(&self) -> bool {
398 self.is_built
399 }
400
401 pub fn len(&self) -> usize {
403 self.data.len()
404 }
405
406 pub fn is_empty(&self) -> bool {
408 self.data.is_empty()
409 }
410}
411
#[cfg(test)]
mod tests {
    use super::*;

    /// Configuration with only the HNSW and NSG indices switched on.
    fn graph_only_config() -> IndexSelectorConfig {
        IndexSelectorConfig {
            enable_hnsw: true,
            enable_nsg: true,
            enable_ivf: false,
            enable_lsh: false,
            ..Default::default()
        }
    }

    /// Adds `count` vectors named `vec_<i>`; component `j` (1-based) of
    /// vector `i` is `i * j`.
    fn populate(selector: &mut DynamicIndexSelector, count: usize, dims: usize) {
        for i in 0..count {
            let components: Vec<f32> = (1..=dims).map(|factor| (i * factor) as f32).collect();
            selector
                .add(format!("vec_{}", i), Vector::new(components))
                .unwrap();
        }
    }

    #[test]
    fn test_dynamic_selector_creation() {
        let created = DynamicIndexSelector::new(IndexSelectorConfig::default());
        assert!(created.is_ok());
    }

    #[test]
    fn test_add_vectors() {
        let mut selector = DynamicIndexSelector::new(IndexSelectorConfig::default()).unwrap();

        populate(&mut selector, 10, 2);

        assert_eq!(selector.len(), 10);
    }

    #[test]
    fn test_build_and_search() {
        let mut selector = DynamicIndexSelector::new(graph_only_config()).unwrap();
        populate(&mut selector, 50, 3);

        selector.build().unwrap();
        assert!(selector.is_built());

        let query = Vector::new(vec![25.0, 50.0, 75.0]);
        let hits = selector.search_knn(&query, 5).unwrap();

        assert_eq!(hits.len(), 5);
        // Scores must come back in non-increasing order.
        assert!(hits.windows(2).all(|pair| pair[0].1 >= pair[1].1));
    }

    #[test]
    fn test_performance_learning() {
        let config = IndexSelectorConfig {
            enable_learning: true,
            ..graph_only_config()
        };
        let mut selector = DynamicIndexSelector::new(config).unwrap();
        populate(&mut selector, 30, 2);

        selector.build().unwrap();

        // Search outcomes are deliberately ignored; only the recorded
        // statistics are checked below.
        let query = Vector::new(vec![15.0, 30.0]);
        for _ in 0..5 {
            let _ = selector.search_knn(&query, 5);
        }

        let stats = selector.get_stats();
        assert!(stats.contains_key("total_queries"));
        let recorded: usize = stats.get("total_queries").unwrap().parse().unwrap();
        assert!(recorded >= 5);
    }
}