1use crate::query_planning::*;
37use crate::{hnsw::HnswIndex, ivf::IvfIndex, lsh::LshIndex, nsg::NsgIndex};
38use crate::{Vector, VectorIndex};
39use anyhow::Result;
40use std::collections::HashMap;
41use std::sync::{Arc, RwLock};
42use tracing::{debug, info};
43
44#[derive(Debug, Clone)]
46pub struct IndexSelectorConfig {
47 pub enable_hnsw: bool,
49 pub enable_nsg: bool,
51 pub enable_ivf: bool,
53 pub enable_lsh: bool,
55 pub min_recall: f32,
57 pub max_latency_ms: f64,
59 pub enable_learning: bool,
61 pub eager_build: bool,
63}
64
65impl Default for IndexSelectorConfig {
66 fn default() -> Self {
67 Self {
68 enable_hnsw: true,
69 enable_nsg: true,
70 enable_ivf: true,
71 enable_lsh: false, min_recall: 0.90,
73 max_latency_ms: 100.0,
74 enable_learning: true,
75 eager_build: true,
76 }
77 }
78}
79
80pub struct DynamicIndexSelector {
82 config: IndexSelectorConfig,
83 hnsw_index: Option<HnswIndex>,
84 nsg_index: Option<NsgIndex>,
85 ivf_index: Option<IvfIndex>,
86 lsh_index: Option<LshIndex>,
87 query_planner: Arc<RwLock<QueryPlanner>>,
88 data: Vec<(String, Vector)>,
89 is_built: bool,
90 performance_stats: Arc<RwLock<PerformanceStats>>,
91}
92
93#[derive(Debug, Clone, Default)]
95struct PerformanceStats {
96 strategy_latencies: HashMap<QueryStrategy, Vec<f64>>,
97 strategy_recalls: HashMap<QueryStrategy, Vec<f32>>,
98 total_queries: usize,
99}
100
101impl PerformanceStats {
102 fn record(&mut self, strategy: QueryStrategy, latency_ms: f64, recall: f32) {
103 self.strategy_latencies
104 .entry(strategy)
105 .or_default()
106 .push(latency_ms);
107
108 self.strategy_recalls
109 .entry(strategy)
110 .or_default()
111 .push(recall);
112
113 self.total_queries += 1;
114 }
115
116 fn avg_latency(&self, strategy: QueryStrategy) -> Option<f64> {
117 self.strategy_latencies
118 .get(&strategy)
119 .and_then(|latencies| {
120 if latencies.is_empty() {
121 None
122 } else {
123 Some(latencies.iter().sum::<f64>() / latencies.len() as f64)
124 }
125 })
126 }
127
128 fn avg_recall(&self, strategy: QueryStrategy) -> Option<f32> {
129 self.strategy_recalls.get(&strategy).and_then(|recalls| {
130 if recalls.is_empty() {
131 None
132 } else {
133 Some(recalls.iter().sum::<f32>() / recalls.len() as f32)
134 }
135 })
136 }
137}
138
139impl DynamicIndexSelector {
140 pub fn new(config: IndexSelectorConfig) -> Result<Self> {
142 let mut available_indices = Vec::new();
144 if config.enable_hnsw {
145 available_indices.push(QueryStrategy::HnswApproximate);
146 }
147 if config.enable_nsg {
148 available_indices.push(QueryStrategy::NsgApproximate);
149 }
150 if config.enable_ivf {
151 available_indices.push(QueryStrategy::IvfCoarse);
152 }
153 if config.enable_lsh {
154 available_indices.push(QueryStrategy::LocalitySensitiveHashing);
155 }
156
157 if available_indices.is_empty() {
158 return Err(anyhow::anyhow!("At least one index type must be enabled"));
159 }
160
161 let index_stats = IndexStatistics {
163 vector_count: 0,
164 dimensions: 0,
165 available_indices,
166 avg_latencies: HashMap::new(),
167 avg_recalls: HashMap::new(),
168 };
169
170 let cost_model = CostModel::default();
171 let query_planner = Arc::new(RwLock::new(QueryPlanner::new(cost_model, index_stats)));
172
173 Ok(Self {
174 config,
175 hnsw_index: None,
176 nsg_index: None,
177 ivf_index: None,
178 lsh_index: None,
179 query_planner,
180 data: Vec::new(),
181 is_built: false,
182 performance_stats: Arc::new(RwLock::new(PerformanceStats::default())),
183 })
184 }
185
186 pub fn add(&mut self, uri: String, vector: Vector) -> Result<()> {
188 if self.is_built && self.config.eager_build {
189 return Err(anyhow::anyhow!(
190 "Cannot add vectors after indices are built in eager mode"
191 ));
192 }
193
194 self.data.push((uri, vector));
195 Ok(())
196 }
197
198 pub fn build(&mut self) -> Result<()> {
200 if self.data.is_empty() {
201 return Err(anyhow::anyhow!("No vectors to index"));
202 }
203
204 let dimensions = self.data[0].1.dimensions;
205 let vector_count = self.data.len();
206
207 info!(
208 "Building dynamic index selector with {} vectors, {} dimensions",
209 vector_count, dimensions
210 );
211
212 if self.config.enable_hnsw {
214 debug!("Building HNSW index");
215 let mut hnsw = HnswIndex::new(Default::default())?;
216 for (uri, vec) in &self.data {
217 hnsw.insert(uri.clone(), vec.clone())?;
218 }
219 self.hnsw_index = Some(hnsw);
220 }
221
222 if self.config.enable_nsg {
224 debug!("Building NSG index");
225 let mut nsg = NsgIndex::new(Default::default())?;
226 for (uri, vec) in &self.data {
227 nsg.insert(uri.clone(), vec.clone())?;
228 }
229 nsg.build()?;
230 self.nsg_index = Some(nsg);
231 }
232
233 if self.config.enable_ivf {
235 debug!("Building IVF index");
236 let mut ivf = IvfIndex::new(Default::default())?;
237 for (uri, vec) in &self.data {
238 ivf.insert(uri.clone(), vec.clone())?;
239 }
240 self.ivf_index = Some(ivf);
242 }
243
244 if self.config.enable_lsh {
246 debug!("Building LSH index");
247 let lsh = LshIndex::new(Default::default());
248 let mut lsh_mut = lsh;
249 for (uri, vec) in &self.data {
250 lsh_mut.insert(uri.clone(), vec.clone())?;
251 }
252 self.lsh_index = Some(lsh_mut);
253 }
254
255 let mut planner = self
257 .query_planner
258 .write()
259 .expect("query_planner write lock should not be poisoned");
260 planner.update_index_metadata(vector_count, dimensions);
261
262 self.is_built = true;
263
264 info!("Dynamic index selector built successfully");
265
266 Ok(())
267 }
268
269 pub fn search_knn(&self, query: &Vector, k: usize) -> Result<Vec<(String, f32)>> {
271 if !self.is_built {
272 return Err(anyhow::anyhow!("Indices not built. Call build() first."));
273 }
274
275 let query_chars = QueryCharacteristics {
277 k,
278 dimensions: query.dimensions,
279 min_recall: self.config.min_recall,
280 max_latency_ms: self.config.max_latency_ms,
281 query_type: VectorQueryType::Single,
282 };
283
284 let planner = self
286 .query_planner
287 .read()
288 .expect("query_planner read lock should not be poisoned");
289 let plan = planner.plan(&query_chars)?;
290 drop(planner); debug!(
293 "Selected strategy: {:?} (estimated cost: {:.2} µs, recall: {:.2})",
294 plan.strategy, plan.estimated_cost_us, plan.estimated_recall
295 );
296
297 let start = std::time::Instant::now();
299 let results = self.execute_strategy(plan.strategy, query, k)?;
300 let elapsed = start.elapsed().as_secs_f64() * 1000.0; if self.config.enable_learning {
304 let mut stats = self
305 .performance_stats
306 .write()
307 .expect("performance_stats write lock should not be poisoned");
308 stats.record(plan.strategy, elapsed, plan.estimated_recall);
309 drop(stats);
310
311 let mut planner = self
313 .query_planner
314 .write()
315 .expect("query_planner write lock should not be poisoned");
316 if let Some(avg_latency) = self
317 .performance_stats
318 .read()
319 .expect("performance_stats read lock should not be poisoned")
320 .avg_latency(plan.strategy)
321 {
322 planner.update_statistics(plan.strategy, avg_latency, plan.estimated_recall);
323 }
324 }
325
326 Ok(results)
327 }
328
329 fn execute_strategy(
331 &self,
332 strategy: QueryStrategy,
333 query: &Vector,
334 k: usize,
335 ) -> Result<Vec<(String, f32)>> {
336 match strategy {
337 QueryStrategy::HnswApproximate => {
338 if let Some(ref index) = self.hnsw_index {
339 index.search_knn(query, k)
340 } else {
341 Err(anyhow::anyhow!("HNSW index not available"))
342 }
343 }
344 QueryStrategy::NsgApproximate => {
345 if let Some(ref index) = self.nsg_index {
346 index.search_knn(query, k)
347 } else {
348 Err(anyhow::anyhow!("NSG index not available"))
349 }
350 }
351 QueryStrategy::IvfCoarse => {
352 if let Some(ref index) = self.ivf_index {
353 index.search_knn(query, k)
354 } else {
355 Err(anyhow::anyhow!("IVF index not available"))
356 }
357 }
358 QueryStrategy::LocalitySensitiveHashing => {
359 if let Some(ref index) = self.lsh_index {
360 index.search_knn(query, k)
361 } else {
362 Err(anyhow::anyhow!("LSH index not available"))
363 }
364 }
365 _ => Err(anyhow::anyhow!(
366 "Strategy {:?} not supported by dynamic selector",
367 strategy
368 )),
369 }
370 }
371
372 pub fn get_stats(&self) -> HashMap<String, String> {
374 let mut stats = HashMap::new();
375 let perf_stats = self
376 .performance_stats
377 .read()
378 .expect("performance_stats read lock should not be poisoned");
379
380 stats.insert(
381 "total_queries".to_string(),
382 perf_stats.total_queries.to_string(),
383 );
384 stats.insert("vector_count".to_string(), self.data.len().to_string());
385 stats.insert("is_built".to_string(), self.is_built.to_string());
386
387 for strategy in &[
389 QueryStrategy::HnswApproximate,
390 QueryStrategy::NsgApproximate,
391 QueryStrategy::IvfCoarse,
392 QueryStrategy::LocalitySensitiveHashing,
393 ] {
394 if let Some(avg_lat) = perf_stats.avg_latency(*strategy) {
395 stats.insert(
396 format!("{:?}_avg_latency_ms", strategy),
397 format!("{:.2}", avg_lat),
398 );
399 }
400 if let Some(avg_rec) = perf_stats.avg_recall(*strategy) {
401 stats.insert(
402 format!("{:?}_avg_recall", strategy),
403 format!("{:.2}", avg_rec),
404 );
405 }
406 }
407
408 stats
409 }
410
411 pub fn is_built(&self) -> bool {
413 self.is_built
414 }
415
416 pub fn len(&self) -> usize {
418 self.data.len()
419 }
420
421 pub fn is_empty(&self) -> bool {
423 self.data.is_empty()
424 }
425}
426
427#[cfg(test)]
428mod tests {
429 use super::*;
430
431 #[test]
432 fn test_dynamic_selector_creation() {
433 let config = IndexSelectorConfig::default();
434 let selector = DynamicIndexSelector::new(config);
435 assert!(selector.is_ok());
436 }
437
438 #[test]
439 fn test_add_vectors() {
440 let config = IndexSelectorConfig::default();
441 let mut selector = DynamicIndexSelector::new(config).unwrap();
442
443 for i in 0..10 {
444 let vec = Vector::new(vec![i as f32, (i * 2) as f32]);
445 selector.add(format!("vec_{}", i), vec).unwrap();
446 }
447
448 assert_eq!(selector.len(), 10);
449 }
450
451 #[test]
452 fn test_build_and_search() {
453 let config = IndexSelectorConfig {
454 enable_hnsw: true,
455 enable_nsg: true,
456 enable_ivf: false, enable_lsh: false,
458 ..Default::default()
459 };
460 let mut selector = DynamicIndexSelector::new(config).unwrap();
461
462 for i in 0..50 {
464 let vec = Vector::new(vec![i as f32, (i * 2) as f32, (i * 3) as f32]);
465 selector.add(format!("vec_{}", i), vec).unwrap();
466 }
467
468 selector.build().unwrap();
470 assert!(selector.is_built());
471
472 let query = Vector::new(vec![25.0, 50.0, 75.0]);
474 let results = selector.search_knn(&query, 5).unwrap();
475
476 assert_eq!(results.len(), 5);
477 for i in 1..results.len() {
479 assert!(results[i - 1].1 >= results[i].1);
480 }
481 }
482
483 #[test]
484 fn test_performance_learning() {
485 let config = IndexSelectorConfig {
486 enable_hnsw: true,
487 enable_nsg: true,
488 enable_ivf: false, enable_lsh: false,
490 enable_learning: true,
491 ..Default::default()
492 };
493 let mut selector = DynamicIndexSelector::new(config).unwrap();
494
495 for i in 0..30 {
497 let vec = Vector::new(vec![i as f32, (i * 2) as f32]);
498 selector.add(format!("vec_{}", i), vec).unwrap();
499 }
500
501 selector.build().unwrap();
502
503 for _ in 0..5 {
505 let query = Vector::new(vec![15.0, 30.0]);
506 let _ = selector.search_knn(&query, 5);
507 }
508
509 let stats = selector.get_stats();
511 assert!(stats.contains_key("total_queries"));
512 let total_queries: usize = stats.get("total_queries").unwrap().parse().unwrap();
513 assert!(total_queries >= 5);
514 }
515}