1use serde::{Deserialize, Serialize};
23
24#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
26pub enum MemoryConstraint {
27 Minimal,
29 #[default]
31 Balanced,
32 Unlimited,
34}
35
36#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
38pub enum LatencyRequirement {
39 UltraLow,
41 #[default]
43 Low,
44 Medium,
46 Relaxed,
48}
49
50#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
52pub enum AccuracyRequirement {
53 Exact,
55 VeryHigh,
57 #[default]
59 High,
60 Moderate,
62 Relaxed,
64}
65
66#[derive(Debug, Clone, Serialize, Deserialize)]
68pub struct DataCharacteristics {
69 pub num_vectors: usize,
71 pub dimensions: usize,
73 pub frequent_updates: bool,
75 pub frequent_deletes: bool,
77}
78
79#[derive(Debug, Clone, Default, Serialize, Deserialize)]
81pub struct IndexRequirements {
82 pub memory: MemoryConstraint,
84 pub latency: LatencyRequirement,
86 pub accuracy: AccuracyRequirement,
88}
89
90#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
92pub enum RecommendedIndex {
93 Flat,
95 Hnsw,
97 Ivf,
99 IvfPq,
101 IvfSq,
103 HnswSq,
105 SpFresh,
107}
108
109impl std::fmt::Display for RecommendedIndex {
110 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
111 match self {
112 RecommendedIndex::Flat => write!(f, "Flat (Brute Force)"),
113 RecommendedIndex::Hnsw => write!(f, "HNSW"),
114 RecommendedIndex::Ivf => write!(f, "IVF"),
115 RecommendedIndex::IvfPq => write!(f, "IVF-PQ"),
116 RecommendedIndex::IvfSq => write!(f, "IVF-SQ"),
117 RecommendedIndex::HnswSq => write!(f, "HNSW-SQ"),
118 RecommendedIndex::SpFresh => write!(f, "SPFresh"),
119 }
120 }
121}
122
123#[derive(Debug, Clone, Serialize, Deserialize)]
125pub struct IndexRecommendation {
126 pub primary: RecommendedIndex,
128 pub alternatives: Vec<RecommendedIndex>,
130 pub reasoning: String,
132 pub estimated_memory_bytes: usize,
134 pub estimated_latency_ms: f32,
136 pub estimated_recall: f32,
138 pub suggested_params: IndexParams,
140}
141
142#[derive(Debug, Clone, Default, Serialize, Deserialize)]
144pub struct IndexParams {
145 pub hnsw_m: Option<usize>,
147 pub hnsw_ef_construction: Option<usize>,
148 pub hnsw_ef_search: Option<usize>,
149
150 pub ivf_num_clusters: Option<usize>,
152 pub ivf_num_probes: Option<usize>,
153
154 pub pq_num_subquantizers: Option<usize>,
156 pub pq_bits_per_subquantizer: Option<usize>,
157
158 pub sq_bits: Option<usize>,
160}
161
162pub struct AutoIndexSelector;
164
165impl AutoIndexSelector {
166 pub fn select(
168 data: &DataCharacteristics,
169 requirements: &IndexRequirements,
170 ) -> IndexRecommendation {
171 let num_vectors = data.num_vectors;
172 let _dimensions = data.dimensions;
173
174 let (primary, alternatives, base_reasoning) = if num_vectors < 10_000 {
176 if requirements.accuracy == AccuracyRequirement::Exact {
178 (
179 RecommendedIndex::Flat,
180 vec![RecommendedIndex::Hnsw],
181 "Small dataset (<10K vectors) - flat index provides exact results efficiently",
182 )
183 } else {
184 (
185 RecommendedIndex::Hnsw,
186 vec![RecommendedIndex::Flat, RecommendedIndex::Ivf],
187 "Small dataset (<10K vectors) - HNSW provides fast approximate search",
188 )
189 }
190 } else if num_vectors < 100_000 {
191 match requirements.memory {
193 MemoryConstraint::Minimal => {
194 (
195 RecommendedIndex::IvfSq,
196 vec![RecommendedIndex::IvfPq, RecommendedIndex::Ivf],
197 "Medium dataset (10K-100K) with memory constraints - IVF-SQ balances memory and accuracy",
198 )
199 }
200 MemoryConstraint::Balanced => {
201 match requirements.latency {
202 LatencyRequirement::UltraLow | LatencyRequirement::Low => {
203 (
204 RecommendedIndex::Hnsw,
205 vec![RecommendedIndex::HnswSq, RecommendedIndex::Ivf],
206 "Medium dataset (10K-100K) with low latency requirement - HNSW provides fast queries",
207 )
208 }
209 _ => {
210 (
211 RecommendedIndex::Ivf,
212 vec![RecommendedIndex::Hnsw, RecommendedIndex::IvfPq],
213 "Medium dataset (10K-100K) - IVF provides good balance of speed and memory",
214 )
215 }
216 }
217 }
218 MemoryConstraint::Unlimited => {
219 (
220 RecommendedIndex::Hnsw,
221 vec![RecommendedIndex::Ivf],
222 "Medium dataset (10K-100K) with no memory constraints - HNSW provides best query performance",
223 )
224 }
225 }
226 } else if num_vectors < 1_000_000 {
227 match requirements.memory {
229 MemoryConstraint::Minimal => {
230 (
231 RecommendedIndex::IvfPq,
232 vec![RecommendedIndex::IvfSq, RecommendedIndex::SpFresh],
233 "Large dataset (100K-1M) with memory constraints - IVF-PQ provides best compression",
234 )
235 }
236 MemoryConstraint::Balanced => {
237 if data.frequent_updates || data.frequent_deletes {
238 (
239 RecommendedIndex::SpFresh,
240 vec![RecommendedIndex::Hnsw, RecommendedIndex::Ivf],
241 "Large dataset (100K-1M) with frequent updates - SPFresh handles streaming updates well",
242 )
243 } else {
244 match requirements.latency {
245 LatencyRequirement::UltraLow | LatencyRequirement::Low => {
246 (
247 RecommendedIndex::HnswSq,
248 vec![RecommendedIndex::Hnsw, RecommendedIndex::IvfPq],
249 "Large dataset (100K-1M) with low latency requirement - HNSW-SQ balances speed and memory",
250 )
251 }
252 _ => {
253 (
254 RecommendedIndex::IvfPq,
255 vec![RecommendedIndex::IvfSq, RecommendedIndex::Hnsw],
256 "Large dataset (100K-1M) - IVF-PQ provides good compression with acceptable latency",
257 )
258 }
259 }
260 }
261 }
262 MemoryConstraint::Unlimited => {
263 (
264 RecommendedIndex::Hnsw,
265 vec![RecommendedIndex::HnswSq],
266 "Large dataset (100K-1M) with no memory constraints - HNSW provides best query performance",
267 )
268 }
269 }
270 } else {
271 match requirements.memory {
273 MemoryConstraint::Minimal => {
274 (
275 RecommendedIndex::IvfPq,
276 vec![RecommendedIndex::SpFresh],
277 "Very large dataset (>1M) with memory constraints - IVF-PQ is essential for memory efficiency",
278 )
279 }
280 MemoryConstraint::Balanced => {
281 if data.frequent_updates || data.frequent_deletes {
282 (
283 RecommendedIndex::SpFresh,
284 vec![RecommendedIndex::IvfPq],
285 "Very large dataset (>1M) with frequent updates - SPFresh handles streaming workloads",
286 )
287 } else {
288 (
289 RecommendedIndex::IvfPq,
290 vec![RecommendedIndex::HnswSq, RecommendedIndex::SpFresh],
291 "Very large dataset (>1M) - IVF-PQ provides best memory/performance tradeoff",
292 )
293 }
294 }
295 MemoryConstraint::Unlimited => {
296 (
297 RecommendedIndex::HnswSq,
298 vec![RecommendedIndex::Hnsw, RecommendedIndex::IvfPq],
299 "Very large dataset (>1M) with no memory constraints - HNSW-SQ provides fast queries with reduced memory",
300 )
301 }
302 }
303 };
304
305 let (estimated_memory, estimated_latency, estimated_recall) =
307 Self::estimate_performance(&primary, data, requirements);
308
309 let suggested_params = Self::suggest_params(&primary, data, requirements);
311
312 IndexRecommendation {
313 primary,
314 alternatives,
315 reasoning: base_reasoning.to_string(),
316 estimated_memory_bytes: estimated_memory,
317 estimated_latency_ms: estimated_latency,
318 estimated_recall,
319 suggested_params,
320 }
321 }
322
323 fn estimate_performance(
325 index_type: &RecommendedIndex,
326 data: &DataCharacteristics,
327 requirements: &IndexRequirements,
328 ) -> (usize, f32, f32) {
329 let n = data.num_vectors;
330 let d = data.dimensions;
331 let base_vector_size = n * d * 4; match index_type {
334 RecommendedIndex::Flat => {
335 let memory = base_vector_size;
337 let latency = (n as f32 / 100_000.0) * 10.0; (memory, latency, 1.0) }
340 RecommendedIndex::Hnsw => {
341 let memory = base_vector_size * 2;
343 let latency = ((n as f32).log2() * 0.1).max(0.5); let recall = match requirements.accuracy {
345 AccuracyRequirement::Exact => 0.999,
346 AccuracyRequirement::VeryHigh => 0.995,
347 _ => 0.98,
348 };
349 (memory, latency, recall)
350 }
351 RecommendedIndex::Ivf => {
352 let num_clusters = (n as f32).sqrt() as usize;
354 let memory = base_vector_size + num_clusters * d * 4;
355 let latency = (n as f32 / num_clusters as f32 / 10_000.0) * 5.0;
356 (memory, latency.max(1.0), 0.95)
357 }
358 RecommendedIndex::IvfPq => {
359 let compression_ratio = 32; let memory = base_vector_size / compression_ratio;
362 let latency = 5.0 + (n as f32 / 1_000_000.0) * 2.0;
363 (memory, latency, 0.90)
364 }
365 RecommendedIndex::IvfSq => {
366 let memory = base_vector_size / 4;
368 let latency = 3.0 + (n as f32 / 500_000.0) * 2.0;
369 (memory, latency, 0.95)
370 }
371 RecommendedIndex::HnswSq => {
372 let memory = (base_vector_size / 4) * 2; let latency = ((n as f32).log2() * 0.15).max(0.5);
375 (memory, latency, 0.96)
376 }
377 RecommendedIndex::SpFresh => {
378 let memory = base_vector_size / 2; let latency = 10.0 + (n as f32 / 100_000.0) * 1.0;
381 (memory, latency, 0.92)
382 }
383 }
384 }
385
386 fn suggest_params(
388 index_type: &RecommendedIndex,
389 data: &DataCharacteristics,
390 requirements: &IndexRequirements,
391 ) -> IndexParams {
392 let mut params = IndexParams::default();
393 let n = data.num_vectors;
394 let d = data.dimensions;
395
396 match index_type {
397 RecommendedIndex::Hnsw | RecommendedIndex::HnswSq => {
398 params.hnsw_m = Some(match requirements.accuracy {
400 AccuracyRequirement::Exact | AccuracyRequirement::VeryHigh => 32,
401 AccuracyRequirement::High => 16,
402 _ => 12,
403 });
404 params.hnsw_ef_construction = Some(params.hnsw_m.unwrap_or(16) * 10);
405 params.hnsw_ef_search = Some(match requirements.latency {
406 LatencyRequirement::UltraLow => 50,
407 LatencyRequirement::Low => 100,
408 LatencyRequirement::Medium => 200,
409 LatencyRequirement::Relaxed => 400,
410 });
411
412 if matches!(index_type, RecommendedIndex::HnswSq) {
413 params.sq_bits = Some(8);
414 }
415 }
416 RecommendedIndex::Ivf | RecommendedIndex::IvfPq | RecommendedIndex::IvfSq => {
417 let num_clusters = ((n as f32).sqrt() as usize).clamp(16, 65536);
419 params.ivf_num_clusters = Some(num_clusters);
420 params.ivf_num_probes = Some(
421 match requirements.accuracy {
422 AccuracyRequirement::Exact | AccuracyRequirement::VeryHigh => {
423 num_clusters / 4
424 }
425 AccuracyRequirement::High => num_clusters / 8,
426 AccuracyRequirement::Moderate => num_clusters / 16,
427 AccuracyRequirement::Relaxed => num_clusters / 32,
428 }
429 .max(1),
430 );
431
432 if matches!(index_type, RecommendedIndex::IvfPq) {
433 let num_subquantizers = (d / 4).clamp(1, 64);
435 params.pq_num_subquantizers = Some(num_subquantizers);
436 params.pq_bits_per_subquantizer = Some(8);
437 }
438
439 if matches!(index_type, RecommendedIndex::IvfSq) {
440 params.sq_bits = Some(8);
441 }
442 }
443 RecommendedIndex::SpFresh => {
444 let num_clusters = ((n as f32).sqrt() as usize).clamp(16, 4096);
446 params.ivf_num_clusters = Some(num_clusters);
447 params.ivf_num_probes = Some((num_clusters / 10).max(1));
448 }
449 RecommendedIndex::Flat => {
450 }
452 }
453
454 params
455 }
456
457 pub fn quick_select(num_vectors: usize, dimensions: usize) -> RecommendedIndex {
459 let data = DataCharacteristics {
460 num_vectors,
461 dimensions,
462 frequent_updates: false,
463 frequent_deletes: false,
464 };
465 let requirements = IndexRequirements::default();
466 Self::select(&data, &requirements).primary
467 }
468}
469
470#[cfg(test)]
471mod tests {
472 use super::*;
473
474 #[test]
475 fn test_small_dataset() {
476 let data = DataCharacteristics {
477 num_vectors: 1000,
478 dimensions: 128,
479 frequent_updates: false,
480 frequent_deletes: false,
481 };
482 let requirements = IndexRequirements::default();
483
484 let rec = AutoIndexSelector::select(&data, &requirements);
485 assert!(matches!(
486 rec.primary,
487 RecommendedIndex::Hnsw | RecommendedIndex::Flat
488 ));
489 }
490
491 #[test]
492 fn test_large_dataset_memory_constrained() {
493 let data = DataCharacteristics {
494 num_vectors: 5_000_000,
495 dimensions: 768,
496 frequent_updates: false,
497 frequent_deletes: false,
498 };
499 let requirements = IndexRequirements {
500 memory: MemoryConstraint::Minimal,
501 latency: LatencyRequirement::Medium,
502 accuracy: AccuracyRequirement::Moderate,
503 };
504
505 let rec = AutoIndexSelector::select(&data, &requirements);
506 assert_eq!(rec.primary, RecommendedIndex::IvfPq);
507 assert!(rec.estimated_memory_bytes < data.num_vectors * data.dimensions * 4 / 10);
508 }
509
510 #[test]
511 fn test_streaming_workload() {
512 let data = DataCharacteristics {
513 num_vectors: 500_000,
514 dimensions: 256,
515 frequent_updates: true,
516 frequent_deletes: true,
517 };
518 let requirements = IndexRequirements {
519 memory: MemoryConstraint::Balanced,
520 latency: LatencyRequirement::Medium,
521 accuracy: AccuracyRequirement::High,
522 };
523
524 let rec = AutoIndexSelector::select(&data, &requirements);
525 assert_eq!(rec.primary, RecommendedIndex::SpFresh);
526 }
527
528 #[test]
529 fn test_exact_search() {
530 let data = DataCharacteristics {
531 num_vectors: 5000,
532 dimensions: 64,
533 frequent_updates: false,
534 frequent_deletes: false,
535 };
536 let requirements = IndexRequirements {
537 memory: MemoryConstraint::Unlimited,
538 latency: LatencyRequirement::Relaxed,
539 accuracy: AccuracyRequirement::Exact,
540 };
541
542 let rec = AutoIndexSelector::select(&data, &requirements);
543 assert_eq!(rec.primary, RecommendedIndex::Flat);
544 assert_eq!(rec.estimated_recall, 1.0);
545 }
546
547 #[test]
548 fn test_quick_select() {
549 let small = AutoIndexSelector::quick_select(5000, 128);
551 assert!(matches!(
552 small,
553 RecommendedIndex::Hnsw | RecommendedIndex::Flat
554 ));
555
556 let medium = AutoIndexSelector::quick_select(50_000, 256);
558 assert!(matches!(
559 medium,
560 RecommendedIndex::Hnsw | RecommendedIndex::Ivf
561 ));
562
563 let large = AutoIndexSelector::quick_select(2_000_000, 512);
565 assert!(matches!(
566 large,
567 RecommendedIndex::IvfPq | RecommendedIndex::HnswSq
568 ));
569 }
570
571 #[test]
572 fn test_suggested_params() {
573 let data = DataCharacteristics {
574 num_vectors: 100_000,
575 dimensions: 128,
576 frequent_updates: false,
577 frequent_deletes: false,
578 };
579 let requirements = IndexRequirements::default();
580
581 let rec = AutoIndexSelector::select(&data, &requirements);
582
583 if matches!(rec.primary, RecommendedIndex::Hnsw) {
585 assert!(rec.suggested_params.hnsw_m.is_some());
586 assert!(rec.suggested_params.hnsw_ef_construction.is_some());
587 assert!(rec.suggested_params.hnsw_ef_search.is_some());
588 }
589 }
590}