Skip to main content

engine/
auto_index.rs

1//! Auto-Index Selection
2//!
3//! Automatically selects the optimal index type based on data characteristics:
4//! - Dataset size (number of vectors)
5//! - Dimensionality
6//! - Memory constraints
7//! - Query latency requirements
8//! - Accuracy requirements
9//!
10//! # Index Selection Matrix
11//!
12//! | Vectors | Dimensions | Memory | Latency | Recommended Index |
13//! |---------|------------|--------|---------|-------------------|
14//! | < 10K   | Any        | Any    | Any     | Flat (brute force) |
15//! | 10K-100K| < 256      | Low    | Medium  | IVF + SQ8         |
16//! | 10K-100K| < 256      | Normal | Low     | HNSW              |
17//! | 100K-1M | Any        | Low    | Medium  | IVF-PQ            |
18//! | 100K-1M | Any        | Normal | Low     | HNSW              |
19//! | > 1M    | Any        | Low    | Any     | IVF-PQ            |
20//! | > 1M    | Any        | Normal | Low     | HNSW + SQ8        |
21
22use serde::{Deserialize, Serialize};
23
24/// Memory constraint level
25#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
26pub enum MemoryConstraint {
27    /// Minimize memory usage at all costs
28    Minimal,
29    /// Balance memory and performance
30    #[default]
31    Balanced,
32    /// Prioritize performance over memory
33    Unlimited,
34}
35
36/// Query latency requirement
37#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
38pub enum LatencyRequirement {
39    /// Sub-millisecond queries required
40    UltraLow,
41    /// < 10ms queries required
42    #[default]
43    Low,
44    /// < 100ms queries acceptable
45    Medium,
46    /// Latency not critical
47    Relaxed,
48}
49
50/// Accuracy requirement for search results
51#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
52pub enum AccuracyRequirement {
53    /// Exact nearest neighbors (recall = 1.0)
54    Exact,
55    /// Very high recall (> 0.99)
56    VeryHigh,
57    /// High recall (> 0.95)
58    #[default]
59    High,
60    /// Moderate recall (> 0.90)
61    Moderate,
62    /// Lower recall acceptable (> 0.80)
63    Relaxed,
64}
65
66/// Data characteristics for index selection
67#[derive(Debug, Clone, Serialize, Deserialize)]
68pub struct DataCharacteristics {
69    /// Expected number of vectors
70    pub num_vectors: usize,
71    /// Vector dimensionality
72    pub dimensions: usize,
73    /// Whether vectors will be frequently updated
74    pub frequent_updates: bool,
75    /// Whether vectors will be frequently deleted
76    pub frequent_deletes: bool,
77}
78
79/// Requirements for the index
80#[derive(Debug, Clone, Default, Serialize, Deserialize)]
81pub struct IndexRequirements {
82    /// Memory constraint level
83    pub memory: MemoryConstraint,
84    /// Query latency requirement
85    pub latency: LatencyRequirement,
86    /// Accuracy requirement
87    pub accuracy: AccuracyRequirement,
88}
89
90/// Recommended index type
91#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
92pub enum RecommendedIndex {
93    /// Flat index (brute force) - exact but slow for large datasets
94    Flat,
95    /// HNSW - fast queries, moderate memory, good for < 1M vectors
96    Hnsw,
97    /// IVF - balanced performance, works well with clustering
98    Ivf,
99    /// IVF with Product Quantization - memory efficient for large datasets
100    IvfPq,
101    /// IVF with Scalar Quantization - good balance of memory and accuracy
102    IvfSq,
103    /// HNSW with Scalar Quantization - fast queries with reduced memory
104    HnswSq,
105    /// SPFresh - optimized for cold storage with streaming updates
106    SpFresh,
107}
108
109impl std::fmt::Display for RecommendedIndex {
110    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
111        match self {
112            RecommendedIndex::Flat => write!(f, "Flat (Brute Force)"),
113            RecommendedIndex::Hnsw => write!(f, "HNSW"),
114            RecommendedIndex::Ivf => write!(f, "IVF"),
115            RecommendedIndex::IvfPq => write!(f, "IVF-PQ"),
116            RecommendedIndex::IvfSq => write!(f, "IVF-SQ"),
117            RecommendedIndex::HnswSq => write!(f, "HNSW-SQ"),
118            RecommendedIndex::SpFresh => write!(f, "SPFresh"),
119        }
120    }
121}
122
123/// Index recommendation with reasoning
124#[derive(Debug, Clone, Serialize, Deserialize)]
125pub struct IndexRecommendation {
126    /// Primary recommended index type
127    pub primary: RecommendedIndex,
128    /// Alternative index types that could work
129    pub alternatives: Vec<RecommendedIndex>,
130    /// Explanation of why this index was chosen
131    pub reasoning: String,
132    /// Estimated memory usage in bytes
133    pub estimated_memory_bytes: usize,
134    /// Estimated query latency in milliseconds
135    pub estimated_latency_ms: f32,
136    /// Estimated recall/accuracy
137    pub estimated_recall: f32,
138    /// Suggested configuration parameters
139    pub suggested_params: IndexParams,
140}
141
142/// Suggested index parameters
143#[derive(Debug, Clone, Default, Serialize, Deserialize)]
144pub struct IndexParams {
145    // HNSW params
146    pub hnsw_m: Option<usize>,
147    pub hnsw_ef_construction: Option<usize>,
148    pub hnsw_ef_search: Option<usize>,
149
150    // IVF params
151    pub ivf_num_clusters: Option<usize>,
152    pub ivf_num_probes: Option<usize>,
153
154    // PQ params
155    pub pq_num_subquantizers: Option<usize>,
156    pub pq_bits_per_subquantizer: Option<usize>,
157
158    // SQ params
159    pub sq_bits: Option<usize>,
160}
161
162/// Auto-index selector
163pub struct AutoIndexSelector;
164
165impl AutoIndexSelector {
166    /// Select the optimal index based on data characteristics and requirements
167    pub fn select(
168        data: &DataCharacteristics,
169        requirements: &IndexRequirements,
170    ) -> IndexRecommendation {
171        let num_vectors = data.num_vectors;
172        let _dimensions = data.dimensions;
173
174        // Decision logic based on dataset size
175        let (primary, alternatives, base_reasoning) = if num_vectors < 10_000 {
176            // Small dataset: flat index is fine
177            if requirements.accuracy == AccuracyRequirement::Exact {
178                (
179                    RecommendedIndex::Flat,
180                    vec![RecommendedIndex::Hnsw],
181                    "Small dataset (<10K vectors) - flat index provides exact results efficiently",
182                )
183            } else {
184                (
185                    RecommendedIndex::Hnsw,
186                    vec![RecommendedIndex::Flat, RecommendedIndex::Ivf],
187                    "Small dataset (<10K vectors) - HNSW provides fast approximate search",
188                )
189            }
190        } else if num_vectors < 100_000 {
191            // Medium dataset
192            match requirements.memory {
193                MemoryConstraint::Minimal => {
194                    (
195                        RecommendedIndex::IvfSq,
196                        vec![RecommendedIndex::IvfPq, RecommendedIndex::Ivf],
197                        "Medium dataset (10K-100K) with memory constraints - IVF-SQ balances memory and accuracy",
198                    )
199                }
200                MemoryConstraint::Balanced => {
201                    match requirements.latency {
202                        LatencyRequirement::UltraLow | LatencyRequirement::Low => {
203                            (
204                                RecommendedIndex::Hnsw,
205                                vec![RecommendedIndex::HnswSq, RecommendedIndex::Ivf],
206                                "Medium dataset (10K-100K) with low latency requirement - HNSW provides fast queries",
207                            )
208                        }
209                        _ => {
210                            (
211                                RecommendedIndex::Ivf,
212                                vec![RecommendedIndex::Hnsw, RecommendedIndex::IvfPq],
213                                "Medium dataset (10K-100K) - IVF provides good balance of speed and memory",
214                            )
215                        }
216                    }
217                }
218                MemoryConstraint::Unlimited => {
219                    (
220                        RecommendedIndex::Hnsw,
221                        vec![RecommendedIndex::Ivf],
222                        "Medium dataset (10K-100K) with no memory constraints - HNSW provides best query performance",
223                    )
224                }
225            }
226        } else if num_vectors < 1_000_000 {
227            // Large dataset
228            match requirements.memory {
229                MemoryConstraint::Minimal => {
230                    (
231                        RecommendedIndex::IvfPq,
232                        vec![RecommendedIndex::IvfSq, RecommendedIndex::SpFresh],
233                        "Large dataset (100K-1M) with memory constraints - IVF-PQ provides best compression",
234                    )
235                }
236                MemoryConstraint::Balanced => {
237                    if data.frequent_updates || data.frequent_deletes {
238                        (
239                            RecommendedIndex::SpFresh,
240                            vec![RecommendedIndex::Hnsw, RecommendedIndex::Ivf],
241                            "Large dataset (100K-1M) with frequent updates - SPFresh handles streaming updates well",
242                        )
243                    } else {
244                        match requirements.latency {
245                            LatencyRequirement::UltraLow | LatencyRequirement::Low => {
246                                (
247                                    RecommendedIndex::HnswSq,
248                                    vec![RecommendedIndex::Hnsw, RecommendedIndex::IvfPq],
249                                    "Large dataset (100K-1M) with low latency requirement - HNSW-SQ balances speed and memory",
250                                )
251                            }
252                            _ => {
253                                (
254                                    RecommendedIndex::IvfPq,
255                                    vec![RecommendedIndex::IvfSq, RecommendedIndex::Hnsw],
256                                    "Large dataset (100K-1M) - IVF-PQ provides good compression with acceptable latency",
257                                )
258                            }
259                        }
260                    }
261                }
262                MemoryConstraint::Unlimited => {
263                    (
264                        RecommendedIndex::Hnsw,
265                        vec![RecommendedIndex::HnswSq],
266                        "Large dataset (100K-1M) with no memory constraints - HNSW provides best query performance",
267                    )
268                }
269            }
270        } else {
271            // Very large dataset (> 1M)
272            match requirements.memory {
273                MemoryConstraint::Minimal => {
274                    (
275                        RecommendedIndex::IvfPq,
276                        vec![RecommendedIndex::SpFresh],
277                        "Very large dataset (>1M) with memory constraints - IVF-PQ is essential for memory efficiency",
278                    )
279                }
280                MemoryConstraint::Balanced => {
281                    if data.frequent_updates || data.frequent_deletes {
282                        (
283                            RecommendedIndex::SpFresh,
284                            vec![RecommendedIndex::IvfPq],
285                            "Very large dataset (>1M) with frequent updates - SPFresh handles streaming workloads",
286                        )
287                    } else {
288                        (
289                            RecommendedIndex::IvfPq,
290                            vec![RecommendedIndex::HnswSq, RecommendedIndex::SpFresh],
291                            "Very large dataset (>1M) - IVF-PQ provides best memory/performance tradeoff",
292                        )
293                    }
294                }
295                MemoryConstraint::Unlimited => {
296                    (
297                        RecommendedIndex::HnswSq,
298                        vec![RecommendedIndex::Hnsw, RecommendedIndex::IvfPq],
299                        "Very large dataset (>1M) with no memory constraints - HNSW-SQ provides fast queries with reduced memory",
300                    )
301                }
302            }
303        };
304
305        // Calculate estimates
306        let (estimated_memory, estimated_latency, estimated_recall) =
307            Self::estimate_performance(&primary, data, requirements);
308
309        // Generate suggested parameters
310        let suggested_params = Self::suggest_params(&primary, data, requirements);
311
312        IndexRecommendation {
313            primary,
314            alternatives,
315            reasoning: base_reasoning.to_string(),
316            estimated_memory_bytes: estimated_memory,
317            estimated_latency_ms: estimated_latency,
318            estimated_recall,
319            suggested_params,
320        }
321    }
322
323    /// Estimate performance characteristics for an index type
324    fn estimate_performance(
325        index_type: &RecommendedIndex,
326        data: &DataCharacteristics,
327        requirements: &IndexRequirements,
328    ) -> (usize, f32, f32) {
329        let n = data.num_vectors;
330        let d = data.dimensions;
331        let base_vector_size = n * d * 4; // 4 bytes per f32
332
333        match index_type {
334            RecommendedIndex::Flat => {
335                // Flat: stores all vectors, O(n) search
336                let memory = base_vector_size;
337                let latency = (n as f32 / 100_000.0) * 10.0; // ~10ms per 100K vectors
338                (memory, latency, 1.0) // Exact recall
339            }
340            RecommendedIndex::Hnsw => {
341                // HNSW: vectors + graph structure (~2x overhead)
342                let memory = base_vector_size * 2;
343                let latency = ((n as f32).log2() * 0.1).max(0.5); // O(log n)
344                let recall = match requirements.accuracy {
345                    AccuracyRequirement::Exact => 0.999,
346                    AccuracyRequirement::VeryHigh => 0.995,
347                    _ => 0.98,
348                };
349                (memory, latency, recall)
350            }
351            RecommendedIndex::Ivf => {
352                // IVF: vectors + centroids
353                let num_clusters = (n as f32).sqrt() as usize;
354                let memory = base_vector_size + num_clusters * d * 4;
355                let latency = (n as f32 / num_clusters as f32 / 10_000.0) * 5.0;
356                (memory, latency.max(1.0), 0.95)
357            }
358            RecommendedIndex::IvfPq => {
359                // IVF-PQ: heavily compressed
360                let compression_ratio = 32; // Typically 32x compression
361                let memory = base_vector_size / compression_ratio;
362                let latency = 5.0 + (n as f32 / 1_000_000.0) * 2.0;
363                (memory, latency, 0.90)
364            }
365            RecommendedIndex::IvfSq => {
366                // IVF-SQ: 4x compression
367                let memory = base_vector_size / 4;
368                let latency = 3.0 + (n as f32 / 500_000.0) * 2.0;
369                (memory, latency, 0.95)
370            }
371            RecommendedIndex::HnswSq => {
372                // HNSW-SQ: HNSW with quantized vectors
373                let memory = (base_vector_size / 4) * 2; // Quantized + graph
374                let latency = ((n as f32).log2() * 0.15).max(0.5);
375                (memory, latency, 0.96)
376            }
377            RecommendedIndex::SpFresh => {
378                // SPFresh: optimized for cold storage
379                let memory = base_vector_size / 2; // Partial in-memory
380                let latency = 10.0 + (n as f32 / 100_000.0) * 1.0;
381                (memory, latency, 0.92)
382            }
383        }
384    }
385
386    /// Suggest optimal parameters for the index type
387    fn suggest_params(
388        index_type: &RecommendedIndex,
389        data: &DataCharacteristics,
390        requirements: &IndexRequirements,
391    ) -> IndexParams {
392        let mut params = IndexParams::default();
393        let n = data.num_vectors;
394        let d = data.dimensions;
395
396        match index_type {
397            RecommendedIndex::Hnsw | RecommendedIndex::HnswSq => {
398                // HNSW parameters
399                params.hnsw_m = Some(match requirements.accuracy {
400                    AccuracyRequirement::Exact | AccuracyRequirement::VeryHigh => 32,
401                    AccuracyRequirement::High => 16,
402                    _ => 12,
403                });
404                params.hnsw_ef_construction = Some(params.hnsw_m.unwrap_or(16) * 10);
405                params.hnsw_ef_search = Some(match requirements.latency {
406                    LatencyRequirement::UltraLow => 50,
407                    LatencyRequirement::Low => 100,
408                    LatencyRequirement::Medium => 200,
409                    LatencyRequirement::Relaxed => 400,
410                });
411
412                if matches!(index_type, RecommendedIndex::HnswSq) {
413                    params.sq_bits = Some(8);
414                }
415            }
416            RecommendedIndex::Ivf | RecommendedIndex::IvfPq | RecommendedIndex::IvfSq => {
417                // IVF parameters
418                let num_clusters = ((n as f32).sqrt() as usize).clamp(16, 65536);
419                params.ivf_num_clusters = Some(num_clusters);
420                params.ivf_num_probes = Some(
421                    match requirements.accuracy {
422                        AccuracyRequirement::Exact | AccuracyRequirement::VeryHigh => {
423                            num_clusters / 4
424                        }
425                        AccuracyRequirement::High => num_clusters / 8,
426                        AccuracyRequirement::Moderate => num_clusters / 16,
427                        AccuracyRequirement::Relaxed => num_clusters / 32,
428                    }
429                    .max(1),
430                );
431
432                if matches!(index_type, RecommendedIndex::IvfPq) {
433                    // PQ parameters
434                    let num_subquantizers = (d / 4).clamp(1, 64);
435                    params.pq_num_subquantizers = Some(num_subquantizers);
436                    params.pq_bits_per_subquantizer = Some(8);
437                }
438
439                if matches!(index_type, RecommendedIndex::IvfSq) {
440                    params.sq_bits = Some(8);
441                }
442            }
443            RecommendedIndex::SpFresh => {
444                // SPFresh uses IVF-like clustering
445                let num_clusters = ((n as f32).sqrt() as usize).clamp(16, 4096);
446                params.ivf_num_clusters = Some(num_clusters);
447                params.ivf_num_probes = Some((num_clusters / 10).max(1));
448            }
449            RecommendedIndex::Flat => {
450                // No parameters needed for flat index
451            }
452        }
453
454        params
455    }
456
457    /// Quick recommendation based on just vector count and dimensions
458    pub fn quick_select(num_vectors: usize, dimensions: usize) -> RecommendedIndex {
459        let data = DataCharacteristics {
460            num_vectors,
461            dimensions,
462            frequent_updates: false,
463            frequent_deletes: false,
464        };
465        let requirements = IndexRequirements::default();
466        Self::select(&data, &requirements).primary
467    }
468}
469
470#[cfg(test)]
471mod tests {
472    use super::*;
473
474    #[test]
475    fn test_small_dataset() {
476        let data = DataCharacteristics {
477            num_vectors: 1000,
478            dimensions: 128,
479            frequent_updates: false,
480            frequent_deletes: false,
481        };
482        let requirements = IndexRequirements::default();
483
484        let rec = AutoIndexSelector::select(&data, &requirements);
485        assert!(matches!(
486            rec.primary,
487            RecommendedIndex::Hnsw | RecommendedIndex::Flat
488        ));
489    }
490
491    #[test]
492    fn test_large_dataset_memory_constrained() {
493        let data = DataCharacteristics {
494            num_vectors: 5_000_000,
495            dimensions: 768,
496            frequent_updates: false,
497            frequent_deletes: false,
498        };
499        let requirements = IndexRequirements {
500            memory: MemoryConstraint::Minimal,
501            latency: LatencyRequirement::Medium,
502            accuracy: AccuracyRequirement::Moderate,
503        };
504
505        let rec = AutoIndexSelector::select(&data, &requirements);
506        assert_eq!(rec.primary, RecommendedIndex::IvfPq);
507        assert!(rec.estimated_memory_bytes < data.num_vectors * data.dimensions * 4 / 10);
508    }
509
510    #[test]
511    fn test_streaming_workload() {
512        let data = DataCharacteristics {
513            num_vectors: 500_000,
514            dimensions: 256,
515            frequent_updates: true,
516            frequent_deletes: true,
517        };
518        let requirements = IndexRequirements {
519            memory: MemoryConstraint::Balanced,
520            latency: LatencyRequirement::Medium,
521            accuracy: AccuracyRequirement::High,
522        };
523
524        let rec = AutoIndexSelector::select(&data, &requirements);
525        assert_eq!(rec.primary, RecommendedIndex::SpFresh);
526    }
527
528    #[test]
529    fn test_exact_search() {
530        let data = DataCharacteristics {
531            num_vectors: 5000,
532            dimensions: 64,
533            frequent_updates: false,
534            frequent_deletes: false,
535        };
536        let requirements = IndexRequirements {
537            memory: MemoryConstraint::Unlimited,
538            latency: LatencyRequirement::Relaxed,
539            accuracy: AccuracyRequirement::Exact,
540        };
541
542        let rec = AutoIndexSelector::select(&data, &requirements);
543        assert_eq!(rec.primary, RecommendedIndex::Flat);
544        assert_eq!(rec.estimated_recall, 1.0);
545    }
546
547    #[test]
548    fn test_quick_select() {
549        // Small: HNSW or Flat
550        let small = AutoIndexSelector::quick_select(5000, 128);
551        assert!(matches!(
552            small,
553            RecommendedIndex::Hnsw | RecommendedIndex::Flat
554        ));
555
556        // Medium: HNSW
557        let medium = AutoIndexSelector::quick_select(50_000, 256);
558        assert!(matches!(
559            medium,
560            RecommendedIndex::Hnsw | RecommendedIndex::Ivf
561        ));
562
563        // Large: usually IVF-PQ or HNSW-SQ
564        let large = AutoIndexSelector::quick_select(2_000_000, 512);
565        assert!(matches!(
566            large,
567            RecommendedIndex::IvfPq | RecommendedIndex::HnswSq
568        ));
569    }
570
571    #[test]
572    fn test_suggested_params() {
573        let data = DataCharacteristics {
574            num_vectors: 100_000,
575            dimensions: 128,
576            frequent_updates: false,
577            frequent_deletes: false,
578        };
579        let requirements = IndexRequirements::default();
580
581        let rec = AutoIndexSelector::select(&data, &requirements);
582
583        // Should have suggested parameters
584        if matches!(rec.primary, RecommendedIndex::Hnsw) {
585            assert!(rec.suggested_params.hnsw_m.is_some());
586            assert!(rec.suggested_params.hnsw_ef_construction.is_some());
587            assert!(rec.suggested_params.hnsw_ef_search.is_some());
588        }
589    }
590}