// vecstore/autotuning.rs

1//! Auto-tuning for HNSW parameters
2//!
3//! This module automatically finds optimal HNSW parameters (M, ef_construction, ef_search)
4//! based on data characteristics and performance requirements.
5//!
6//! ## HNSW Parameters
7//!
8//! - **M**: Number of connections per node (typical: 8-64, default: 16)
9//!   - Higher M = better recall, more memory, slower construction
10//!   - Lower M = less memory, faster construction, lower recall
11//!
12//! - **ef_construction**: Size of dynamic candidate list during construction (typical: 100-500)
13//!   - Higher = better quality index, slower construction
14//!   - Lower = faster construction, lower quality
15//!
16//! - **ef_search**: Size of dynamic candidate list during search (typical: 50-500)
17//!   - Higher = better recall, slower search
18//!   - Lower = faster search, lower recall
19//!
20//! ## Auto-tuning Strategies
21//!
22//! 1. **Grid Search**: Exhaustive search over parameter space
23//! 2. **Heuristic-based**: Fast recommendations based on data size and constraints
24//! 3. **Benchmark-driven**: Measure actual recall/latency trade-offs
25//!
26//! ## Example
27//!
28//! ```no_run
29//! use vecstore::autotuning::{AutoTuner, TuningGoal, PerformanceConstraints};
30//!
31//! # fn main() -> anyhow::Result<()> {
32//! // Define performance goals
33//! let constraints = PerformanceConstraints {
34//!     min_recall: 0.95,        // At least 95% recall
35//!     max_latency_ms: 10.0,    // Under 10ms query time
36//!     max_memory_mb: 1000.0,   // Under 1GB memory
37//! };
38//!
//! // Auto-tune for a concrete dataset (e.g. 128-dim vectors, 100k entries)
//! let (vec_dimension, num_vectors) = (128, 100_000);
//! let tuner = AutoTuner::new(vec_dimension, num_vectors);
41//! let params = tuner.tune_heuristic(TuningGoal::Balanced, Some(constraints))?;
42//!
43//! println!("Recommended M: {}", params.m);
44//! println!("Recommended ef_construction: {}", params.ef_construction);
45//! println!("Recommended ef_search: {}", params.ef_search);
46//! # Ok(())
47//! # }
48//! ```
49
50use serde::{Deserialize, Serialize};
51
/// HNSW parameters
///
/// Bundles the three tunable knobs of an HNSW index together with optional
/// performance estimates that the tuner fills in after analysis.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HnswParams {
    /// Number of connections per node (higher → better recall, more memory)
    pub m: usize,

    /// Dynamic candidate list size during construction (higher → better index quality, slower build)
    pub ef_construction: usize,

    /// Dynamic candidate list size during search (higher → better recall, slower queries)
    pub ef_search: usize,

    /// Estimated recall (0.0 to 1.0); `None` until estimated by the tuner
    pub estimated_recall: Option<f32>,

    /// Estimated query latency (milliseconds); `None` until estimated by the tuner
    pub estimated_latency_ms: Option<f32>,

    /// Estimated memory usage (megabytes); `None` until estimated by the tuner
    pub estimated_memory_mb: Option<f32>,
}
73
74impl Default for HnswParams {
75    fn default() -> Self {
76        Self {
77            m: 16,
78            ef_construction: 200,
79            ef_search: 50,
80            estimated_recall: None,
81            estimated_latency_ms: None,
82            estimated_memory_mb: None,
83        }
84    }
85}
86
/// Performance constraints for auto-tuning
///
/// All three limits are enforced heuristically during tuning; use
/// `f32::INFINITY` for any limit you do not care about.
#[derive(Debug, Clone, Copy)]
pub struct PerformanceConstraints {
    /// Minimum acceptable recall (0.0 to 1.0)
    pub min_recall: f32,

    /// Maximum acceptable query latency (milliseconds)
    pub max_latency_ms: f32,

    /// Maximum acceptable memory usage (megabytes); `f32::INFINITY` disables the cap
    pub max_memory_mb: f32,
}
99
100impl Default for PerformanceConstraints {
101    fn default() -> Self {
102        Self {
103            min_recall: 0.95,
104            max_latency_ms: 10.0,
105            max_memory_mb: f32::INFINITY,
106        }
107    }
108}
109
/// Tuning goal
///
/// Selects which base parameter profile the heuristic tuner starts from.
// `Eq` and `Hash` added: the enum is fieldless, so `PartialEq` without `Eq`
// triggers clippy's `derive_partial_eq_without_eq`; `Hash` lets goals be used
// as map keys. Both additions are backward compatible.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TuningGoal {
    /// Maximize recall (quality-focused)
    MaxRecall,

    /// Minimize latency (speed-focused)
    MinLatency,

    /// Minimize memory (efficiency-focused)
    MinMemory,

    /// Balance all metrics
    Balanced,
}
125
/// Auto-tuner for HNSW parameters
///
/// Holds the dataset characteristics (dimension and cardinality) that every
/// heuristic and estimate in this module is derived from.
pub struct AutoTuner {
    /// Vector dimension (number of f32 components per vector)
    dimension: usize,

    /// Number of vectors in the dataset
    num_vectors: usize,
}
134
135impl AutoTuner {
136    /// Create a new auto-tuner
137    ///
138    /// # Arguments
139    ///
140    /// * `dimension` - Vector dimension
141    /// * `num_vectors` - Number of vectors in the dataset
142    pub fn new(dimension: usize, num_vectors: usize) -> Self {
143        Self {
144            dimension,
145            num_vectors,
146        }
147    }
148
149    /// Tune HNSW parameters using heuristics
150    ///
151    /// This is a fast method that uses rules-of-thumb based on dataset characteristics.
152    ///
153    /// # Arguments
154    ///
155    /// * `goal` - Optimization goal
156    /// * `constraints` - Optional performance constraints
157    ///
158    /// # Returns
159    ///
160    /// Recommended HNSW parameters
161    pub fn tune_heuristic(
162        &self,
163        goal: TuningGoal,
164        constraints: Option<PerformanceConstraints>,
165    ) -> anyhow::Result<HnswParams> {
166        let constraints = constraints.unwrap_or_default();
167
168        // Base parameters from goal
169        let mut params = match goal {
170            TuningGoal::MaxRecall => HnswParams {
171                m: 32,
172                ef_construction: 400,
173                ef_search: 200,
174                estimated_recall: Some(0.99),
175                estimated_latency_ms: None,
176                estimated_memory_mb: None,
177            },
178            TuningGoal::MinLatency => HnswParams {
179                m: 8,
180                ef_construction: 100,
181                ef_search: 16,
182                estimated_recall: Some(0.90),
183                estimated_latency_ms: None,
184                estimated_memory_mb: None,
185            },
186            TuningGoal::MinMemory => HnswParams {
187                m: 8,
188                ef_construction: 100,
189                ef_search: 32,
190                estimated_recall: Some(0.92),
191                estimated_latency_ms: None,
192                estimated_memory_mb: None,
193            },
194            TuningGoal::Balanced => HnswParams {
195                m: 16,
196                ef_construction: 200,
197                ef_search: 50,
198                estimated_recall: Some(0.95),
199                estimated_latency_ms: None,
200                estimated_memory_mb: None,
201            },
202        };
203
204        // Adjust based on dataset size
205        params = self.adjust_for_dataset_size(params);
206
207        // Apply constraints
208        params = self.apply_constraints(params, &constraints)?;
209
210        // Estimate performance metrics
211        params.estimated_memory_mb = Some(self.estimate_memory_mb(&params));
212        params.estimated_latency_ms = Some(self.estimate_latency_ms(&params));
213
214        Ok(params)
215    }
216
217    /// Adjust parameters based on dataset size
218    fn adjust_for_dataset_size(&self, mut params: HnswParams) -> HnswParams {
219        // For small datasets (<10k), can afford higher M and ef
220        if self.num_vectors < 10_000 {
221            params.m = (params.m as f32 * 1.5) as usize;
222            params.ef_construction = (params.ef_construction as f32 * 1.5) as usize;
223        }
224        // For very large datasets (>1M), reduce M and ef
225        else if self.num_vectors > 1_000_000 {
226            params.m = (params.m as f32 * 0.75) as usize;
227            params.ef_construction = (params.ef_construction as f32 * 0.75) as usize;
228        }
229
230        // Ensure M is in reasonable range
231        params.m = params.m.max(4).min(64);
232        params.ef_construction = params.ef_construction.max(50).min(500);
233        params.ef_search = params.ef_search.max(10).min(500);
234
235        params
236    }
237
238    /// Apply performance constraints
239    fn apply_constraints(
240        &self,
241        mut params: HnswParams,
242        constraints: &PerformanceConstraints,
243    ) -> anyhow::Result<HnswParams> {
244        // Adjust M to meet memory constraints
245        let mut memory_mb = self.estimate_memory_mb(&params);
246
247        while memory_mb > constraints.max_memory_mb && params.m > 4 {
248            params.m = (params.m as f32 * 0.8) as usize;
249            memory_mb = self.estimate_memory_mb(&params);
250        }
251
252        // Adjust ef_search to meet latency constraints
253        let mut latency_ms = self.estimate_latency_ms(&params);
254
255        while latency_ms > constraints.max_latency_ms && params.ef_search > 10 {
256            params.ef_search = (params.ef_search as f32 * 0.8) as usize;
257            latency_ms = self.estimate_latency_ms(&params);
258        }
259
260        // Adjust ef_construction and ef_search to meet recall constraints
261        if let Some(recall) = params.estimated_recall {
262            if recall < constraints.min_recall {
263                // Increase ef_search to improve recall
264                let recall_deficit = constraints.min_recall - recall;
265                let boost = 1.0 + recall_deficit * 2.0; // Heuristic
266                params.ef_search = (params.ef_search as f32 * boost) as usize;
267                params.ef_search = params.ef_search.min(500);
268
269                // Update estimated recall
270                params.estimated_recall =
271                    Some(params.estimated_recall.unwrap() + recall_deficit * 0.5);
272            }
273        }
274
275        Ok(params)
276    }
277
278    /// Estimate memory usage in megabytes
279    fn estimate_memory_mb(&self, params: &HnswParams) -> f32 {
280        // Memory = vector storage + graph structure
281        let vector_memory_mb = (self.num_vectors * self.dimension * 4) as f32 / 1_048_576.0; // 4 bytes per f32
282
283        // Graph structure: each node has M connections per layer
284        // Average layers ≈ log₂(N) / M
285        let avg_layers = (self.num_vectors as f32).log2() / params.m as f32;
286        let avg_layers = avg_layers.max(1.0);
287
288        // Each connection is ~4 bytes (ID) + metadata
289        let connections_per_node = params.m as f32 * avg_layers;
290        let graph_memory_mb = (self.num_vectors as f32 * connections_per_node * 8.0) / 1_048_576.0;
291
292        vector_memory_mb + graph_memory_mb
293    }
294
295    /// Estimate query latency in milliseconds
296    fn estimate_latency_ms(&self, params: &HnswParams) -> f32 {
297        // Latency depends on ef_search and dataset size
298        // Heuristic model based on typical HNSW performance
299
300        let base_latency = if self.num_vectors < 10_000 {
301            0.1 // 0.1ms for small datasets
302        } else if self.num_vectors < 100_000 {
303            0.5
304        } else if self.num_vectors < 1_000_000 {
305            1.0
306        } else {
307            2.0
308        };
309
310        // ef_search affects number of distance calculations
311        let ef_factor = (params.ef_search as f32 / 50.0).sqrt(); // Normalized to ef=50
312
313        // Dimension affects distance calculation cost
314        let dim_factor = (self.dimension as f32 / 128.0).sqrt(); // Normalized to dim=128
315
316        base_latency * ef_factor * dim_factor
317    }
318
319    /// Generate parameter recommendations
320    ///
321    /// Returns a set of recommended parameter configurations for different scenarios.
322    pub fn recommend_all(&self) -> Vec<(String, HnswParams)> {
323        vec![
324            (
325                "Fast (Low latency, ~90% recall)".to_string(),
326                self.tune_heuristic(TuningGoal::MinLatency, None).unwrap(),
327            ),
328            (
329                "Balanced (Good all-around, ~95% recall)".to_string(),
330                self.tune_heuristic(TuningGoal::Balanced, None).unwrap(),
331            ),
332            (
333                "Accurate (High recall, ~99% recall)".to_string(),
334                self.tune_heuristic(TuningGoal::MaxRecall, None).unwrap(),
335            ),
336            (
337                "Memory-efficient (Low memory, ~92% recall)".to_string(),
338                self.tune_heuristic(TuningGoal::MinMemory, None).unwrap(),
339            ),
340        ]
341    }
342
343    /// Explain parameter choices
344    pub fn explain_params(&self, params: &HnswParams) -> String {
345        let mut explanation = String::new();
346
347        explanation.push_str("HNSW Parameter Analysis:\n");
348        explanation.push_str("═══════════════════════════\n\n");
349
350        // M parameter
351        explanation.push_str(&format!("M = {} (connections per node)\n", params.m));
352        if params.m <= 8 {
353            explanation.push_str("  → Low M: Faster construction, less memory, lower recall\n");
354        } else if params.m <= 24 {
355            explanation.push_str("  → Medium M: Balanced performance\n");
356        } else {
357            explanation.push_str("  → High M: Better recall, more memory, slower construction\n");
358        }
359        explanation.push('\n');
360
361        // ef_construction
362        explanation.push_str(&format!("ef_construction = {}\n", params.ef_construction));
363        if params.ef_construction <= 100 {
364            explanation.push_str("  → Low ef: Fast index construction, lower quality\n");
365        } else if params.ef_construction <= 300 {
366            explanation.push_str("  → Medium ef: Balanced construction speed and quality\n");
367        } else {
368            explanation.push_str("  → High ef: Slower construction, higher quality index\n");
369        }
370        explanation.push('\n');
371
372        // ef_search
373        explanation.push_str(&format!("ef_search = {}\n", params.ef_search));
374        if params.ef_search <= 32 {
375            explanation.push_str("  → Low ef: Fast queries, lower recall\n");
376        } else if params.ef_search <= 100 {
377            explanation.push_str("  → Medium ef: Balanced query speed and recall\n");
378        } else {
379            explanation.push_str("  → High ef: Slower queries, higher recall\n");
380        }
381        explanation.push('\n');
382
383        // Estimates
384        if let Some(recall) = params.estimated_recall {
385            explanation.push_str(&format!("Estimated Recall: {:.1}%\n", recall * 100.0));
386        }
387
388        if let Some(latency) = params.estimated_latency_ms {
389            explanation.push_str(&format!("Estimated Latency: {:.2}ms per query\n", latency));
390        }
391
392        if let Some(memory) = params.estimated_memory_mb {
393            explanation.push_str(&format!("Estimated Memory: {:.1} MB\n", memory));
394        }
395
396        explanation.push('\n');
397        explanation.push_str("Trade-offs:\n");
398        explanation.push_str("  - Higher M → Better recall but more memory\n");
399        explanation
400            .push_str("  - Higher ef_construction → Better index quality but slower build\n");
401        explanation.push_str("  - Higher ef_search → Better recall but slower queries\n");
402
403        explanation
404    }
405}
406
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_autotuner_heuristic() {
        // Mid-sized dataset: 100k vectors, 128 dimensions.
        let tuner = AutoTuner::new(128, 100_000);

        let p = tuner.tune_heuristic(TuningGoal::Balanced, None).unwrap();

        // Every knob lands in its sane range and all estimates are populated.
        assert!((4..=64).contains(&p.m));
        assert!(p.ef_construction >= 50);
        assert!(p.ef_search >= 10);
        assert!(p.estimated_recall.is_some());
        assert!(p.estimated_memory_mb.is_some());
        assert!(p.estimated_latency_ms.is_some());
    }

    #[test]
    fn test_tuning_goals() {
        let tuner = AutoTuner::new(128, 100_000);

        let speedy = tuner.tune_heuristic(TuningGoal::MinLatency, None).unwrap();
        let precise = tuner.tune_heuristic(TuningGoal::MaxRecall, None).unwrap();

        // Speed-tuned params search shallower than recall-tuned params.
        assert!(speedy.ef_search < precise.ef_search);

        // Recall-tuned params connect at least as densely.
        assert!(precise.m >= speedy.m);
    }

    #[test]
    fn test_constraints() {
        let tuner = AutoTuner::new(128, 100_000);

        let limits = PerformanceConstraints {
            min_recall: 0.98,
            max_latency_ms: 5.0,
            max_memory_mb: 500.0,
        };

        let tuned = tuner
            .tune_heuristic(TuningGoal::Balanced, Some(limits))
            .unwrap();

        // The memory estimate should honor the cap (10% slack for rounding).
        if let Some(mem) = tuned.estimated_memory_mb {
            assert!(mem <= limits.max_memory_mb * 1.1);
        }
    }

    #[test]
    fn test_dataset_size_adjustment() {
        let small = AutoTuner::new(128, 1_000);
        let large = AutoTuner::new(128, 2_000_000);

        let small_params = small.tune_heuristic(TuningGoal::Balanced, None).unwrap();
        let large_params = large.tune_heuristic(TuningGoal::Balanced, None).unwrap();

        // Small datasets can afford at least as dense a graph as huge ones.
        assert!(small_params.m >= large_params.m);
    }

    #[test]
    fn test_recommend_all() {
        let recs = AutoTuner::new(128, 50_000).recommend_all();

        assert_eq!(recs.len(), 4);

        // The first three scenarios should increase in ef_search:
        // fast → balanced → accurate.
        let fast = &recs[0].1;
        let balanced = &recs[1].1;
        let accurate = &recs[2].1;

        assert!(fast.ef_search < balanced.ef_search);
        assert!(balanced.ef_search < accurate.ef_search);
    }

    #[test]
    fn test_explain_params() {
        let described = HnswParams {
            m: 16,
            ef_construction: 200,
            ef_search: 50,
            estimated_recall: Some(0.95),
            estimated_latency_ms: Some(1.5),
            estimated_memory_mb: Some(250.0),
        };

        let text = AutoTuner::new(128, 100_000).explain_params(&described);

        // The report must mention every knob and the recall estimate.
        assert!(text.contains("M = 16"));
        assert!(text.contains("ef_construction = 200"));
        assert!(text.contains("ef_search = 50"));
        assert!(text.contains("95.0%"));
    }

    #[test]
    fn test_memory_estimation() {
        let estimate =
            AutoTuner::new(128, 100_000).estimate_memory_mb(&HnswParams::default());

        // Positive and plausible for 100k × 128-dim f32 vectors.
        assert!(estimate > 0.0);
        assert!(estimate < 10_000.0);
    }

    #[test]
    fn test_latency_estimation() {
        let estimate =
            AutoTuner::new(128, 100_000).estimate_latency_ms(&HnswParams::default());

        // Positive and plausible.
        assert!(estimate > 0.0);
        assert!(estimate < 1000.0);
    }
}