1use serde::{Deserialize, Serialize};
51
52#[derive(Debug, Clone, Serialize, Deserialize)]
54pub struct HnswParams {
55 pub m: usize,
57
58 pub ef_construction: usize,
60
61 pub ef_search: usize,
63
64 pub estimated_recall: Option<f32>,
66
67 pub estimated_latency_ms: Option<f32>,
69
70 pub estimated_memory_mb: Option<f32>,
72}
73
74impl Default for HnswParams {
75 fn default() -> Self {
76 Self {
77 m: 16,
78 ef_construction: 200,
79 ef_search: 50,
80 estimated_recall: None,
81 estimated_latency_ms: None,
82 estimated_memory_mb: None,
83 }
84 }
85}
86
87#[derive(Debug, Clone, Copy)]
89pub struct PerformanceConstraints {
90 pub min_recall: f32,
92
93 pub max_latency_ms: f32,
95
96 pub max_memory_mb: f32,
98}
99
100impl Default for PerformanceConstraints {
101 fn default() -> Self {
102 Self {
103 min_recall: 0.95,
104 max_latency_ms: 10.0,
105 max_memory_mb: f32::INFINITY,
106 }
107 }
108}
109
110#[derive(Debug, Clone, Copy, PartialEq)]
112pub enum TuningGoal {
113 MaxRecall,
115
116 MinLatency,
118
119 MinMemory,
121
122 Balanced,
124}
125
126pub struct AutoTuner {
128 dimension: usize,
130
131 num_vectors: usize,
133}
134
135impl AutoTuner {
136 pub fn new(dimension: usize, num_vectors: usize) -> Self {
143 Self {
144 dimension,
145 num_vectors,
146 }
147 }
148
149 pub fn tune_heuristic(
162 &self,
163 goal: TuningGoal,
164 constraints: Option<PerformanceConstraints>,
165 ) -> anyhow::Result<HnswParams> {
166 let constraints = constraints.unwrap_or_default();
167
168 let mut params = match goal {
170 TuningGoal::MaxRecall => HnswParams {
171 m: 32,
172 ef_construction: 400,
173 ef_search: 200,
174 estimated_recall: Some(0.99),
175 estimated_latency_ms: None,
176 estimated_memory_mb: None,
177 },
178 TuningGoal::MinLatency => HnswParams {
179 m: 8,
180 ef_construction: 100,
181 ef_search: 16,
182 estimated_recall: Some(0.90),
183 estimated_latency_ms: None,
184 estimated_memory_mb: None,
185 },
186 TuningGoal::MinMemory => HnswParams {
187 m: 8,
188 ef_construction: 100,
189 ef_search: 32,
190 estimated_recall: Some(0.92),
191 estimated_latency_ms: None,
192 estimated_memory_mb: None,
193 },
194 TuningGoal::Balanced => HnswParams {
195 m: 16,
196 ef_construction: 200,
197 ef_search: 50,
198 estimated_recall: Some(0.95),
199 estimated_latency_ms: None,
200 estimated_memory_mb: None,
201 },
202 };
203
204 params = self.adjust_for_dataset_size(params);
206
207 params = self.apply_constraints(params, &constraints)?;
209
210 params.estimated_memory_mb = Some(self.estimate_memory_mb(¶ms));
212 params.estimated_latency_ms = Some(self.estimate_latency_ms(¶ms));
213
214 Ok(params)
215 }
216
217 fn adjust_for_dataset_size(&self, mut params: HnswParams) -> HnswParams {
219 if self.num_vectors < 10_000 {
221 params.m = (params.m as f32 * 1.5) as usize;
222 params.ef_construction = (params.ef_construction as f32 * 1.5) as usize;
223 }
224 else if self.num_vectors > 1_000_000 {
226 params.m = (params.m as f32 * 0.75) as usize;
227 params.ef_construction = (params.ef_construction as f32 * 0.75) as usize;
228 }
229
230 params.m = params.m.max(4).min(64);
232 params.ef_construction = params.ef_construction.max(50).min(500);
233 params.ef_search = params.ef_search.max(10).min(500);
234
235 params
236 }
237
238 fn apply_constraints(
240 &self,
241 mut params: HnswParams,
242 constraints: &PerformanceConstraints,
243 ) -> anyhow::Result<HnswParams> {
244 let mut memory_mb = self.estimate_memory_mb(¶ms);
246
247 while memory_mb > constraints.max_memory_mb && params.m > 4 {
248 params.m = (params.m as f32 * 0.8) as usize;
249 memory_mb = self.estimate_memory_mb(¶ms);
250 }
251
252 let mut latency_ms = self.estimate_latency_ms(¶ms);
254
255 while latency_ms > constraints.max_latency_ms && params.ef_search > 10 {
256 params.ef_search = (params.ef_search as f32 * 0.8) as usize;
257 latency_ms = self.estimate_latency_ms(¶ms);
258 }
259
260 if let Some(recall) = params.estimated_recall {
262 if recall < constraints.min_recall {
263 let recall_deficit = constraints.min_recall - recall;
265 let boost = 1.0 + recall_deficit * 2.0; params.ef_search = (params.ef_search as f32 * boost) as usize;
267 params.ef_search = params.ef_search.min(500);
268
269 params.estimated_recall =
271 Some(params.estimated_recall.unwrap() + recall_deficit * 0.5);
272 }
273 }
274
275 Ok(params)
276 }
277
278 fn estimate_memory_mb(&self, params: &HnswParams) -> f32 {
280 let vector_memory_mb = (self.num_vectors * self.dimension * 4) as f32 / 1_048_576.0; let avg_layers = (self.num_vectors as f32).log2() / params.m as f32;
286 let avg_layers = avg_layers.max(1.0);
287
288 let connections_per_node = params.m as f32 * avg_layers;
290 let graph_memory_mb = (self.num_vectors as f32 * connections_per_node * 8.0) / 1_048_576.0;
291
292 vector_memory_mb + graph_memory_mb
293 }
294
295 fn estimate_latency_ms(&self, params: &HnswParams) -> f32 {
297 let base_latency = if self.num_vectors < 10_000 {
301 0.1 } else if self.num_vectors < 100_000 {
303 0.5
304 } else if self.num_vectors < 1_000_000 {
305 1.0
306 } else {
307 2.0
308 };
309
310 let ef_factor = (params.ef_search as f32 / 50.0).sqrt(); let dim_factor = (self.dimension as f32 / 128.0).sqrt(); base_latency * ef_factor * dim_factor
317 }
318
319 pub fn recommend_all(&self) -> Vec<(String, HnswParams)> {
323 vec![
324 (
325 "Fast (Low latency, ~90% recall)".to_string(),
326 self.tune_heuristic(TuningGoal::MinLatency, None).unwrap(),
327 ),
328 (
329 "Balanced (Good all-around, ~95% recall)".to_string(),
330 self.tune_heuristic(TuningGoal::Balanced, None).unwrap(),
331 ),
332 (
333 "Accurate (High recall, ~99% recall)".to_string(),
334 self.tune_heuristic(TuningGoal::MaxRecall, None).unwrap(),
335 ),
336 (
337 "Memory-efficient (Low memory, ~92% recall)".to_string(),
338 self.tune_heuristic(TuningGoal::MinMemory, None).unwrap(),
339 ),
340 ]
341 }
342
343 pub fn explain_params(&self, params: &HnswParams) -> String {
345 let mut explanation = String::new();
346
347 explanation.push_str("HNSW Parameter Analysis:\n");
348 explanation.push_str("═══════════════════════════\n\n");
349
350 explanation.push_str(&format!("M = {} (connections per node)\n", params.m));
352 if params.m <= 8 {
353 explanation.push_str(" → Low M: Faster construction, less memory, lower recall\n");
354 } else if params.m <= 24 {
355 explanation.push_str(" → Medium M: Balanced performance\n");
356 } else {
357 explanation.push_str(" → High M: Better recall, more memory, slower construction\n");
358 }
359 explanation.push('\n');
360
361 explanation.push_str(&format!("ef_construction = {}\n", params.ef_construction));
363 if params.ef_construction <= 100 {
364 explanation.push_str(" → Low ef: Fast index construction, lower quality\n");
365 } else if params.ef_construction <= 300 {
366 explanation.push_str(" → Medium ef: Balanced construction speed and quality\n");
367 } else {
368 explanation.push_str(" → High ef: Slower construction, higher quality index\n");
369 }
370 explanation.push('\n');
371
372 explanation.push_str(&format!("ef_search = {}\n", params.ef_search));
374 if params.ef_search <= 32 {
375 explanation.push_str(" → Low ef: Fast queries, lower recall\n");
376 } else if params.ef_search <= 100 {
377 explanation.push_str(" → Medium ef: Balanced query speed and recall\n");
378 } else {
379 explanation.push_str(" → High ef: Slower queries, higher recall\n");
380 }
381 explanation.push('\n');
382
383 if let Some(recall) = params.estimated_recall {
385 explanation.push_str(&format!("Estimated Recall: {:.1}%\n", recall * 100.0));
386 }
387
388 if let Some(latency) = params.estimated_latency_ms {
389 explanation.push_str(&format!("Estimated Latency: {:.2}ms per query\n", latency));
390 }
391
392 if let Some(memory) = params.estimated_memory_mb {
393 explanation.push_str(&format!("Estimated Memory: {:.1} MB\n", memory));
394 }
395
396 explanation.push('\n');
397 explanation.push_str("Trade-offs:\n");
398 explanation.push_str(" - Higher M → Better recall but more memory\n");
399 explanation
400 .push_str(" - Higher ef_construction → Better index quality but slower build\n");
401 explanation.push_str(" - Higher ef_search → Better recall but slower queries\n");
402
403 explanation
404 }
405}
406
407#[cfg(test)]
408mod tests {
409 use super::*;
410
411 #[test]
412 fn test_autotuner_heuristic() {
413 let tuner = AutoTuner::new(128, 100_000);
414
415 let params = tuner.tune_heuristic(TuningGoal::Balanced, None).unwrap();
416
417 assert!(params.m >= 4 && params.m <= 64);
418 assert!(params.ef_construction >= 50);
419 assert!(params.ef_search >= 10);
420 assert!(params.estimated_recall.is_some());
421 assert!(params.estimated_memory_mb.is_some());
422 assert!(params.estimated_latency_ms.is_some());
423 }
424
425 #[test]
426 fn test_tuning_goals() {
427 let tuner = AutoTuner::new(128, 100_000);
428
429 let fast = tuner.tune_heuristic(TuningGoal::MinLatency, None).unwrap();
430 let accurate = tuner.tune_heuristic(TuningGoal::MaxRecall, None).unwrap();
431
432 assert!(fast.ef_search < accurate.ef_search);
434
435 assert!(accurate.m >= fast.m);
437 }
438
439 #[test]
440 fn test_constraints() {
441 let tuner = AutoTuner::new(128, 100_000);
442
443 let constraints = PerformanceConstraints {
444 min_recall: 0.98,
445 max_latency_ms: 5.0,
446 max_memory_mb: 500.0,
447 };
448
449 let params = tuner
450 .tune_heuristic(TuningGoal::Balanced, Some(constraints))
451 .unwrap();
452
453 if let Some(memory) = params.estimated_memory_mb {
455 assert!(memory <= constraints.max_memory_mb * 1.1); }
457 }
458
459 #[test]
460 fn test_dataset_size_adjustment() {
461 let small_tuner = AutoTuner::new(128, 1_000);
462 let large_tuner = AutoTuner::new(128, 2_000_000);
463
464 let small_params = small_tuner
465 .tune_heuristic(TuningGoal::Balanced, None)
466 .unwrap();
467 let large_params = large_tuner
468 .tune_heuristic(TuningGoal::Balanced, None)
469 .unwrap();
470
471 assert!(small_params.m >= large_params.m);
473 }
474
475 #[test]
476 fn test_recommend_all() {
477 let tuner = AutoTuner::new(128, 50_000);
478
479 let recommendations = tuner.recommend_all();
480
481 assert_eq!(recommendations.len(), 4);
482
483 let fast = &recommendations[0].1;
485 let balanced = &recommendations[1].1;
486 let accurate = &recommendations[2].1;
487
488 assert!(fast.ef_search < balanced.ef_search);
489 assert!(balanced.ef_search < accurate.ef_search);
490 }
491
492 #[test]
493 fn test_explain_params() {
494 let params = HnswParams {
495 m: 16,
496 ef_construction: 200,
497 ef_search: 50,
498 estimated_recall: Some(0.95),
499 estimated_latency_ms: Some(1.5),
500 estimated_memory_mb: Some(250.0),
501 };
502
503 let tuner = AutoTuner::new(128, 100_000);
504 let explanation = tuner.explain_params(¶ms);
505
506 assert!(explanation.contains("M = 16"));
507 assert!(explanation.contains("ef_construction = 200"));
508 assert!(explanation.contains("ef_search = 50"));
509 assert!(explanation.contains("95.0%"));
510 }
511
512 #[test]
513 fn test_memory_estimation() {
514 let tuner = AutoTuner::new(128, 100_000);
515 let params = HnswParams::default();
516
517 let memory = tuner.estimate_memory_mb(¶ms);
518
519 assert!(memory > 0.0);
521 assert!(memory < 10_000.0); }
523
524 #[test]
525 fn test_latency_estimation() {
526 let tuner = AutoTuner::new(128, 100_000);
527 let params = HnswParams::default();
528
529 let latency = tuner.estimate_latency_ms(¶ms);
530
531 assert!(latency > 0.0);
533 assert!(latency < 1000.0); }
535}