oxirs_vec/hnsw/
adaptive_search.rs

1//! Adaptive ef_search parameter tuning for HNSW
2//!
3//! This module provides intelligent, data-driven optimization of the ef_search parameter
4//! to automatically balance search accuracy and latency based on query characteristics.
5
6use anyhow::Result;
7use parking_lot::RwLock;
8use std::collections::VecDeque;
9use std::sync::Arc;
10use std::time::Instant;
11
12/// Configuration for adaptive ef_search tuning
13#[derive(Debug, Clone)]
14pub struct AdaptiveSearchConfig {
15    /// Initial ef_search value
16    pub initial_ef_search: usize,
17    /// Minimum allowed ef_search
18    pub min_ef_search: usize,
19    /// Maximum allowed ef_search
20    pub max_ef_search: usize,
21    /// Target recall (0.0 to 1.0)
22    pub target_recall: f32,
23    /// Target latency in milliseconds
24    pub target_latency_ms: f64,
25    /// Tolerance for recall deviation
26    pub recall_tolerance: f32,
27    /// Tolerance for latency deviation (as fraction)
28    pub latency_tolerance: f64,
29    /// Number of queries to consider for adaptation
30    pub adaptation_window: usize,
31    /// Minimum queries before adaptation starts
32    pub min_queries_for_adaptation: usize,
33    /// Adaptation step size (multiplier for ef_search adjustment)
34    pub adaptation_rate: f32,
35    /// Enable aggressive optimization
36    pub aggressive_mode: bool,
37}
38
39impl Default for AdaptiveSearchConfig {
40    fn default() -> Self {
41        Self {
42            initial_ef_search: 64,
43            min_ef_search: 16,
44            max_ef_search: 512,
45            target_recall: 0.95,
46            target_latency_ms: 10.0,
47            recall_tolerance: 0.02,
48            latency_tolerance: 0.2,
49            adaptation_window: 100,
50            min_queries_for_adaptation: 20,
51            adaptation_rate: 0.1,
52            aggressive_mode: false,
53        }
54    }
55}
56
57/// Query performance metrics
58#[derive(Debug, Clone)]
59pub struct QueryMetrics {
60    /// Query execution time
61    pub latency_ms: f64,
62    /// Achieved recall (if ground truth available)
63    pub recall: Option<f32>,
64    /// Number of distance calculations performed
65    pub distance_computations: usize,
66    /// ef_search value used
67    pub ef_search_used: usize,
68    /// Query timestamp
69    pub timestamp: Instant,
70}
71
72/// Adaptive search statistics
73#[derive(Debug, Clone)]
74pub struct AdaptiveSearchStats {
75    /// Total queries processed
76    pub total_queries: usize,
77    /// Current ef_search value
78    pub current_ef_search: usize,
79    /// Average latency over window
80    pub avg_latency_ms: f64,
81    /// Average recall over window (if available)
82    pub avg_recall: Option<f32>,
83    /// Number of adaptations performed
84    pub adaptation_count: usize,
85    /// Last adaptation timestamp
86    pub last_adaptation: Option<Instant>,
87    /// Current performance score (0.0 to 1.0)
88    pub performance_score: f32,
89}
90
91/// Adaptive ef_search tuner
92pub struct AdaptiveSearchTuner {
93    config: AdaptiveSearchConfig,
94    current_ef_search: Arc<RwLock<usize>>,
95    query_history: Arc<RwLock<VecDeque<QueryMetrics>>>,
96    stats: Arc<RwLock<AdaptiveSearchStats>>,
97}
98
99impl AdaptiveSearchTuner {
100    /// Create a new adaptive search tuner
101    pub fn new(config: AdaptiveSearchConfig) -> Self {
102        let initial_ef = config.initial_ef_search;
103
104        Self {
105            config,
106            current_ef_search: Arc::new(RwLock::new(initial_ef)),
107            query_history: Arc::new(RwLock::new(VecDeque::new())),
108            stats: Arc::new(RwLock::new(AdaptiveSearchStats {
109                total_queries: 0,
110                current_ef_search: initial_ef,
111                avg_latency_ms: 0.0,
112                avg_recall: None,
113                adaptation_count: 0,
114                last_adaptation: None,
115                performance_score: 0.5,
116            })),
117        }
118    }
119
120    /// Get the current ef_search value to use
121    pub fn get_ef_search(&self) -> usize {
122        *self.current_ef_search.read()
123    }
124
125    /// Record query metrics and potentially adapt ef_search
126    pub fn record_query(&self, metrics: QueryMetrics) -> Result<()> {
127        let mut history = self.query_history.write();
128        let mut stats = self.stats.write();
129
130        // Add to history
131        history.push_back(metrics.clone());
132        stats.total_queries += 1;
133
134        // Trim history to window size
135        while history.len() > self.config.adaptation_window {
136            history.pop_front();
137        }
138
139        // Update statistics
140        self.update_statistics(&mut stats, &history);
141
142        // Attempt adaptation if enough data
143        if history.len() >= self.config.min_queries_for_adaptation {
144            // Pass mutable reference to avoid re-acquiring the lock
145            self.adapt_ef_search_internal(&mut stats, &history)?;
146        }
147
148        Ok(())
149    }
150
151    /// Update running statistics
152    fn update_statistics(&self, stats: &mut AdaptiveSearchStats, history: &VecDeque<QueryMetrics>) {
153        if history.is_empty() {
154            return;
155        }
156
157        // Calculate average latency
158        let sum_latency: f64 = history.iter().map(|m| m.latency_ms).sum();
159        stats.avg_latency_ms = sum_latency / history.len() as f64;
160
161        // Calculate average recall if available
162        let recalls: Vec<f32> = history.iter().filter_map(|m| m.recall).collect();
163
164        if !recalls.is_empty() {
165            let sum_recall: f32 = recalls.iter().sum();
166            stats.avg_recall = Some(sum_recall / recalls.len() as f32);
167        }
168
169        // Calculate performance score (weighted combination of recall and latency)
170        let recall_score = stats.avg_recall.unwrap_or(0.8); // Default to 0.8 if unknown
171        let latency_ratio = self.config.target_latency_ms / stats.avg_latency_ms.max(0.001);
172        let latency_score = latency_ratio.min(1.0);
173
174        // Weight recall more heavily (70/30 split)
175        stats.performance_score = (0.7 * recall_score + 0.3 * latency_score as f32).min(1.0);
176
177        stats.current_ef_search = *self.current_ef_search.read();
178    }
179
180    /// Adapt ef_search based on performance (internal method with mutable refs)
181    fn adapt_ef_search_internal(
182        &self,
183        stats: &mut AdaptiveSearchStats,
184        _history: &VecDeque<QueryMetrics>,
185    ) -> Result<()> {
186        let mut current_ef = self.current_ef_search.write();
187
188        let avg_latency = stats.avg_latency_ms;
189        let avg_recall = stats.avg_recall;
190
191        // Determine if we need to adjust
192        let recall_too_low = avg_recall
193            .is_some_and(|r| r < self.config.target_recall - self.config.recall_tolerance);
194
195        let recall_sufficient = match avg_recall {
196            Some(r) => r >= self.config.target_recall,
197            None => true,
198        };
199
200        let latency_too_high =
201            avg_latency > self.config.target_latency_ms * (1.0 + self.config.latency_tolerance);
202        let latency_acceptable = avg_latency <= self.config.target_latency_ms;
203
204        // Decision logic
205        let new_ef = if recall_too_low {
206            // Recall is too low, increase ef_search
207            let increase = if self.config.aggressive_mode {
208                (*current_ef as f32 * (1.0 + 2.0 * self.config.adaptation_rate)) as usize
209            } else {
210                (*current_ef as f32 * (1.0 + self.config.adaptation_rate)) as usize
211            };
212            increase.min(self.config.max_ef_search)
213        } else if recall_sufficient && latency_too_high {
214            // Recall is good but latency is too high, decrease ef_search
215            let decrease = if self.config.aggressive_mode {
216                (*current_ef as f32 * (1.0 - 2.0 * self.config.adaptation_rate)) as usize
217            } else {
218                (*current_ef as f32 * (1.0 - self.config.adaptation_rate)) as usize
219            };
220            decrease.max(self.config.min_ef_search)
221        } else if recall_sufficient && latency_acceptable {
222            // Both metrics are good, try to reduce ef_search slightly for better performance
223            let decrease =
224                (*current_ef as f32 * (1.0 - 0.5 * self.config.adaptation_rate)) as usize;
225            decrease.max(self.config.min_ef_search)
226        } else {
227            // No change needed
228            *current_ef
229        };
230
231        // Apply change if significant
232        if new_ef != *current_ef {
233            tracing::debug!(
234                "Adapting ef_search: {} -> {} (recall: {:?}, latency: {:.2}ms)",
235                *current_ef,
236                new_ef,
237                avg_recall,
238                avg_latency
239            );
240
241            *current_ef = new_ef;
242            stats.current_ef_search = new_ef;
243            stats.adaptation_count += 1;
244            stats.last_adaptation = Some(Instant::now());
245        }
246
247        Ok(())
248    }
249
250    /// Get current statistics
251    pub fn stats(&self) -> AdaptiveSearchStats {
252        self.stats.read().clone()
253    }
254
255    /// Reset adaptation (useful for testing or mode changes)
256    pub fn reset(&self) {
257        let mut ef_search = self.current_ef_search.write();
258        *ef_search = self.config.initial_ef_search;
259
260        let mut history = self.query_history.write();
261        history.clear();
262
263        let mut stats = self.stats.write();
264        *stats = AdaptiveSearchStats {
265            total_queries: 0,
266            current_ef_search: self.config.initial_ef_search,
267            avg_latency_ms: 0.0,
268            avg_recall: None,
269            adaptation_count: 0,
270            last_adaptation: None,
271            performance_score: 0.5,
272        };
273    }
274
275    /// Manually set ef_search (overrides adaptation)
276    pub fn set_ef_search(&self, ef_search: usize) {
277        let mut current_ef = self.current_ef_search.write();
278        *current_ef = ef_search.clamp(self.config.min_ef_search, self.config.max_ef_search);
279
280        let mut stats = self.stats.write();
281        stats.current_ef_search = *current_ef;
282    }
283
284    /// Get configuration
285    pub fn config(&self) -> &AdaptiveSearchConfig {
286        &self.config
287    }
288}
289
290#[cfg(test)]
291mod tests {
292    use super::*;
293
294    #[test]
295    fn test_adaptive_search_creation() {
296        let config = AdaptiveSearchConfig::default();
297        let tuner = AdaptiveSearchTuner::new(config);
298        assert_eq!(tuner.get_ef_search(), 64);
299    }
300
301    #[test]
302    fn test_record_query() {
303        let tuner = AdaptiveSearchTuner::new(AdaptiveSearchConfig::default());
304
305        let metrics = QueryMetrics {
306            latency_ms: 5.0,
307            recall: Some(0.95),
308            distance_computations: 100,
309            ef_search_used: 64,
310            timestamp: Instant::now(),
311        };
312
313        assert!(tuner.record_query(metrics).is_ok());
314
315        let stats = tuner.stats();
316        assert_eq!(stats.total_queries, 1);
317    }
318
319    #[test]
320    fn test_adaptation_on_low_recall() {
321        let config = AdaptiveSearchConfig {
322            min_queries_for_adaptation: 5,
323            target_recall: 0.95,
324            initial_ef_search: 32,
325            ..Default::default()
326        };
327
328        let tuner = AdaptiveSearchTuner::new(config);
329
330        // Record queries with low recall
331        for _ in 0..10 {
332            let metrics = QueryMetrics {
333                latency_ms: 2.0,
334                recall: Some(0.80), // Below target
335                distance_computations: 50,
336                ef_search_used: 32,
337                timestamp: Instant::now(),
338            };
339            tuner.record_query(metrics).unwrap();
340        }
341
342        // ef_search should have increased
343        assert!(tuner.get_ef_search() > 32);
344    }
345
346    #[test]
347    fn test_adaptation_on_high_latency() {
348        let config = AdaptiveSearchConfig {
349            min_queries_for_adaptation: 5,
350            target_latency_ms: 5.0,
351            target_recall: 0.90,
352            initial_ef_search: 128,
353            ..Default::default()
354        };
355
356        let tuner = AdaptiveSearchTuner::new(config);
357
358        // Record queries with high latency but good recall
359        for _ in 0..10 {
360            let metrics = QueryMetrics {
361                latency_ms: 15.0,   // Well above target
362                recall: Some(0.98), // Good recall
363                distance_computations: 200,
364                ef_search_used: 128,
365                timestamp: Instant::now(),
366            };
367            tuner.record_query(metrics).unwrap();
368        }
369
370        // ef_search should have decreased
371        assert!(tuner.get_ef_search() < 128);
372    }
373
374    #[test]
375    fn test_manual_override() {
376        let tuner = AdaptiveSearchTuner::new(AdaptiveSearchConfig::default());
377        tuner.set_ef_search(200);
378        assert_eq!(tuner.get_ef_search(), 200);
379    }
380
381    #[test]
382    fn test_reset() {
383        let config = AdaptiveSearchConfig::default();
384        let initial_ef = config.initial_ef_search;
385        let tuner = AdaptiveSearchTuner::new(config);
386
387        tuner.set_ef_search(200);
388        tuner.reset();
389
390        assert_eq!(tuner.get_ef_search(), initial_ef);
391        assert_eq!(tuner.stats().total_queries, 0);
392    }
393}