// ipfrs_semantic/optimization.rs

//! Index optimization utilities
//!
//! This module provides tools for optimizing index performance,
//! including automatic parameter tuning, query optimization, and resource management.

use std::collections::VecDeque;
use std::time::Duration;

/// Optimization goal for index tuning
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OptimizationGoal {
    /// Minimize query latency
    MinimizeLatency,
    /// Maximize recall/accuracy
    MaximizeRecall,
    /// Minimize memory usage
    MinimizeMemory,
    /// Balance between all factors
    Balanced,
}

/// Result of optimization analysis
#[derive(Debug, Clone)]
pub struct OptimizationResult {
    /// Current configuration quality score (0.0 - 1.0)
    pub current_score: f32,
    /// Recommended M parameter
    pub recommended_m: usize,
    /// Recommended ef_construction
    pub recommended_ef_construction: usize,
    /// Recommended ef_search
    pub recommended_ef_search: usize,
    /// Estimated improvement (0.0 - 1.0)
    pub estimated_improvement: f32,
    /// Reasoning for recommendations
    pub reasoning: Vec<String>,
}

/// Analyze index and provide optimization recommendations
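///
/// # Examples
///
/// A minimal usage sketch; the import path assumes this module is exposed as
/// `ipfrs_semantic::optimization`:
///
/// ```
/// use ipfrs_semantic::optimization::{analyze_optimization, OptimizationGoal};
///
/// let result = analyze_optimization(50_000, 768, 16, 200, OptimizationGoal::Balanced);
/// assert!((0.0..=1.0).contains(&result.current_score));
/// assert!(!result.reasoning.is_empty());
/// ```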
pub fn analyze_optimization(
    index_size: usize,
    dimension: usize,
    current_m: usize,
    current_ef_construction: usize,
    goal: OptimizationGoal,
) -> OptimizationResult {
    let mut reasoning = Vec::new();

    // Compute recommended parameters based on goal
    let (recommended_m, recommended_ef_construction, recommended_ef_search) = match goal {
        OptimizationGoal::MinimizeLatency => {
            reasoning
                .push("Optimizing for low latency with reduced graph connectivity".to_string());
            let m = if index_size < 10_000 {
                12
            } else if index_size < 100_000 {
                16
            } else {
                20
            };
            (m, 150, 32)
        }
        OptimizationGoal::MaximizeRecall => {
            reasoning
                .push("Optimizing for high recall with increased graph connectivity".to_string());
            let m = if index_size < 10_000 {
                32
            } else if index_size < 100_000 {
                48
            } else {
                64
            };
            (m, 400, 200)
        }
        OptimizationGoal::MinimizeMemory => {
            reasoning.push("Optimizing for low memory with minimal graph connectivity".to_string());
            (8, 100, 50)
        }
        OptimizationGoal::Balanced => {
            reasoning.push("Balanced optimization for general use cases".to_string());
            let m = if index_size < 10_000 {
                16
            } else if index_size < 100_000 {
                24
            } else {
                32
            };
            let ef_c = if index_size < 10_000 { 200 } else { 300 };
            (m, ef_c, 100)
        }
    };

    // Evaluate current configuration
    let current_score =
        evaluate_config_quality(current_m, current_ef_construction, index_size, goal);

    // Evaluate recommended configuration
    let recommended_score =
        evaluate_config_quality(recommended_m, recommended_ef_construction, index_size, goal);

    let estimated_improvement = (recommended_score - current_score).max(0.0);

    // Add specific recommendations
    if current_m < recommended_m {
        reasoning.push(format!(
            "Increase M from {} to {} for better connectivity",
            current_m, recommended_m
        ));
    } else if current_m > recommended_m {
        reasoning.push(format!(
            "Decrease M from {} to {} to reduce memory usage",
            current_m, recommended_m
        ));
    }

    if dimension > 1024 {
        reasoning
            .push("High dimensionality detected. Consider dimensionality reduction.".to_string());
    }

    if index_size > 1_000_000 {
        reasoning.push("Large index detected. Consider using DiskANN or partitioning.".to_string());
    }

    OptimizationResult {
        current_score,
        recommended_m,
        recommended_ef_construction,
        recommended_ef_search,
        estimated_improvement,
        reasoning,
    }
}

/// Evaluate configuration quality for a given goal
fn evaluate_config_quality(
    m: usize,
    ef_construction: usize,
    index_size: usize,
    goal: OptimizationGoal,
) -> f32 {
    let optimal_m = match index_size {
        0..=10_000 => 16,
        10_001..=100_000 => 24,
        _ => 32,
    };

    let optimal_ef_c = match index_size {
        0..=10_000 => 200,
        10_001..=100_000 => 300,
        _ => 400,
    };

    // Compute distance from optimal for this index size
    let m_score = 1.0 - ((m as f32 - optimal_m as f32).abs() / optimal_m as f32).min(1.0);
    let ef_score =
        1.0 - ((ef_construction as f32 - optimal_ef_c as f32).abs() / optimal_ef_c as f32).min(1.0);
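    // Worked example: with index_size = 50_000 (optimal_m = 24, optimal_ef_c = 300),
    // m = 16 yields m_score = 1.0 - 8.0 / 24.0 ≈ 0.67 and ef_construction = 200
    // yields ef_score = 1.0 - 100.0 / 300.0 ≈ 0.67.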

    // Weight scores based on goal
    match goal {
        OptimizationGoal::MinimizeLatency => {
            // Prefer lower M and ef_construction
            let latency_penalty = (m as f32 / 32.0).min(1.0) * 0.5;
            (m_score * 0.3 + ef_score * 0.7) * (1.0 - latency_penalty)
        }
        OptimizationGoal::MaximizeRecall => {
            // Prefer higher M and ef_construction; clamp so the score stays
            // within the documented 0.0 - 1.0 range
            let recall_bonus = (m as f32 / 64.0).min(1.0) * 0.3;
            ((m_score * 0.7 + ef_score * 0.3) * (1.0 + recall_bonus)).min(1.0)
        }
        OptimizationGoal::MinimizeMemory => {
            // Strongly prefer lower M
            let memory_penalty = (m as f32 / 16.0).min(1.0) * 0.7;
            m_score * (1.0 - memory_penalty)
        }
        OptimizationGoal::Balanced => {
            // Equal weight
            (m_score + ef_score) / 2.0
        }
    }
}

/// Query optimizer for adaptive ef_search selection
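///
/// # Examples
///
/// A minimal usage sketch; the import path assumes this module is exposed as
/// `ipfrs_semantic::optimization`:
///
/// ```
/// use std::time::Duration;
/// use ipfrs_semantic::optimization::QueryOptimizer;
///
/// let mut optimizer = QueryOptimizer::new(50, Duration::from_millis(10));
/// for _ in 0..15 {
///     optimizer.record_query(Duration::from_millis(2));
/// }
/// // Queries run well under the 10ms target, so ef_search is nudged upward.
/// assert!(optimizer.get_ef_search() > 50);
/// ```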
pub struct QueryOptimizer {
    /// Query performance history
    latency_samples: VecDeque<Duration>,
    /// Maximum samples to keep
    max_samples: usize,
    /// Current ef_search value
    current_ef_search: usize,
    /// Minimum ef_search
    min_ef_search: usize,
    /// Maximum ef_search
    max_ef_search: usize,
    /// Target latency
    target_latency: Duration,
}

impl QueryOptimizer {
    /// Create a new query optimizer
    pub fn new(initial_ef_search: usize, target_latency: Duration) -> Self {
        Self {
            latency_samples: VecDeque::new(),
            max_samples: 100,
            current_ef_search: initial_ef_search,
            min_ef_search: 16,
            max_ef_search: 512,
            target_latency,
        }
    }

    /// Record a query latency and adjust ef_search if needed
    pub fn record_query(&mut self, latency: Duration) {
        self.latency_samples.push_back(latency);
        if self.latency_samples.len() > self.max_samples {
            self.latency_samples.pop_front();
        }

        // Adjust ef_search based on recent performance
        if self.latency_samples.len() >= 10 {
            self.adjust_ef_search();
        }
    }

    /// Get current recommended ef_search
    pub fn get_ef_search(&self) -> usize {
        self.current_ef_search
    }

    /// Adjust ef_search based on observed latency
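    ///
    /// Steps are multiplicative (about ±10% per adjustment), and ef_search is only
    /// raised once average latency falls below half the target, leaving a deadband
    /// between target / 2 and target so the value does not oscillate.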
    fn adjust_ef_search(&mut self) {
        let avg_latency =
            self.latency_samples.iter().sum::<Duration>() / self.latency_samples.len() as u32;

        if avg_latency > self.target_latency {
            // Too slow, decrease ef_search
            self.current_ef_search = (self.current_ef_search * 9 / 10).max(self.min_ef_search);
        } else if avg_latency < self.target_latency / 2 {
            // Too fast, we can afford to increase ef_search for better recall
            self.current_ef_search = (self.current_ef_search * 11 / 10).min(self.max_ef_search);
        }
    }

    /// Reset the recorded latency history
    pub fn reset(&mut self) {
        self.latency_samples.clear();
    }

    /// Get average latency from recent queries
    pub fn avg_latency(&self) -> Option<Duration> {
        if self.latency_samples.is_empty() {
            None
        } else {
            Some(self.latency_samples.iter().sum::<Duration>() / self.latency_samples.len() as u32)
        }
    }
}

/// Memory optimizer for managing index memory usage
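///
/// # Examples
///
/// A minimal usage sketch; the import path assumes this module is exposed as
/// `ipfrs_semantic::optimization`:
///
/// ```
/// use ipfrs_semantic::optimization::MemoryOptimizer;
///
/// let mut optimizer = MemoryOptimizer::new(1024 * 1024 * 1024); // 1 GiB budget
/// let bytes = optimizer.estimate_memory(10_000, 768, 16);
/// assert!(bytes > 0 && bytes <= 1024 * 1024 * 1024);
/// ```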
pub struct MemoryOptimizer {
    /// Target memory budget in bytes
    target_memory: usize,
    /// Estimated memory per vector
    memory_per_vector: usize,
    /// Estimated memory already attributed to the index
    estimated_used: usize,
}

impl MemoryOptimizer {
    /// Create a new memory optimizer
    pub fn new(target_memory: usize) -> Self {
        Self {
            target_memory,
            memory_per_vector: 0,
            estimated_used: 0,
        }
    }

    /// Estimate memory usage for an index configuration
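    ///
    /// For example, 10,000 vectors of dimension 768 with M = 16 come to roughly
    /// 10,000 * 768 * 4 ≈ 30.7 MB of vector data, 10,000 * 16 * 2 * 8 ≈ 2.6 MB of
    /// graph links, and 1 MB of metadata overhead, about 34.3 MB in total.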
    pub fn estimate_memory(&mut self, num_vectors: usize, dimension: usize, m: usize) -> usize {
        // Vector storage: dimension * 4 bytes per f32
        let vector_memory = num_vectors * dimension * 4;

        // HNSW graph: approximately (M * 2) * num_vectors * 8 bytes for node IDs
        let graph_memory = num_vectors * m * 2 * 8;

        // Metadata overhead (mappings, etc.)
        let overhead = num_vectors * 100;

        let total = vector_memory + graph_memory + overhead;

        if num_vectors > 0 {
            self.memory_per_vector = total / num_vectors;
        }
        self.estimated_used = total;

        total
    }

    /// Check if adding more vectors would fit within the memory budget,
    /// accounting for memory already attributed to the index
    pub fn can_add_vectors(&self, num_new_vectors: usize) -> bool {
        let estimated_additional = num_new_vectors * self.memory_per_vector;
        self.estimated_used + estimated_additional <= self.target_memory
    }

    /// Get maximum vectors that can fit in budget
    pub fn max_vectors(&self, dimension: usize, m: usize) -> usize {
        if self.memory_per_vector == 0 {
            // First estimate
            let bytes_per_vector = dimension * 4 + m * 2 * 8 + 100;
            self.target_memory / bytes_per_vector
        } else {
            self.target_memory / self.memory_per_vector
        }
    }

    /// Recommend configuration for memory budget
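    ///
    /// For example, with a 1 GiB budget and dimension 768, M = 64 costs
    /// 768 * 4 + 64 * 2 * 8 + 100 = 4,196 bytes per vector, which fits roughly
    /// 255,000 vectors, so (64, 400, ~255_000) would be returned.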
    pub fn recommend_config(&self, dimension: usize) -> (usize, usize, usize) {
        // Prefer the largest M that still leaves room for a workable number of
        // vectors (at least 1,000) within the budget
        for m in [8, 12, 16, 24, 32, 48, 64].iter().rev() {
            let bytes_per_vector = dimension * 4 + m * 2 * 8 + 100;
            let max_vectors = self.target_memory / bytes_per_vector;

            if max_vectors >= 1000 {
                // Found a viable configuration
                let ef_construction = if max_vectors < 10_000 {
                    200
                } else if max_vectors < 100_000 {
                    300
                } else {
                    400
                };
                return (*m, ef_construction, max_vectors);
            }
        }

        // Fall back to the minimum configuration
        (
            8,
            100,
            self.target_memory / (dimension * 4 + 8 * 2 * 8 + 100),
        )
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_analyze_optimization_latency() {
        let result = analyze_optimization(10_000, 768, 16, 200, OptimizationGoal::MinimizeLatency);

        assert!(result.recommended_m <= 16);
        assert!(result.recommended_ef_search <= 50);
        assert!(!result.reasoning.is_empty());
    }

    #[test]
    fn test_analyze_optimization_recall() {
        let result = analyze_optimization(10_000, 768, 16, 200, OptimizationGoal::MaximizeRecall);

        assert!(result.recommended_m >= 16);
        assert!(result.recommended_ef_construction >= 200);
        assert!(!result.reasoning.is_empty());
    }

    #[test]
    fn test_query_optimizer() {
        let mut optimizer = QueryOptimizer::new(50, Duration::from_millis(10));

        // Record some fast queries
        for _ in 0..15 {
            optimizer.record_query(Duration::from_millis(2));
        }

        // Should increase ef_search since we're under target
        assert!(optimizer.get_ef_search() > 50);
    }

    #[test]
    fn test_query_optimizer_slow_queries() {
        let mut optimizer = QueryOptimizer::new(50, Duration::from_millis(10));

        // Record some slow queries
        for _ in 0..15 {
            optimizer.record_query(Duration::from_millis(20));
        }

        // Should decrease ef_search since we're over target
        assert!(optimizer.get_ef_search() < 50);
    }

    #[test]
    fn test_memory_optimizer() {
        let mut optimizer = MemoryOptimizer::new(1024 * 1024 * 1024); // 1GB

        let memory = optimizer.estimate_memory(10_000, 768, 16);
        assert!(memory > 0);
        assert!(memory <= 1024 * 1024 * 1024);
    }
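
    // Sketch of the budget check: after estimating usage for an initial batch,
    // can_add_vectors accounts for memory already attributed to the index.
    #[test]
    fn test_memory_optimizer_can_add() {
        let mut optimizer = MemoryOptimizer::new(64 * 1024 * 1024); // 64MB

        // ~34.3MB estimated for the initial 10k vectors, ~3.4KB per vector
        optimizer.estimate_memory(10_000, 768, 16);

        assert!(optimizer.can_add_vectors(1_000));
        assert!(!optimizer.can_add_vectors(100_000));
    }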

    #[test]
    fn test_memory_optimizer_recommend() {
        let optimizer = MemoryOptimizer::new(1024 * 1024 * 1024); // 1GB

        let (m, ef_c, max_vecs) = optimizer.recommend_config(768);

        assert!(m >= 8);
        assert!(ef_c >= 100);
        assert!(max_vecs > 0);
    }
}