use std::collections::VecDeque;
use std::time::Duration;
7
/// The property an index configuration should be tuned for.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OptimizationGoal {
    /// Favor fast queries: smaller graph connectivity and low ef values.
    MinimizeLatency,
    /// Favor search quality: denser graphs and high ef values.
    MaximizeRecall,
    /// Favor a small memory footprint: minimal graph connectivity.
    MinimizeMemory,
    /// A compromise between latency, recall, and memory.
    Balanced,
}
20
/// Output of `analyze_optimization`: recommended HNSW-style parameters
/// together with a human-readable explanation of how they were chosen.
#[derive(Debug, Clone)]
pub struct OptimizationResult {
    /// Quality score of the caller's current configuration (goal-dependent scale).
    pub current_score: f32,
    /// Recommended graph connectivity (`M`).
    pub recommended_m: usize,
    /// Recommended construction-time beam width (`ef_construction`).
    pub recommended_ef_construction: usize,
    /// Recommended query-time beam width (`ef_search`).
    pub recommended_ef_search: usize,
    /// Estimated score gain from adopting the recommendation (clamped to >= 0).
    pub estimated_improvement: f32,
    /// Notes explaining the recommendation.
    pub reasoning: Vec<String>,
}
37
38pub fn analyze_optimization(
40 index_size: usize,
41 dimension: usize,
42 current_m: usize,
43 current_ef_construction: usize,
44 goal: OptimizationGoal,
45) -> OptimizationResult {
46 let mut reasoning = Vec::new();
47
48 let (recommended_m, recommended_ef_construction, recommended_ef_search) = match goal {
50 OptimizationGoal::MinimizeLatency => {
51 reasoning
52 .push("Optimizing for low latency with reduced graph connectivity".to_string());
53 let m = if index_size < 10_000 {
54 12
55 } else if index_size < 100_000 {
56 16
57 } else {
58 20
59 };
60 (m, 150, 32)
61 }
62 OptimizationGoal::MaximizeRecall => {
63 reasoning
64 .push("Optimizing for high recall with increased graph connectivity".to_string());
65 let m = if index_size < 10_000 {
66 32
67 } else if index_size < 100_000 {
68 48
69 } else {
70 64
71 };
72 (m, 400, 200)
73 }
74 OptimizationGoal::MinimizeMemory => {
75 reasoning.push("Optimizing for low memory with minimal graph connectivity".to_string());
76 (8, 100, 50)
77 }
78 OptimizationGoal::Balanced => {
79 reasoning.push("Balanced optimization for general use cases".to_string());
80 let m = if index_size < 10_000 {
81 16
82 } else if index_size < 100_000 {
83 24
84 } else {
85 32
86 };
87 let ef_c = if index_size < 10_000 { 200 } else { 300 };
88 (m, ef_c, 100)
89 }
90 };
91
92 let current_score =
94 evaluate_config_quality(current_m, current_ef_construction, index_size, goal);
95
96 let recommended_score =
98 evaluate_config_quality(recommended_m, recommended_ef_construction, index_size, goal);
99
100 let estimated_improvement = (recommended_score - current_score).max(0.0);
101
102 if current_m < recommended_m {
104 reasoning.push(format!(
105 "Increase M from {} to {} for better connectivity",
106 current_m, recommended_m
107 ));
108 } else if current_m > recommended_m {
109 reasoning.push(format!(
110 "Decrease M from {} to {} to reduce memory usage",
111 current_m, recommended_m
112 ));
113 }
114
115 if dimension > 1024 {
116 reasoning
117 .push("High dimensionality detected. Consider dimensionality reduction.".to_string());
118 }
119
120 if index_size > 1_000_000 {
121 reasoning.push("Large index detected. Consider using DiskANN or partitioning.".to_string());
122 }
123
124 OptimizationResult {
125 current_score,
126 recommended_m,
127 recommended_ef_construction,
128 recommended_ef_search,
129 estimated_improvement,
130 reasoning,
131 }
132}
133
134fn evaluate_config_quality(
136 m: usize,
137 ef_construction: usize,
138 index_size: usize,
139 goal: OptimizationGoal,
140) -> f32 {
141 let optimal_m = match index_size {
142 0..=10_000 => 16,
143 10_001..=100_000 => 24,
144 _ => 32,
145 };
146
147 let optimal_ef_c = match index_size {
148 0..=10_000 => 200,
149 10_001..=100_000 => 300,
150 _ => 400,
151 };
152
153 let m_score = 1.0 - ((m as f32 - optimal_m as f32).abs() / optimal_m as f32).min(1.0);
155 let ef_score =
156 1.0 - ((ef_construction as f32 - optimal_ef_c as f32).abs() / optimal_ef_c as f32).min(1.0);
157
158 match goal {
160 OptimizationGoal::MinimizeLatency => {
161 let latency_penalty = (m as f32 / 32.0).min(1.0) * 0.5;
163 (m_score * 0.3 + ef_score * 0.7) * (1.0 - latency_penalty)
164 }
165 OptimizationGoal::MaximizeRecall => {
166 let recall_bonus = (m as f32 / 64.0).min(1.0) * 0.3;
168 (m_score * 0.7 + ef_score * 0.3) * (1.0 + recall_bonus)
169 }
170 OptimizationGoal::MinimizeMemory => {
171 let memory_penalty = (m as f32 / 16.0).min(1.0) * 0.7;
173 m_score * (1.0 - memory_penalty)
174 }
175 OptimizationGoal::Balanced => {
176 (m_score + ef_score) / 2.0
178 }
179 }
180}
181
/// Adaptively tunes `ef_search` at query time: keeps a sliding window of
/// observed query latencies and nudges `ef_search` down when queries exceed
/// the latency target, and up when there is ample headroom.
pub struct QueryOptimizer {
    /// Sliding window of the most recent query latencies.
    latency_samples: VecDeque<Duration>,
    /// Maximum number of samples retained in the window.
    max_samples: usize,
    /// Current `ef_search` recommendation.
    current_ef_search: usize,
    /// Lower clamp applied when shrinking `ef_search`.
    min_ef_search: usize,
    /// Upper clamp applied when growing `ef_search`.
    max_ef_search: usize,
    /// Latency budget the optimizer tries to stay under.
    target_latency: Duration,
}

impl QueryOptimizer {
    /// Creates an optimizer starting at `initial_ef_search` that aims to keep
    /// the average query latency at or below `target_latency`.
    pub fn new(initial_ef_search: usize, target_latency: Duration) -> Self {
        Self {
            latency_samples: VecDeque::new(),
            max_samples: 100,
            current_ef_search: initial_ef_search,
            min_ef_search: 16,
            max_ef_search: 512,
            target_latency,
        }
    }

    /// Records one query latency and, once at least 10 samples are available,
    /// re-tunes `ef_search`.
    pub fn record_query(&mut self, latency: Duration) {
        self.latency_samples.push_back(latency);
        // O(1) eviction of the oldest sample (was `Vec::remove(0)`, O(n)).
        if self.latency_samples.len() > self.max_samples {
            self.latency_samples.pop_front();
        }

        // Wait for a minimally meaningful sample before adjusting.
        if self.latency_samples.len() >= 10 {
            self.adjust_ef_search();
        }
    }

    /// Returns the current `ef_search` recommendation.
    pub fn get_ef_search(&self) -> usize {
        self.current_ef_search
    }

    /// Moves `ef_search` roughly 10% toward the latency target:
    /// - too slow (avg > target): shrink, clamped to `min_ef_search`;
    /// - lots of headroom (avg < target / 2): grow, clamped to `max_ef_search`.
    fn adjust_ef_search(&mut self) {
        let avg_latency = match self.avg_latency() {
            Some(avg) => avg,
            None => return, // No samples recorded; nothing to adjust.
        };

        if avg_latency > self.target_latency {
            self.current_ef_search = (self.current_ef_search * 9 / 10).max(self.min_ef_search);
        } else if avg_latency < self.target_latency / 2 {
            // `x * 11 / 10` is a no-op for x <= 9 due to integer division, so
            // also force at least +1 to guarantee forward progress.
            self.current_ef_search = (self.current_ef_search * 11 / 10)
                .max(self.current_ef_search + 1)
                .min(self.max_ef_search);
        }
    }

    /// Discards all recorded samples; `ef_search` keeps its current value.
    pub fn reset(&mut self) {
        self.latency_samples.clear();
    }

    /// Mean of the recorded latencies, or `None` if no samples were recorded.
    pub fn avg_latency(&self) -> Option<Duration> {
        if self.latency_samples.is_empty() {
            None
        } else {
            Some(self.latency_samples.iter().sum::<Duration>() / self.latency_samples.len() as u32)
        }
    }
}
257
/// Estimates index memory usage against a fixed byte budget and recommends
/// configurations that fit within it.
pub struct MemoryOptimizer {
    /// Memory budget in bytes.
    target_memory: usize,
    /// Per-vector footprint cached by the last `estimate_memory` call
    /// (0 until an estimate has been made).
    memory_per_vector: usize,
}

impl MemoryOptimizer {
    /// Creates an optimizer with a memory budget of `target_memory` bytes.
    pub fn new(target_memory: usize) -> Self {
        Self {
            target_memory,
            memory_per_vector: 0,
        }
    }

    /// Estimates total index memory in bytes for `num_vectors` vectors of the
    /// given `dimension` with graph connectivity `m`, caching the per-vector
    /// cost for later budget checks.
    pub fn estimate_memory(&mut self, num_vectors: usize, dimension: usize, m: usize) -> usize {
        // Vector data: 4 bytes per component (f32).
        let vector_bytes = num_vectors * dimension * 4;
        // Graph links: up to 2*M neighbors per node, 8 bytes per link.
        let graph_bytes = num_vectors * m * 2 * 8;
        // Flat per-vector bookkeeping estimate.
        let overhead_bytes = num_vectors * 100;

        let total = vector_bytes + graph_bytes + overhead_bytes;
        if num_vectors > 0 {
            self.memory_per_vector = total / num_vectors;
        }
        total
    }

    /// Whether `num_new_vectors` additional vectors fit the budget, based on
    /// the cached per-vector cost.
    /// NOTE(review): before any `estimate_memory` call the cached cost is 0,
    /// so this returns `true` for any count — confirm that is intended.
    pub fn can_add_vectors(&self, num_new_vectors: usize) -> bool {
        num_new_vectors * self.memory_per_vector <= self.target_memory
    }

    /// Maximum number of vectors that fit the budget, using the cached
    /// per-vector cost when available and a formula-based estimate otherwise.
    pub fn max_vectors(&self, dimension: usize, m: usize) -> usize {
        let per_vector = if self.memory_per_vector == 0 {
            dimension * 4 + m * 2 * 8 + 100
        } else {
            self.memory_per_vector
        };
        self.target_memory / per_vector
    }

    /// Recommends `(m, ef_construction, max_vectors)` for the budget: the
    /// largest M from a preset ladder that still leaves room for at least
    /// 1000 vectors, with `ef_construction` scaled to the resulting capacity.
    pub fn recommend_config(&self, dimension: usize) -> (usize, usize, usize) {
        // Try the densest graph first, falling back to smaller M values.
        for &m in [8, 12, 16, 24, 32, 48, 64].iter().rev() {
            let per_vector = dimension * 4 + m * 2 * 8 + 100;
            let capacity = self.target_memory / per_vector;

            if capacity >= 1000 {
                let ef_construction = match capacity {
                    0..=9_999 => 200,
                    10_000..=99_999 => 300,
                    _ => 400,
                };
                return (m, ef_construction, capacity);
            }
        }

        // Nothing on the ladder fits 1000 vectors: fall back to the most
        // compact configuration (M = 8, minimal ef_construction).
        let per_vector = dimension * 4 + 8 * 2 * 8 + 100;
        (8, 100, self.target_memory / per_vector)
    }
}
340
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_analyze_optimization_latency() {
        // Latency goal should keep the graph lean and ef_search low.
        let result = analyze_optimization(10_000, 768, 16, 200, OptimizationGoal::MinimizeLatency);

        assert!(result.recommended_m <= 16);
        assert!(result.recommended_ef_search <= 50);
        assert!(!result.reasoning.is_empty());
    }

    #[test]
    fn test_analyze_optimization_recall() {
        // Recall goal should densify the graph and build more carefully.
        let result = analyze_optimization(10_000, 768, 16, 200, OptimizationGoal::MaximizeRecall);

        assert!(result.recommended_m >= 16);
        assert!(result.recommended_ef_construction >= 200);
        assert!(!result.reasoning.is_empty());
    }

    #[test]
    fn test_query_optimizer() {
        // Consistently fast queries should push ef_search upward.
        let mut optimizer = QueryOptimizer::new(50, Duration::from_millis(10));
        (0..15).for_each(|_| optimizer.record_query(Duration::from_millis(2)));

        assert!(optimizer.get_ef_search() > 50);
    }

    #[test]
    fn test_query_optimizer_slow_queries() {
        // Consistently slow queries should pull ef_search downward.
        let mut optimizer = QueryOptimizer::new(50, Duration::from_millis(10));
        (0..15).for_each(|_| optimizer.record_query(Duration::from_millis(20)));

        assert!(optimizer.get_ef_search() < 50);
    }

    #[test]
    fn test_memory_optimizer() {
        let mut optimizer = MemoryOptimizer::new(1024 * 1024 * 1024);
        let memory = optimizer.estimate_memory(10_000, 768, 16);

        assert!(memory > 0);
        assert!(memory <= 1024 * 1024 * 1024);
    }

    #[test]
    fn test_memory_optimizer_recommend() {
        let optimizer = MemoryOptimizer::new(1024 * 1024 * 1024);
        let (m, ef_c, max_vecs) = optimizer.recommend_config(768);

        assert!(m >= 8);
        assert!(ef_c >= 100);
        assert!(max_vecs > 0);
    }
}