1use anyhow::Result;
42use serde::{Deserialize, Serialize};
43use std::time::{Duration, Instant};
44
45use crate::types::SearchResult;
46
47#[derive(Debug, Clone, Serialize, Deserialize)]
49pub struct ProfilingConfig {
50 pub detailed_timing: bool,
52 pub memory_profiling: bool,
54 pub slow_query_threshold_ms: u64,
56 pub enable_recommendations: bool,
58}
59
60impl Default for ProfilingConfig {
61 fn default() -> Self {
62 Self {
63 detailed_timing: true,
64 memory_profiling: false,
65 slow_query_threshold_ms: 100,
66 enable_recommendations: true,
67 }
68 }
69}
70
71#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
73pub enum Bottleneck {
74 HighDimensionality,
76 DatasetSize,
78 FilterSelectivity,
80 HighK,
82 None,
84}
85
86#[derive(Debug, Clone, Serialize, Deserialize)]
88pub struct Recommendation {
89 pub category: String,
91 pub description: String,
93 pub impact: ImpactLevel,
95}
96
97#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
99pub enum ImpactLevel {
100 High,
102 Medium,
104 Low,
106}
107
108#[derive(Debug, Clone, Serialize, Deserialize)]
110pub struct QueryProfile {
111 pub total_duration: Duration,
113 pub result_count: usize,
115 pub bottleneck: Bottleneck,
117 pub recommendations: Vec<Recommendation>,
119 pub is_slow_query: bool,
121}
122
123#[derive(Debug, Clone)]
125pub struct QueryProfiler {
126 config: ProfilingConfig,
127}
128
129impl QueryProfiler {
130 pub fn new(config: ProfilingConfig) -> Self {
132 Self { config }
133 }
134
135 pub fn profile_search<F>(&mut self, f: F) -> Result<QueryProfile>
140 where
141 F: FnOnce() -> Result<Vec<SearchResult>>,
142 {
143 let start = Instant::now();
144 let results = f()?;
145 let duration = start.elapsed();
146
147 let result_count = results.len();
148 let is_slow_query = duration.as_millis() > self.config.slow_query_threshold_ms as u128;
149
150 let bottleneck = self.detect_bottleneck(&results, duration);
151 let recommendations = if self.config.enable_recommendations {
152 self.generate_recommendations(&bottleneck, duration, result_count)
153 } else {
154 Vec::new()
155 };
156
157 Ok(QueryProfile {
158 total_duration: duration,
159 result_count,
160 bottleneck,
161 recommendations,
162 is_slow_query,
163 })
164 }
165
166 fn detect_bottleneck(&self, _results: &[SearchResult], duration: Duration) -> Bottleneck {
168 if duration.as_millis() > 1000 {
170 Bottleneck::DatasetSize
172 } else {
173 Bottleneck::None
174 }
175 }
176
177 fn generate_recommendations(
179 &self,
180 bottleneck: &Bottleneck,
181 duration: Duration,
182 result_count: usize,
183 ) -> Vec<Recommendation> {
184 let mut recommendations = Vec::new();
185
186 match bottleneck {
187 Bottleneck::DatasetSize => {
188 recommendations.push(Recommendation {
189 category: "Index Strategy".to_string(),
190 description:
191 "Consider using HNSW or IVF-PQ for approximate search on large datasets"
192 .to_string(),
193 impact: ImpactLevel::High,
194 });
195 }
196 Bottleneck::HighDimensionality => {
197 recommendations.push(Recommendation {
198 category: "Dimensionality".to_string(),
199 description: "Consider using dimensionality reduction (PCA) or quantization"
200 .to_string(),
201 impact: ImpactLevel::Medium,
202 });
203 }
204 Bottleneck::FilterSelectivity => {
205 recommendations.push(Recommendation {
206 category: "Filtering".to_string(),
207 description: "Use pre-filtering for highly selective filters".to_string(),
208 impact: ImpactLevel::Medium,
209 });
210 }
211 Bottleneck::HighK => {
212 recommendations.push(Recommendation {
213 category: "Query Parameters".to_string(),
214 description: "Reduce k value if you don't need all top results".to_string(),
215 impact: ImpactLevel::Low,
216 });
217 }
218 Bottleneck::None => {}
219 }
220
221 if duration.as_millis() > self.config.slow_query_threshold_ms as u128 && result_count > 100
223 {
224 recommendations.push(Recommendation {
225 category: "Result Count".to_string(),
226 description: "Consider reducing k to improve query speed".to_string(),
227 impact: ImpactLevel::Low,
228 });
229 }
230
231 recommendations
232 }
233
234 pub fn config(&self) -> &ProfilingConfig {
236 &self.config
237 }
238}
239
/// Stateless checker that turns index statistics into tuning advice.
#[derive(Debug)]
pub struct IndexHealthChecker;
243
244impl IndexHealthChecker {
245 pub fn new() -> Self {
247 Self
248 }
249
250 pub fn check_health(
257 &self,
258 num_vectors: usize,
259 dimensions: usize,
260 avg_query_time_ms: f64,
261 ) -> Vec<Recommendation> {
262 let mut recommendations = Vec::new();
263
264 if dimensions > 1024 {
266 recommendations.push(Recommendation {
267 category: "Dimensionality".to_string(),
268 description: format!(
269 "Vector dimensionality ({}) is very high. Consider dimensionality reduction.",
270 dimensions
271 ),
272 impact: ImpactLevel::Medium,
273 });
274 }
275
276 if num_vectors > 100_000 && avg_query_time_ms > 50.0 {
278 recommendations.push(Recommendation {
279 category: "Index Strategy".to_string(),
280 description: "Large dataset with slow queries. Consider using HNSW or IVF-PQ."
281 .to_string(),
282 impact: ImpactLevel::High,
283 });
284 }
285
286 if num_vectors > 10_000_000 {
288 recommendations.push(Recommendation {
289 category: "Scalability".to_string(),
290 description: "Very large dataset. Consider distributed search with sharding."
291 .to_string(),
292 impact: ImpactLevel::High,
293 });
294 }
295
296 if avg_query_time_ms > 100.0 {
298 recommendations.push(Recommendation {
299 category: "Performance".to_string(),
300 description:
301 "Queries are slow. Consider enabling SIMD optimizations or using quantization."
302 .to_string(),
303 impact: ImpactLevel::High,
304 });
305 }
306
307 recommendations
308 }
309}
310
311impl Default for IndexHealthChecker {
312 fn default() -> Self {
313 Self::new()
314 }
315}
316
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a profiler backed by the default configuration.
    fn default_profiler() -> QueryProfiler {
        QueryProfiler::new(ProfilingConfig::default())
    }

    /// Reports whether any recommendation in `advice` has the given category.
    fn has_category(advice: &[Recommendation], category: &str) -> bool {
        advice.iter().any(|r| r.category == category)
    }

    #[test]
    fn test_profiling_config_default() {
        let defaults = ProfilingConfig::default();
        assert!(defaults.detailed_timing);
        assert!(defaults.enable_recommendations);
        assert_eq!(defaults.slow_query_threshold_ms, 100);
    }

    #[test]
    fn test_query_profiler_creation() {
        let profiler = default_profiler();
        assert!(profiler.config().enable_recommendations);
    }

    #[test]
    fn test_profile_fast_query() {
        let mut profiler = default_profiler();

        // A 10 ms search stays well under the default 100 ms threshold.
        let quick_search = || -> Result<Vec<SearchResult>> {
            std::thread::sleep(Duration::from_millis(10));
            let hit = SearchResult {
                entity_id: "doc1".to_string(),
                score: 0.95,
                distance: 0.05,
                rank: 1,
            };
            Ok(vec![hit])
        };

        let profile = profiler.profile_search(quick_search).unwrap();

        assert_eq!(profile.result_count, 1);
        assert!(!profile.is_slow_query);
    }

    #[test]
    fn test_profile_slow_query() {
        let strict_config = ProfilingConfig {
            slow_query_threshold_ms: 50,
            ..Default::default()
        };
        let mut profiler = QueryProfiler::new(strict_config);

        // A 150 ms search exceeds the 50 ms threshold configured above.
        let sluggish_search = || -> Result<Vec<SearchResult>> {
            std::thread::sleep(Duration::from_millis(150));
            Ok(Vec::new())
        };

        let profile = profiler.profile_search(sluggish_search).unwrap();

        assert!(profile.is_slow_query);
        assert!(profile.total_duration.as_millis() >= 150);
    }

    #[test]
    fn test_bottleneck_detection_slow_query() {
        let profiler = default_profiler();
        let no_results: Vec<SearchResult> = Vec::new();

        let detected = profiler.detect_bottleneck(&no_results, Duration::from_millis(2000));

        assert_eq!(detected, Bottleneck::DatasetSize);
    }

    #[test]
    fn test_bottleneck_detection_fast_query() {
        let profiler = default_profiler();
        let no_results: Vec<SearchResult> = Vec::new();

        let detected = profiler.detect_bottleneck(&no_results, Duration::from_millis(10));

        assert_eq!(detected, Bottleneck::None);
    }

    #[test]
    fn test_generate_recommendations_dataset_size() {
        let profiler = default_profiler();

        let advice = profiler.generate_recommendations(
            &Bottleneck::DatasetSize,
            Duration::from_millis(100),
            10,
        );

        assert!(!advice.is_empty());
        let first = &advice[0];
        assert_eq!(first.category, "Index Strategy");
        assert_eq!(first.impact, ImpactLevel::High);
    }

    #[test]
    fn test_generate_recommendations_high_k() {
        let profiler = default_profiler();

        // Slow (150 ms > 100 ms) with many results (200 > 100).
        let advice =
            profiler.generate_recommendations(&Bottleneck::None, Duration::from_millis(150), 200);

        assert!(!advice.is_empty());
    }

    #[test]
    fn test_index_health_checker_creation() {
        let advice = IndexHealthChecker::new().check_health(1000, 768, 10.0);
        assert!(advice.is_empty());
    }

    #[test]
    fn test_index_health_high_dimensionality() {
        let advice = IndexHealthChecker::new().check_health(10_000, 2048, 10.0);

        assert!(!advice.is_empty());
        assert!(has_category(&advice, "Dimensionality"));
    }

    #[test]
    fn test_index_health_large_dataset_slow() {
        let advice = IndexHealthChecker::new().check_health(200_000, 768, 100.0);

        assert!(!advice.is_empty());
        assert!(has_category(&advice, "Index Strategy") || has_category(&advice, "Performance"));
    }

    #[test]
    fn test_index_health_very_large_dataset() {
        let advice = IndexHealthChecker::new().check_health(15_000_000, 768, 50.0);

        assert!(!advice.is_empty());
        assert!(has_category(&advice, "Scalability"));
    }

    #[test]
    fn test_recommendation_impact_levels() {
        let levels = [ImpactLevel::High, ImpactLevel::Medium, ImpactLevel::Low];

        for level in levels.iter().copied() {
            let recommendation = Recommendation {
                category: "Test".to_string(),
                description: "Test".to_string(),
                impact: level,
            };
            assert_eq!(recommendation.impact, level);
        }
    }
}