use anyhow::Result;
use serde::{Deserialize, Serialize};
use std::time::{Duration, Instant};
use crate::types::SearchResult;
/// Settings that control how [`QueryProfiler`] profiles a search.
///
/// Derives serde `Serialize`/`Deserialize`, so it can be stored in or loaded
/// from a serialized configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProfilingConfig {
/// NOTE(review): not read by any code visible in this file; presumably enables
/// finer-grained timing collection — confirm against other users of this config.
pub detailed_timing: bool,
/// NOTE(review): not read by any code visible in this file; presumably enables
/// memory usage tracking — confirm against other users of this config.
pub memory_profiling: bool,
/// Queries that take longer than this many milliseconds are flagged as slow.
pub slow_query_threshold_ms: u64,
/// When `false`, `QueryProfiler::profile_search` returns an empty
/// recommendation list.
pub enable_recommendations: bool,
}
impl Default for ProfilingConfig {
fn default() -> Self {
Self {
detailed_timing: true,
memory_profiling: false,
slow_query_threshold_ms: 100,
enable_recommendations: true,
}
}
}
/// Suspected cause of a slow query, as recorded in a `QueryProfile`.
///
/// Each variant other than `None` maps to one recommendation in
/// `QueryProfiler::generate_recommendations`. The detection logic visible in
/// this file only ever produces `DatasetSize` or `None`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum Bottleneck {
/// Mapped to a "Dimensionality" recommendation (PCA or quantization).
HighDimensionality,
/// Mapped to an "Index Strategy" recommendation (HNSW or IVF-PQ); reported
/// for queries that run longer than one second.
DatasetSize,
/// Mapped to a "Filtering" recommendation (pre-filtering).
FilterSelectivity,
/// Mapped to a "Query Parameters" recommendation (reduce `k`).
HighK,
/// No bottleneck identified; produces no bottleneck-specific recommendation.
None,
}
/// A single piece of tuning advice produced by the profiler or health checker.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Recommendation {
/// Short grouping label, e.g. "Index Strategy" or "Dimensionality".
pub category: String,
/// Human-readable advice text.
pub description: String,
/// Impact rating attached to this advice.
pub impact: ImpactLevel,
}
/// Impact rating carried by a `Recommendation`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum ImpactLevel {
/// Highest rating; used for index-strategy, scalability and performance advice.
High,
/// Middle rating; used for dimensionality and filtering advice.
Medium,
/// Lowest rating; used for advice about reducing `k` / result count.
Low,
}
/// Outcome of profiling one search call with `QueryProfiler::profile_search`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QueryProfile {
/// Wall-clock time measured around the profiled search closure.
pub total_duration: Duration,
/// Number of results the search closure returned.
pub result_count: usize,
/// Bottleneck classification derived from the measured duration.
pub bottleneck: Bottleneck,
/// Tuning advice; empty when recommendations are disabled in the config.
pub recommendations: Vec<Recommendation>,
/// Whether the measured duration exceeded the configured slow-query threshold.
pub is_slow_query: bool,
}
/// Times a search closure and summarizes the run as a `QueryProfile`.
#[derive(Debug, Clone)]
pub struct QueryProfiler {
// Thresholds and feature toggles; exposed read-only through `config()`.
config: ProfilingConfig,
}
impl QueryProfiler {
    /// Elapsed time above which a query is attributed to dataset size.
    const DATASET_SIZE_THRESHOLD: Duration = Duration::from_secs(1);

    /// Result count above which a slow query also gets the "Result Count" hint.
    const LARGE_RESULT_COUNT: usize = 100;

    /// Creates a profiler that uses `config` for its thresholds and toggles.
    pub fn new(config: ProfilingConfig) -> Self {
        Self { config }
    }

    /// Runs `f`, measures its wall-clock time, and summarizes the run.
    ///
    /// Only the call to `f` is timed. The returned profile records the elapsed
    /// time, the number of results, the detected bottleneck, whether the query
    /// exceeded the slow-query threshold, and (when enabled in the config) a
    /// list of recommendations.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `f`; no profile is produced in that case.
    pub fn profile_search<F>(&mut self, f: F) -> Result<QueryProfile>
    where
        F: FnOnce() -> Result<Vec<SearchResult>>,
    {
        let start = Instant::now();
        let results = f()?;
        let duration = start.elapsed();

        let result_count = results.len();
        let bottleneck = self.detect_bottleneck(&results, duration);
        let recommendations = if self.config.enable_recommendations {
            self.generate_recommendations(&bottleneck, duration, result_count)
        } else {
            Vec::new()
        };

        Ok(QueryProfile {
            total_duration: duration,
            result_count,
            bottleneck,
            recommendations,
            is_slow_query: self.is_slow(duration),
        })
    }

    /// Returns `true` when `duration` is strictly longer than the configured
    /// slow-query threshold.
    ///
    /// Compares `Duration` values directly rather than truncated millisecond
    /// counts, so an overrun of less than one millisecond still counts as slow.
    fn is_slow(&self, duration: Duration) -> bool {
        duration > Duration::from_millis(self.config.slow_query_threshold_ms)
    }

    /// Classifies the bottleneck from the elapsed time alone.
    ///
    /// Queries strictly longer than one second are attributed to dataset size;
    /// everything else reports no bottleneck. The results are currently unused.
    fn detect_bottleneck(&self, _results: &[SearchResult], duration: Duration) -> Bottleneck {
        if duration > Self::DATASET_SIZE_THRESHOLD {
            Bottleneck::DatasetSize
        } else {
            Bottleneck::None
        }
    }

    /// Builds the advice list for one profiled query.
    ///
    /// Emits at most one recommendation for `bottleneck`, followed by a
    /// "Result Count" hint when the query was slow and returned more than
    /// 100 results.
    fn generate_recommendations(
        &self,
        bottleneck: &Bottleneck,
        duration: Duration,
        result_count: usize,
    ) -> Vec<Recommendation> {
        // (category, description, impact) for the detected bottleneck, if any.
        // The match is kept exhaustive so a new variant forces a decision here.
        let bottleneck_advice = match bottleneck {
            Bottleneck::DatasetSize => Some((
                "Index Strategy",
                "Consider using HNSW or IVF-PQ for approximate search on large datasets",
                ImpactLevel::High,
            )),
            Bottleneck::HighDimensionality => Some((
                "Dimensionality",
                "Consider using dimensionality reduction (PCA) or quantization",
                ImpactLevel::Medium,
            )),
            Bottleneck::FilterSelectivity => Some((
                "Filtering",
                "Use pre-filtering for highly selective filters",
                ImpactLevel::Medium,
            )),
            Bottleneck::HighK => Some((
                "Query Parameters",
                "Reduce k value if you don't need all top results",
                ImpactLevel::Low,
            )),
            Bottleneck::None => None,
        };

        let mut recommendations: Vec<Recommendation> = bottleneck_advice
            .into_iter()
            .map(|(category, description, impact)| Recommendation {
                category: category.to_string(),
                description: description.to_string(),
                impact,
            })
            .collect();

        if self.is_slow(duration) && result_count > Self::LARGE_RESULT_COUNT {
            recommendations.push(Recommendation {
                category: "Result Count".to_string(),
                description: "Consider reducing k to improve query speed".to_string(),
                impact: ImpactLevel::Low,
            });
        }

        recommendations
    }

    /// Returns the configuration this profiler was built with.
    pub fn config(&self) -> &ProfilingConfig {
        &self.config
    }
}
/// Stateless checker that turns index statistics into `Recommendation`s.
#[derive(Debug)]
pub struct IndexHealthChecker;
impl IndexHealthChecker {
pub fn new() -> Self {
Self
}
pub fn check_health(
&self,
num_vectors: usize,
dimensions: usize,
avg_query_time_ms: f64,
) -> Vec<Recommendation> {
let mut recommendations = Vec::new();
if dimensions > 1024 {
recommendations.push(Recommendation {
category: "Dimensionality".to_string(),
description: format!(
"Vector dimensionality ({}) is very high. Consider dimensionality reduction.",
dimensions
),
impact: ImpactLevel::Medium,
});
}
if num_vectors > 100_000 && avg_query_time_ms > 50.0 {
recommendations.push(Recommendation {
category: "Index Strategy".to_string(),
description: "Large dataset with slow queries. Consider using HNSW or IVF-PQ."
.to_string(),
impact: ImpactLevel::High,
});
}
if num_vectors > 10_000_000 {
recommendations.push(Recommendation {
category: "Scalability".to_string(),
description: "Very large dataset. Consider distributed search with sharding."
.to_string(),
impact: ImpactLevel::High,
});
}
if avg_query_time_ms > 100.0 {
recommendations.push(Recommendation {
category: "Performance".to_string(),
description:
"Queries are slow. Consider enabling SIMD optimizations or using quantization."
.to_string(),
impact: ImpactLevel::High,
});
}
recommendations
}
}
impl Default for IndexHealthChecker {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Profiler built from the default configuration.
    fn default_profiler() -> QueryProfiler {
        QueryProfiler::new(ProfilingConfig::default())
    }

    /// True when at least one recommendation carries the given category label.
    fn has_category(recommendations: &[Recommendation], category: &str) -> bool {
        recommendations.iter().any(|rec| rec.category == category)
    }

    /// Recommendation with placeholder text and the given impact.
    fn recommendation_with_impact(impact: ImpactLevel) -> Recommendation {
        Recommendation {
            category: "Test".to_string(),
            description: "Test".to_string(),
            impact,
        }
    }

    #[test]
    fn test_profiling_config_default() {
        let defaults = ProfilingConfig::default();
        assert!(defaults.detailed_timing);
        assert!(defaults.enable_recommendations);
        assert_eq!(defaults.slow_query_threshold_ms, 100);
    }

    #[test]
    fn test_query_profiler_creation() {
        let profiler = default_profiler();
        assert!(profiler.config().enable_recommendations);
    }

    #[test]
    fn test_profile_fast_query() {
        let mut profiler = default_profiler();

        // A 10 ms search stays well under the default 100 ms threshold.
        let quick_search = || -> Result<Vec<SearchResult>> {
            std::thread::sleep(Duration::from_millis(10));
            let hit = SearchResult {
                entity_id: "doc1".to_string(),
                score: 0.95,
                distance: 0.05,
                rank: 1,
            };
            Ok(vec![hit])
        };

        let profile = profiler.profile_search(quick_search).unwrap();
        assert_eq!(profile.result_count, 1);
        assert!(!profile.is_slow_query);
    }

    #[test]
    fn test_profile_slow_query() {
        let mut profiler = QueryProfiler::new(ProfilingConfig {
            slow_query_threshold_ms: 50,
            ..Default::default()
        });

        // A 150 ms search exceeds the lowered 50 ms threshold.
        let sluggish_search = || -> Result<Vec<SearchResult>> {
            std::thread::sleep(Duration::from_millis(150));
            Ok(vec![])
        };

        let profile = profiler.profile_search(sluggish_search).unwrap();
        assert!(profile.is_slow_query);
        assert!(profile.total_duration.as_millis() >= 150);
    }

    #[test]
    fn test_bottleneck_detection_slow_query() {
        let detected = default_profiler().detect_bottleneck(&[], Duration::from_millis(2000));
        assert_eq!(detected, Bottleneck::DatasetSize);
    }

    #[test]
    fn test_bottleneck_detection_fast_query() {
        let detected = default_profiler().detect_bottleneck(&[], Duration::from_millis(10));
        assert_eq!(detected, Bottleneck::None);
    }

    #[test]
    fn test_generate_recommendations_dataset_size() {
        let advice = default_profiler().generate_recommendations(
            &Bottleneck::DatasetSize,
            Duration::from_millis(100),
            10,
        );
        assert!(!advice.is_empty());

        let first = &advice[0];
        assert_eq!(first.category, "Index Strategy");
        assert_eq!(first.impact, ImpactLevel::High);
    }

    #[test]
    fn test_generate_recommendations_high_k() {
        // Slow query (150 ms > 100 ms) with more than 100 results.
        let advice = default_profiler().generate_recommendations(
            &Bottleneck::None,
            Duration::from_millis(150),
            200,
        );
        assert!(!advice.is_empty());
    }

    #[test]
    fn test_index_health_checker_creation() {
        let advice = IndexHealthChecker::new().check_health(1000, 768, 10.0);
        assert!(advice.is_empty());
    }

    #[test]
    fn test_index_health_high_dimensionality() {
        let advice = IndexHealthChecker::new().check_health(10_000, 2048, 10.0);
        assert!(!advice.is_empty());
        assert!(has_category(&advice, "Dimensionality"));
    }

    #[test]
    fn test_index_health_large_dataset_slow() {
        let advice = IndexHealthChecker::new().check_health(200_000, 768, 100.0);
        assert!(!advice.is_empty());
        assert!(has_category(&advice, "Index Strategy") || has_category(&advice, "Performance"));
    }

    #[test]
    fn test_index_health_very_large_dataset() {
        let advice = IndexHealthChecker::new().check_health(15_000_000, 768, 50.0);
        assert!(!advice.is_empty());
        assert!(has_category(&advice, "Scalability"));
    }

    #[test]
    fn test_recommendation_impact_levels() {
        for &level in &[ImpactLevel::High, ImpactLevel::Medium, ImpactLevel::Low] {
            let recommendation = recommendation_with_impact(level);
            assert_eq!(recommendation.impact, level);
        }
    }
}