use crate::benchmarking::{BenchmarkConfig, BenchmarkDataset, BenchmarkOutputFormat};
use crate::VectorIndex;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::time::Duration;
/// Configuration for an advanced benchmark run.
///
/// Wraps a basic [`BenchmarkConfig`] and adds statistical thresholds plus a
/// set of boolean switches. How each switch is consumed is decided by code
/// outside this type.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AdvancedBenchmarkConfig {
    /// The basic benchmark configuration this one builds on.
    pub base_config: BenchmarkConfig,
    /// Confidence level used for statistical reporting (`new()` sets `0.95`).
    pub confidence_level: f64,
    /// Minimum number of runs (`new()` sets `10`).
    pub min_runs: usize,
    /// Upper bound on "cv" (`new()` sets `0.05`).
    /// NOTE(review): presumably the coefficient of variation — confirm.
    pub max_cv: f64,
    /// Switch for memory profiling.
    pub memory_profiling: bool,
    /// Switch for collecting the latency distribution.
    pub latency_distribution: bool,
    /// Switch for throughput testing.
    pub throughput_testing: bool,
    /// Switch for quality-degradation analysis.
    pub quality_degradation: bool,
    /// Switch for hyperparameter optimization.
    pub hyperparameter_optimization: bool,
    /// Switch for comparative analysis.
    pub comparative_analysis: bool,
    /// Switch for ANN-Benchmarks mode; set by `ann_benchmarks_compatible()`.
    pub ann_benchmarks_mode: bool,
    /// Switch for exporting traces.
    pub export_traces: bool,
    /// Threading-related options.
    pub parallel_config: ParallelBenchmarkConfig,
}
/// Threading-related options for a benchmark run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ParallelBenchmarkConfig {
    /// Number of threads to use.
    pub num_threads: usize,
    /// Switch for NUMA-aware execution.
    pub numa_aware: bool,
    /// Switch for thread affinity.
    pub thread_affinity: bool,
    /// Switch for a memory-bandwidth test.
    pub memory_bandwidth_test: bool,
}
/// A [`BenchmarkDataset`] bundled with derived statistics and quality figures.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EnhancedBenchmarkDataset {
    /// The dataset being described.
    pub base_dataset: BenchmarkDataset,
    /// Summary statistics for the dataset.
    pub statistics: DatasetStatistics,
    /// Quality figures for the dataset.
    pub quality_metrics: DatasetQualityMetrics,
    /// Intrinsic dimensionality estimate.
    pub intrinsic_dimensionality: f32,
    /// Clustering coefficient.
    pub clustering_coefficient: f32,
    /// Hubness score.
    pub hubness_score: f32,
    /// NOTE(review): presumably per-item local intrinsic dimensionality
    /// values — confirm against the code that fills this in.
    pub local_id: Vec<f32>,
}
/// Summary statistics describing a dataset of vectors.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DatasetStatistics {
    /// Number of vectors.
    pub vector_count: usize,
    /// Number of dimensions.
    pub dimensions: usize,
    /// Mean vector magnitude.
    pub mean_magnitude: f32,
    /// Standard deviation of vector magnitude.
    pub std_magnitude: f32,
    /// Statistics over distances.
    pub distance_stats: DistanceStatistics,
    /// Nearest-neighbour distribution values.
    pub nn_distribution: Vec<f32>,
    /// Sparsity ratio, when one is available.
    pub sparsity_ratio: Option<f32>,
}
/// Summary statistics over a set of distances.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DistanceStatistics {
    /// Mean distance.
    pub mean_distance: f32,
    /// Standard deviation of the distances.
    pub std_distance: f32,
    /// Smallest distance.
    pub min_distance: f32,
    /// Largest distance.
    pub max_distance: f32,
    /// Pairs of `f32` values.
    /// NOTE(review): presumably `(percentile, distance)` — confirm the order.
    pub percentiles: Vec<(f32, f32)>,
}
/// Quality figures describing a dataset.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DatasetQualityMetrics {
    /// Effective dimensionality.
    pub effective_dimensionality: f32,
    /// Concentration measure.
    pub concentration_measure: f32,
    /// Outlier ratio.
    pub outlier_ratio: f32,
    /// Cluster quality.
    pub cluster_quality: f32,
    /// Manifold quality.
    pub manifold_quality: f32,
}
/// An algorithm under benchmark: a boxed [`VectorIndex`] plus descriptive
/// metadata.
///
/// This struct carries no derives.
pub struct BenchmarkAlgorithm {
    /// Name of the algorithm.
    pub name: String,
    /// Free-form description.
    pub description: String,
    /// The index implementation, held as a trait object.
    pub index: Box<dyn VectorIndex>,
    /// Parameters associated with the algorithm.
    pub parameters: AlgorithmParameters,
    /// Build time, when known.
    pub build_time: Option<Duration>,
    /// Memory usage, when known.
    /// NOTE(review): the unit is not stated here (presumably bytes) — confirm.
    pub memory_usage: Option<usize>,
}
/// Named parameter values for an algorithm, split into three maps.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlgorithmParameters {
    /// General parameters, keyed by name.
    pub params: HashMap<String, ParameterValue>,
    /// Search parameters, keyed by name.
    pub search_params: HashMap<String, ParameterValue>,
    /// Build parameters, keyed by name.
    pub build_params: HashMap<String, ParameterValue>,
}
/// A single parameter value, either a scalar or a three-component range.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ParameterValue {
    /// A signed integer value.
    Integer(i64),
    /// A floating-point value.
    Float(f64),
    /// A string value.
    String(String),
    /// A boolean value.
    Boolean(bool),
    /// Three integers describing a range.
    /// NOTE(review): presumably `(start, end, step)` — confirm the order.
    IntegerRange(i64, i64, i64),
    /// Three floats describing a range.
    /// NOTE(review): presumably `(start, end, step)` — confirm the order.
    FloatRange(f64, f64, f64),
}
/// The full result of benchmarking one algorithm on one dataset.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AdvancedBenchmarkResult {
    /// Name of the benchmarked algorithm.
    pub algorithm_name: String,
    /// Name of the dataset used.
    pub dataset_name: String,
    /// Timestamp attached to the result.
    pub timestamp: std::time::SystemTime,
    /// Latency, throughput, build-time and index-size figures.
    pub performance: PerformanceMetrics,
    /// Result-quality figures.
    pub quality: QualityMetrics,
    /// Scaling figures.
    pub scalability: ScalabilityMetrics,
    /// Memory figures.
    pub memory: MemoryMetrics,
    /// Statistical summary.
    pub statistics: StatisticalMetrics,
    /// Optional detailed traces.
    pub traces: Option<BenchmarkTraces>,
    /// Error messages collected for this result.
    pub errors: Vec<String>,
}
impl Default for AdvancedBenchmarkResult {
fn default() -> Self {
Self {
algorithm_name: String::new(),
dataset_name: String::new(),
timestamp: std::time::SystemTime::now(),
performance: PerformanceMetrics {
latency: LatencyMetrics {
mean_ms: 0.0,
std_ms: 0.0,
percentiles: std::collections::HashMap::new(),
distribution: Vec::new(),
max_ms: 0.0,
min_ms: 0.0,
},
throughput: ThroughputMetrics {
qps: 0.0,
batch_qps: std::collections::HashMap::new(),
concurrent_qps: std::collections::HashMap::new(),
saturation_qps: 0.0,
},
build_time: BuildTimeMetrics {
total_seconds: 0.0,
per_vector_ms: 0.0,
allocation_seconds: 0.0,
construction_seconds: 0.0,
optimization_seconds: 0.0,
},
index_size: IndexSizeMetrics {
total_bytes: 0,
per_vector_bytes: 0.0,
overhead_ratio: 0.0,
compression_ratio: 0.0,
serialized_bytes: 0,
},
},
quality: QualityMetrics {
recall_at_k: std::collections::HashMap::new(),
precision_at_k: std::collections::HashMap::new(),
mean_average_precision: 0.0,
ndcg_at_k: std::collections::HashMap::new(),
f1_at_k: std::collections::HashMap::new(),
mean_reciprocal_rank: 0.0,
quality_degradation: QualityDegradation {
recall_latency_tradeoff: Vec::new(),
quality_size_tradeoff: Vec::new(),
quality_buildtime_tradeoff: Vec::new(),
},
},
scalability: ScalabilityMetrics {
latency_scaling: Vec::new(),
memory_scaling: Vec::new(),
buildtime_scaling: Vec::new(),
throughput_scaling: Vec::new(),
scaling_efficiency: 0.0,
},
memory: MemoryMetrics {
peak_memory_mb: 0.0,
average_memory_mb: 0.0,
allocation_patterns: Vec::new(),
fragmentation_ratio: 0.0,
cache_metrics: CacheMetrics {
l1_hit_ratio: 0.0,
l2_hit_ratio: 0.0,
l3_hit_ratio: 0.0,
memory_bandwidth_util: 0.0,
},
},
statistics: StatisticalMetrics {
sample_size: 0,
confidence_intervals: std::collections::HashMap::new(),
significance_tests: std::collections::HashMap::new(),
effect_sizes: std::collections::HashMap::new(),
power_analysis: PowerAnalysis {
power: 0.0,
effect_size: 0.0,
required_sample_size: 0,
},
},
traces: None,
errors: Vec::new(),
}
}
}
/// Groups the performance-related figures of a benchmark result.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceMetrics {
    /// Query latency figures.
    pub latency: LatencyMetrics,
    /// Throughput figures.
    pub throughput: ThroughputMetrics,
    /// Index build-time figures.
    pub build_time: BuildTimeMetrics,
    /// Index size figures.
    pub index_size: IndexSizeMetrics,
}
/// Latency figures; the `_ms` suffix on field names indicates milliseconds.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LatencyMetrics {
    /// Mean latency.
    pub mean_ms: f64,
    /// Standard deviation of latency.
    pub std_ms: f64,
    /// Percentile values keyed by a string label.
    /// NOTE(review): the label format (e.g. "p99") is not fixed here — confirm.
    pub percentiles: HashMap<String, f64>,
    /// Latency distribution values.
    pub distribution: Vec<f64>,
    /// Largest latency.
    pub max_ms: f64,
    /// Smallest latency.
    pub min_ms: f64,
}
/// Throughput figures, expressed as queries per second (QPS) per the field
/// names.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ThroughputMetrics {
    /// Overall QPS.
    pub qps: f64,
    /// QPS keyed by a `usize`.
    /// NOTE(review): presumably the batch size — confirm.
    pub batch_qps: HashMap<usize, f64>,
    /// QPS keyed by a `usize`.
    /// NOTE(review): presumably the concurrency level — confirm.
    pub concurrent_qps: HashMap<usize, f64>,
    /// QPS at saturation.
    pub saturation_qps: f64,
}
/// Index build-time figures; units follow the field-name suffixes.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BuildTimeMetrics {
    /// Total build time, in seconds.
    pub total_seconds: f64,
    /// Build time per vector, in milliseconds.
    pub per_vector_ms: f64,
    /// Time attributed to allocation, in seconds.
    pub allocation_seconds: f64,
    /// Time attributed to construction, in seconds.
    pub construction_seconds: f64,
    /// Time attributed to optimization, in seconds.
    pub optimization_seconds: f64,
}
/// Index size figures.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexSizeMetrics {
    /// Total size, in bytes.
    pub total_bytes: usize,
    /// Size per vector, in bytes.
    pub per_vector_bytes: f64,
    /// Overhead ratio.
    pub overhead_ratio: f64,
    /// Compression ratio.
    pub compression_ratio: f64,
    /// Serialized size, in bytes.
    pub serialized_bytes: usize,
}
/// Result-quality figures. The `*_at_k` maps are keyed by a `usize`, which
/// the field names indicate is `k`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QualityMetrics {
    /// Recall per `k`.
    pub recall_at_k: HashMap<usize, f64>,
    /// Precision per `k`.
    pub precision_at_k: HashMap<usize, f64>,
    /// Mean average precision.
    pub mean_average_precision: f64,
    /// NDCG per `k`.
    pub ndcg_at_k: HashMap<usize, f64>,
    /// F1 per `k`.
    pub f1_at_k: HashMap<usize, f64>,
    /// Mean reciprocal rank.
    pub mean_reciprocal_rank: f64,
    /// Quality trade-off curves.
    pub quality_degradation: QualityDegradation,
}
/// Trade-off curves stored as lists of pairs.
///
/// NOTE(review): the meaning and order of each tuple component is not fixed
/// by this declaration — confirm against the code that produces them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QualityDegradation {
    /// Recall/latency trade-off points.
    pub recall_latency_tradeoff: Vec<(f64, f64)>,
    /// Quality/size trade-off points.
    pub quality_size_tradeoff: Vec<(f64, usize)>,
    /// Quality/build-time trade-off points.
    pub quality_buildtime_tradeoff: Vec<(f64, f64)>,
}
/// Scaling curves stored as lists of pairs whose first component is a `usize`.
///
/// NOTE(review): presumably the first component is the dataset size and the
/// second the measured value — confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScalabilityMetrics {
    /// Latency scaling points.
    pub latency_scaling: Vec<(usize, f64)>,
    /// Memory scaling points.
    pub memory_scaling: Vec<(usize, usize)>,
    /// Build-time scaling points.
    pub buildtime_scaling: Vec<(usize, f64)>,
    /// Throughput scaling points.
    pub throughput_scaling: Vec<(usize, f64)>,
    /// Scaling efficiency.
    pub scaling_efficiency: f64,
}
/// Memory figures for a benchmark result.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryMetrics {
    /// Peak memory, in megabytes per the field name.
    pub peak_memory_mb: f64,
    /// Average memory, in megabytes per the field name.
    pub average_memory_mb: f64,
    /// Recorded allocation events.
    pub allocation_patterns: Vec<MemoryAllocation>,
    /// Fragmentation ratio.
    pub fragmentation_ratio: f64,
    /// Cache-related figures.
    pub cache_metrics: CacheMetrics,
}
/// A single recorded memory allocation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryAllocation {
    /// Timestamp in milliseconds.
    /// NOTE(review): the reference point (epoch vs. run start) is not stated
    /// here — confirm.
    pub timestamp_ms: u64,
    /// Number of bytes allocated.
    pub allocated_bytes: usize,
    /// Free-form label for the kind of allocation.
    pub allocation_type: String,
}
/// Cache-related figures.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CacheMetrics {
    /// L1 hit ratio.
    pub l1_hit_ratio: f64,
    /// L2 hit ratio.
    pub l2_hit_ratio: f64,
    /// L3 hit ratio.
    pub l3_hit_ratio: f64,
    /// Memory bandwidth utilization.
    pub memory_bandwidth_util: f64,
}
/// Statistical summary of a benchmark result. The maps are keyed by a string
/// label.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StatisticalMetrics {
    /// Number of samples.
    pub sample_size: usize,
    /// Confidence intervals as pairs of `f64`.
    /// NOTE(review): presumably `(lower, upper)` — confirm the order.
    pub confidence_intervals: HashMap<String, (f64, f64)>,
    /// Significance test results.
    pub significance_tests: HashMap<String, StatisticalTest>,
    /// Effect sizes.
    pub effect_sizes: HashMap<String, f64>,
    /// Power analysis figures.
    pub power_analysis: PowerAnalysis,
}
/// The outcome of one statistical test.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StatisticalTest {
    /// Free-form name of the test.
    pub test_type: String,
    /// The p-value.
    pub p_value: f64,
    /// The test statistic.
    pub test_statistic: f64,
    /// Whether the result was judged significant.
    pub is_significant: bool,
}
/// Power analysis figures.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PowerAnalysis {
    /// Statistical power.
    pub power: f64,
    /// Effect size.
    pub effect_size: f64,
    /// Required sample size.
    pub required_sample_size: usize,
}
/// Detailed traces attached to a benchmark result.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BenchmarkTraces {
    /// Per-query trace records.
    pub query_traces: Vec<QueryTrace>,
    /// System-level trace records.
    pub system_traces: Vec<SystemTrace>,
    /// Memory trace records.
    pub memory_traces: Vec<MemoryTrace>,
}
/// Trace record for a single query.
///
/// NOTE(review): the unit of `start_time` / `end_time` is not stated in this
/// declaration — confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QueryTrace {
    /// Identifier of the query.
    pub query_id: usize,
    /// Start time of the query.
    pub start_time: u64,
    /// End time of the query.
    pub end_time: u64,
    /// Number of results.
    pub results_count: usize,
    /// Number of distance computations.
    pub distance_computations: usize,
    /// Number of cache hits.
    pub cache_hits: usize,
    /// Number of memory allocations.
    pub memory_allocations: usize,
}
/// One system-level trace sample.
///
/// NOTE(review): units for `timestamp`, `cpu_usage` and `memory_usage` are
/// not stated in this declaration — confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SystemTrace {
    /// When the sample was taken.
    pub timestamp: u64,
    /// CPU usage.
    pub cpu_usage: f64,
    /// Memory usage.
    pub memory_usage: usize,
    /// Number of I/O operations.
    pub io_operations: usize,
    /// Number of context switches.
    pub context_switches: usize,
}
/// One memory trace sample.
///
/// NOTE(review): units for `timestamp`, the usage fields and
/// `memory_bandwidth` are not stated in this declaration — confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryTrace {
    /// When the sample was taken.
    pub timestamp: u64,
    /// Heap usage.
    pub heap_usage: usize,
    /// Stack usage.
    pub stack_usage: usize,
    /// Number of page faults.
    pub page_faults: usize,
    /// Memory bandwidth.
    pub memory_bandwidth: f64,
}
/// Names the available hyperparameter search strategies.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum OptimizationStrategy {
    /// Grid search.
    GridSearch,
    /// Random search.
    RandomSearch,
    /// Bayesian optimization.
    BayesianOptimization,
    /// Evolutionary optimization.
    EvolutionaryOptimization,
    /// Multi-objective optimization.
    MultiObjective,
}
/// The search space of one parameter: its type plus a list of constraints.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ParameterSpace {
    /// Kind and domain of the parameter.
    pub parameter_type: ParameterType,
    /// Constraints attached to the parameter.
    pub constraints: Vec<ParameterConstraint>,
}
/// Kind and domain of a tunable parameter.
///
/// NOTE(review): whether `min` / `max` are inclusive is not stated in this
/// declaration — confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ParameterType {
    /// One of a list of string choices.
    Categorical(Vec<String>),
    /// A floating-point value between `min` and `max`.
    Continuous { min: f64, max: f64 },
    /// An integer value between `min` and `max`.
    Integer { min: i64, max: i64 },
    /// A boolean value.
    Boolean,
}
/// A constraint on a parameter value. Constraints can nest through
/// `Conditional`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ParameterConstraint {
    /// Lower bound carried as an `f64`.
    GreaterThan(f64),
    /// Upper bound carried as an `f64`.
    LessThan(f64),
    /// A nested constraint tied to another parameter.
    /// NOTE(review): presumably `then_constraint` applies when `if_param`
    /// equals `if_value` — confirm against the evaluator.
    Conditional {
        if_param: String,
        if_value: String,
        then_constraint: Box<ParameterConstraint>,
    },
}
/// An optimization objective. Leaf variants carry a `weight`; `Composite` and
/// `Pareto` hold nested objectives.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ObjectiveFunction {
    /// Recall at `k`.
    Recall { k: usize, weight: f64 },
    /// Latency at a given percentile.
    Latency { percentile: f64, weight: f64 },
    /// Throughput.
    Throughput { weight: f64 },
    /// Memory usage.
    MemoryUsage { weight: f64 },
    /// A combination of nested objectives.
    Composite { objectives: Vec<ObjectiveFunction> },
    /// A Pareto-style set of nested objectives.
    Pareto { objectives: Vec<ObjectiveFunction> },
}
impl Default for AdvancedBenchmarkConfig {
fn default() -> Self {
Self::new()
}
}
impl AdvancedBenchmarkConfig {
    /// Creates the standard configuration.
    ///
    /// Uses `BenchmarkConfig::default()` as the base, a confidence level of
    /// `0.95`, at least `10` runs and a `max_cv` of `0.05`. Hyperparameter
    /// optimization, ANN-Benchmarks mode and trace export start disabled;
    /// the other analysis switches start enabled. The thread count comes
    /// from `num_cpus::get()` and the remaining parallel options are off.
    pub fn new() -> Self {
        let base_config = BenchmarkConfig::default();
        let parallel_config = ParallelBenchmarkConfig {
            num_threads: num_cpus::get(),
            numa_aware: false,
            thread_affinity: false,
            memory_bandwidth_test: false,
        };

        Self {
            base_config,
            confidence_level: 0.95,
            min_runs: 10,
            max_cv: 0.05,
            memory_profiling: true,
            latency_distribution: true,
            throughput_testing: true,
            quality_degradation: true,
            hyperparameter_optimization: false,
            comparative_analysis: true,
            ann_benchmarks_mode: false,
            export_traces: false,
            parallel_config,
        }
    }

    /// Creates a configuration for ANN-Benchmarks-style output.
    ///
    /// Starts from [`Self::new`], then switches the base output format to
    /// `BenchmarkOutputFormat::AnnBenchmarks`, turns on the base
    /// `quality_metrics` flag, sets `ann_benchmarks_mode` and disables
    /// comparative analysis.
    pub fn ann_benchmarks_compatible() -> Self {
        let mut config = Self::new();

        {
            let base = &mut config.base_config;
            base.output_format = BenchmarkOutputFormat::AnnBenchmarks;
            base.quality_metrics = true;
        }

        config.ann_benchmarks_mode = true;
        config.comparative_analysis = false;
        config
    }
}