1use crate::benchmarking::{BenchmarkConfig, BenchmarkDataset, BenchmarkOutputFormat};
4use crate::VectorIndex;
5use serde::{Deserialize, Serialize};
6use std::collections::HashMap;
7use std::time::Duration;
8
/// Settings for an advanced benchmark run: the basic [`BenchmarkConfig`] plus
/// statistical thresholds and on/off switches for optional analyses.
///
/// How each value is consumed is decided by the benchmark runner, which is
/// not part of this definition.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AdvancedBenchmarkConfig {
    /// Underlying basic benchmark configuration.
    pub base_config: BenchmarkConfig,
    /// Confidence level for statistical analysis (`new()` sets 0.95).
    pub confidence_level: f64,
    /// Minimum number of runs (`new()` sets 10).
    pub min_runs: usize,
    /// Upper bound on "cv" (`new()` sets 0.05).
    /// NOTE(review): presumably the coefficient of variation — confirm.
    pub max_cv: f64,
    /// Enables memory profiling.
    pub memory_profiling: bool,
    /// Enables latency-distribution analysis.
    pub latency_distribution: bool,
    /// Enables throughput testing.
    pub throughput_testing: bool,
    /// Enables quality-degradation analysis.
    pub quality_degradation: bool,
    /// Enables hyperparameter optimization.
    pub hyperparameter_optimization: bool,
    /// Enables comparative analysis.
    pub comparative_analysis: bool,
    /// Enables ANN-Benchmarks mode (see `ann_benchmarks_compatible()`).
    pub ann_benchmarks_mode: bool,
    /// Enables exporting of traces.
    pub export_traces: bool,
    /// Settings for parallel benchmarking.
    pub parallel_config: ParallelBenchmarkConfig,
}
39
/// Settings that control parallel benchmark execution.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ParallelBenchmarkConfig {
    /// Number of threads to use.
    pub num_threads: usize,
    /// Whether NUMA-aware behavior is requested.
    pub numa_aware: bool,
    /// Whether thread affinity is requested.
    pub thread_affinity: bool,
    /// Whether a memory-bandwidth test is requested.
    pub memory_bandwidth_test: bool,
}
52
/// A [`BenchmarkDataset`] bundled with descriptive statistics and quality
/// measures about its vectors.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EnhancedBenchmarkDataset {
    /// The wrapped basic dataset.
    pub base_dataset: BenchmarkDataset,
    /// Summary statistics of the dataset.
    pub statistics: DatasetStatistics,
    /// Quality measures of the dataset.
    pub quality_metrics: DatasetQualityMetrics,
    /// Intrinsic-dimensionality estimate for the dataset.
    pub intrinsic_dimensionality: f32,
    /// Clustering coefficient of the dataset.
    pub clustering_coefficient: f32,
    /// Hubness score of the dataset.
    pub hubness_score: f32,
    /// NOTE(review): presumably local intrinsic dimensionality values, one
    /// per vector or sample — confirm against the producer.
    pub local_id: Vec<f32>,
}
71
/// Summary statistics describing a dataset of vectors.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DatasetStatistics {
    /// Number of vectors.
    pub vector_count: usize,
    /// Number of dimensions.
    pub dimensions: usize,
    /// Mean vector magnitude.
    pub mean_magnitude: f32,
    /// Standard deviation of vector magnitude.
    pub std_magnitude: f32,
    /// Statistics over distances.
    pub distance_stats: DistanceStatistics,
    /// Nearest-neighbor distribution values.
    /// NOTE(review): exact meaning of each entry is not visible here.
    pub nn_distribution: Vec<f32>,
    /// Sparsity ratio, when available (`None` otherwise).
    pub sparsity_ratio: Option<f32>,
}
90
/// Summary statistics over a set of distances.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DistanceStatistics {
    /// Mean distance.
    pub mean_distance: f32,
    /// Standard deviation of the distances.
    pub std_distance: f32,
    /// Smallest distance.
    pub min_distance: f32,
    /// Largest distance.
    pub max_distance: f32,
    /// Percentile data as pairs.
    /// NOTE(review): presumably `(percentile, distance)` — confirm order.
    pub percentiles: Vec<(f32, f32)>,
}
105
/// Quality measures for a dataset. Value ranges and the estimators used are
/// not defined in this type.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DatasetQualityMetrics {
    /// Effective dimensionality of the data.
    pub effective_dimensionality: f32,
    /// Concentration measure of the data.
    pub concentration_measure: f32,
    /// Ratio of outliers.
    pub outlier_ratio: f32,
    /// Cluster quality score.
    pub cluster_quality: f32,
    /// Manifold quality score.
    pub manifold_quality: f32,
}
120
/// An algorithm under benchmark: a boxed [`VectorIndex`] together with its
/// name, description, parameters, and optional build measurements.
///
/// No traits are derived on this type, so it is not `Debug`, `Clone`, or
/// serde-serializable via derive.
pub struct BenchmarkAlgorithm {
    /// Algorithm name.
    pub name: String,
    /// Human-readable description.
    pub description: String,
    /// The index implementation, held as a trait object.
    pub index: Box<dyn VectorIndex>,
    /// Parameters associated with this algorithm.
    pub parameters: AlgorithmParameters,
    /// Build time, if recorded.
    pub build_time: Option<Duration>,
    /// Memory usage, if recorded.
    /// NOTE(review): unit is presumably bytes — confirm.
    pub memory_usage: Option<usize>,
}
130
/// Named parameter sets for an algorithm, each keyed by parameter name.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlgorithmParameters {
    /// General parameters.
    pub params: HashMap<String, ParameterValue>,
    /// Parameters for searching.
    pub search_params: HashMap<String, ParameterValue>,
    /// Parameters for building.
    pub build_params: HashMap<String, ParameterValue>,
}
141
/// A single parameter value, or a range of values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ParameterValue {
    /// A signed integer value.
    Integer(i64),
    /// A floating-point value.
    Float(f64),
    /// A string value.
    String(String),
    /// A boolean value.
    Boolean(bool),
    /// An integer range given by three numbers.
    /// NOTE(review): presumably `(min, max, step)` — confirm order.
    IntegerRange(i64, i64, i64),
    /// A floating-point range given by three numbers.
    /// NOTE(review): presumably `(min, max, step)` — confirm order.
    FloatRange(f64, f64, f64),
}
152
/// Full result record for one algorithm on one dataset, grouping performance,
/// quality, scalability, memory, and statistical metrics.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AdvancedBenchmarkResult {
    /// Name of the benchmarked algorithm.
    pub algorithm_name: String,
    /// Name of the dataset used.
    pub dataset_name: String,
    /// Timestamp of the result (`Default` sets it to the current system time).
    pub timestamp: std::time::SystemTime,

    /// Performance metrics (latency, throughput, build time, index size).
    pub performance: PerformanceMetrics,
    /// Result-quality metrics.
    pub quality: QualityMetrics,
    /// Scalability metrics.
    pub scalability: ScalabilityMetrics,
    /// Memory metrics.
    pub memory: MemoryMetrics,
    /// Statistical analysis of the measurements.
    pub statistics: StatisticalMetrics,

    /// Optional detailed traces (`None` when not collected).
    pub traces: Option<BenchmarkTraces>,
    /// Error messages collected for this result.
    pub errors: Vec<String>,
}
177
178impl Default for AdvancedBenchmarkResult {
179 fn default() -> Self {
180 Self {
181 algorithm_name: String::new(),
182 dataset_name: String::new(),
183 timestamp: std::time::SystemTime::now(),
184 performance: PerformanceMetrics {
185 latency: LatencyMetrics {
186 mean_ms: 0.0,
187 std_ms: 0.0,
188 percentiles: std::collections::HashMap::new(),
189 distribution: Vec::new(),
190 max_ms: 0.0,
191 min_ms: 0.0,
192 },
193 throughput: ThroughputMetrics {
194 qps: 0.0,
195 batch_qps: std::collections::HashMap::new(),
196 concurrent_qps: std::collections::HashMap::new(),
197 saturation_qps: 0.0,
198 },
199 build_time: BuildTimeMetrics {
200 total_seconds: 0.0,
201 per_vector_ms: 0.0,
202 allocation_seconds: 0.0,
203 construction_seconds: 0.0,
204 optimization_seconds: 0.0,
205 },
206 index_size: IndexSizeMetrics {
207 total_bytes: 0,
208 per_vector_bytes: 0.0,
209 overhead_ratio: 0.0,
210 compression_ratio: 0.0,
211 serialized_bytes: 0,
212 },
213 },
214 quality: QualityMetrics {
215 recall_at_k: std::collections::HashMap::new(),
216 precision_at_k: std::collections::HashMap::new(),
217 mean_average_precision: 0.0,
218 ndcg_at_k: std::collections::HashMap::new(),
219 f1_at_k: std::collections::HashMap::new(),
220 mean_reciprocal_rank: 0.0,
221 quality_degradation: QualityDegradation {
222 recall_latency_tradeoff: Vec::new(),
223 quality_size_tradeoff: Vec::new(),
224 quality_buildtime_tradeoff: Vec::new(),
225 },
226 },
227 scalability: ScalabilityMetrics {
228 latency_scaling: Vec::new(),
229 memory_scaling: Vec::new(),
230 buildtime_scaling: Vec::new(),
231 throughput_scaling: Vec::new(),
232 scaling_efficiency: 0.0,
233 },
234 memory: MemoryMetrics {
235 peak_memory_mb: 0.0,
236 average_memory_mb: 0.0,
237 allocation_patterns: Vec::new(),
238 fragmentation_ratio: 0.0,
239 cache_metrics: CacheMetrics {
240 l1_hit_ratio: 0.0,
241 l2_hit_ratio: 0.0,
242 l3_hit_ratio: 0.0,
243 memory_bandwidth_util: 0.0,
244 },
245 },
246 statistics: StatisticalMetrics {
247 sample_size: 0,
248 confidence_intervals: std::collections::HashMap::new(),
249 significance_tests: std::collections::HashMap::new(),
250 effect_sizes: std::collections::HashMap::new(),
251 power_analysis: PowerAnalysis {
252 power: 0.0,
253 effect_size: 0.0,
254 required_sample_size: 0,
255 },
256 },
257 traces: None,
258 errors: Vec::new(),
259 }
260 }
261}
262
/// Groups the performance-related metric families of a benchmark result.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceMetrics {
    /// Query latency metrics.
    pub latency: LatencyMetrics,
    /// Throughput metrics.
    pub throughput: ThroughputMetrics,
    /// Index build-time metrics.
    pub build_time: BuildTimeMetrics,
    /// Index size metrics.
    pub index_size: IndexSizeMetrics,
}
275
/// Latency measurements; the `_ms` suffix on field names indicates
/// milliseconds.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LatencyMetrics {
    /// Mean latency.
    pub mean_ms: f64,
    /// Standard deviation of latency.
    pub std_ms: f64,
    /// Latency percentiles keyed by a string label.
    /// NOTE(review): label format (e.g. "p99") is not visible here — confirm.
    pub percentiles: HashMap<String, f64>,
    /// Latency distribution values.
    /// NOTE(review): raw samples vs. histogram bins is not visible here.
    pub distribution: Vec<f64>,
    /// Maximum latency.
    pub max_ms: f64,
    /// Minimum latency.
    pub min_ms: f64,
}
292
/// Throughput measurements, expressed as "qps" values.
/// NOTE(review): "qps" is presumably queries per second — confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ThroughputMetrics {
    /// Overall qps.
    pub qps: f64,
    /// Qps keyed by an integer; presumably the batch size — confirm.
    pub batch_qps: HashMap<usize, f64>,
    /// Qps keyed by an integer; presumably the concurrency level — confirm.
    pub concurrent_qps: HashMap<usize, f64>,
    /// Qps at saturation.
    pub saturation_qps: f64,
}
305
/// Index build-time measurements; units follow the field-name suffixes
/// (`_seconds`, `_ms`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BuildTimeMetrics {
    /// Total build time.
    pub total_seconds: f64,
    /// Build time per vector.
    pub per_vector_ms: f64,
    /// Time attributed to allocation.
    pub allocation_seconds: f64,
    /// Time attributed to construction.
    pub construction_seconds: f64,
    /// Time attributed to optimization.
    pub optimization_seconds: f64,
}
320
/// Index size measurements.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexSizeMetrics {
    /// Total index size in bytes.
    pub total_bytes: usize,
    /// Bytes per vector.
    pub per_vector_bytes: f64,
    /// Overhead ratio. NOTE(review): the baseline it is relative to is not
    /// visible here — confirm.
    pub overhead_ratio: f64,
    /// Compression ratio.
    pub compression_ratio: f64,
    /// Size of the serialized index in bytes.
    pub serialized_bytes: usize,
}
335
/// Search-quality measurements. The `*_at_k` maps are keyed by an integer,
/// which the field names indicate is the cutoff `k`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QualityMetrics {
    /// Recall per `k`.
    pub recall_at_k: HashMap<usize, f64>,
    /// Precision per `k`.
    pub precision_at_k: HashMap<usize, f64>,
    /// Mean average precision.
    pub mean_average_precision: f64,
    /// NDCG per `k`.
    pub ndcg_at_k: HashMap<usize, f64>,
    /// F1 score per `k`.
    pub f1_at_k: HashMap<usize, f64>,
    /// Mean reciprocal rank.
    pub mean_reciprocal_rank: f64,
    /// Quality trade-off curves.
    pub quality_degradation: QualityDegradation,
}
354
/// Trade-off curves stored as lists of pairs.
/// NOTE(review): the order of the two tuple elements is not stated in this
/// type; the guesses below should be confirmed against the producer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QualityDegradation {
    /// Recall vs. latency points; presumably `(recall, latency)`.
    pub recall_latency_tradeoff: Vec<(f64, f64)>,
    /// Quality vs. size points; presumably `(quality, size)`.
    pub quality_size_tradeoff: Vec<(f64, usize)>,
    /// Quality vs. build-time points; presumably `(quality, build_time)`.
    pub quality_buildtime_tradeoff: Vec<(f64, f64)>,
}
365
/// Scaling curves stored as lists of pairs whose first element is a `usize`.
/// NOTE(review): the first element is presumably the dataset size (or
/// thread count) at which the second was measured — confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScalabilityMetrics {
    /// Latency scaling points.
    pub latency_scaling: Vec<(usize, f64)>,
    /// Memory scaling points.
    pub memory_scaling: Vec<(usize, usize)>,
    /// Build-time scaling points.
    pub buildtime_scaling: Vec<(usize, f64)>,
    /// Throughput scaling points.
    pub throughput_scaling: Vec<(usize, f64)>,
    /// Overall scaling efficiency.
    pub scaling_efficiency: f64,
}
380
/// Memory measurements; the `_mb` suffix on field names indicates megabytes.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryMetrics {
    /// Peak memory use.
    pub peak_memory_mb: f64,
    /// Average memory use.
    pub average_memory_mb: f64,
    /// Recorded allocation events.
    pub allocation_patterns: Vec<MemoryAllocation>,
    /// Fragmentation ratio.
    pub fragmentation_ratio: f64,
    /// Cache-related metrics.
    pub cache_metrics: CacheMetrics,
}
395
/// One recorded memory allocation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryAllocation {
    /// Time of the allocation in milliseconds.
    /// NOTE(review): the reference point (epoch vs. run start) is not
    /// visible here — confirm.
    pub timestamp_ms: u64,
    /// Number of bytes allocated.
    pub allocated_bytes: usize,
    /// Free-form label for the kind of allocation.
    pub allocation_type: String,
}
406
/// CPU cache and memory-bandwidth measurements.
/// NOTE(review): ratios are presumably in `[0, 1]` — confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CacheMetrics {
    /// L1 cache hit ratio.
    pub l1_hit_ratio: f64,
    /// L2 cache hit ratio.
    pub l2_hit_ratio: f64,
    /// L3 cache hit ratio.
    pub l3_hit_ratio: f64,
    /// Memory bandwidth utilization.
    pub memory_bandwidth_util: f64,
}
419
/// Statistical analysis attached to a benchmark result. Maps are keyed by a
/// string; presumably the metric name — confirm against the producer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StatisticalMetrics {
    /// Number of samples.
    pub sample_size: usize,
    /// Confidence intervals as pairs; presumably `(lower, upper)` — confirm.
    pub confidence_intervals: HashMap<String, (f64, f64)>,
    /// Significance test outcomes.
    pub significance_tests: HashMap<String, StatisticalTest>,
    /// Effect sizes.
    pub effect_sizes: HashMap<String, f64>,
    /// Power analysis summary.
    pub power_analysis: PowerAnalysis,
}
434
/// Outcome of one statistical significance test.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StatisticalTest {
    /// Free-form name of the test that was run.
    pub test_type: String,
    /// The test's p-value.
    pub p_value: f64,
    /// The test statistic.
    pub test_statistic: f64,
    /// Whether the result was judged significant. The threshold used is not
    /// stored in this type.
    pub is_significant: bool,
}
447
/// Summary of a statistical power analysis.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PowerAnalysis {
    /// Statistical power.
    pub power: f64,
    /// Effect size.
    pub effect_size: f64,
    /// Required sample size.
    pub required_sample_size: usize,
}
458
/// Detailed traces collected during a benchmark, grouped by kind.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BenchmarkTraces {
    /// Per-query traces.
    pub query_traces: Vec<QueryTrace>,
    /// System-level traces.
    pub system_traces: Vec<SystemTrace>,
    /// Memory traces.
    pub memory_traces: Vec<MemoryTrace>,
}
469
/// Trace record for a single query.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QueryTrace {
    /// Identifier of the query.
    pub query_id: usize,
    /// Start time. NOTE(review): unit and reference point are not visible
    /// here — confirm.
    pub start_time: u64,
    /// End time, in the same unit as `start_time` (presumably — confirm).
    pub end_time: u64,
    /// Number of results returned.
    pub results_count: usize,
    /// Number of distance computations.
    pub distance_computations: usize,
    /// Number of cache hits.
    pub cache_hits: usize,
    /// Number of memory allocations.
    pub memory_allocations: usize,
}
488
/// One system-level sample taken during a benchmark.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SystemTrace {
    /// Sample time. NOTE(review): unit and reference point are not visible
    /// here — confirm.
    pub timestamp: u64,
    /// CPU usage. NOTE(review): fraction vs. percent is not visible here.
    pub cpu_usage: f64,
    /// Memory usage; presumably bytes — confirm.
    pub memory_usage: usize,
    /// Number of I/O operations.
    pub io_operations: usize,
    /// Number of context switches.
    pub context_switches: usize,
}
503
/// One memory sample taken during a benchmark.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryTrace {
    /// Sample time. NOTE(review): unit and reference point are not visible
    /// here — confirm.
    pub timestamp: u64,
    /// Heap usage; presumably bytes — confirm.
    pub heap_usage: usize,
    /// Stack usage; presumably bytes — confirm.
    pub stack_usage: usize,
    /// Number of page faults.
    pub page_faults: usize,
    /// Memory bandwidth. NOTE(review): unit is not visible here.
    pub memory_bandwidth: f64,
}
518
/// Selects a hyperparameter optimization strategy. The strategies themselves
/// are implemented elsewhere; this enum only names them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum OptimizationStrategy {
    /// Grid search.
    GridSearch,
    /// Random search.
    RandomSearch,
    /// Bayesian optimization.
    BayesianOptimization,
    /// Evolutionary optimization.
    EvolutionaryOptimization,
    /// Multi-objective optimization.
    MultiObjective,
}
528
/// Describes the space of values one parameter may take: its type plus any
/// constraints on it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ParameterSpace {
    /// Kind of parameter and its domain.
    pub parameter_type: ParameterType,
    /// Constraints attached to the parameter.
    pub constraints: Vec<ParameterConstraint>,
}
535
/// Kind and domain of a parameter.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ParameterType {
    /// One of a list of string choices.
    Categorical(Vec<String>),
    /// A floating-point value bounded by `min` and `max`.
    /// NOTE(review): whether the bounds are inclusive is not visible here.
    Continuous { min: f64, max: f64 },
    /// An integer value bounded by `min` and `max`.
    /// NOTE(review): whether the bounds are inclusive is not visible here.
    Integer { min: i64, max: i64 },
    /// A boolean value.
    Boolean,
}
543
/// A constraint on a parameter's value. `Conditional` nests another
/// constraint behind a `Box`, so constraints can be chained.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ParameterConstraint {
    /// Value must be greater than the given bound.
    GreaterThan(f64),
    /// Value must be less than the given bound.
    LessThan(f64),
    /// A constraint tied to another parameter.
    /// NOTE(review): presumably `then_constraint` applies when parameter
    /// `if_param` equals `if_value` — confirm against the evaluator.
    Conditional {
        if_param: String,
        if_value: String,
        then_constraint: Box<ParameterConstraint>,
    },
}
554
/// An optimization objective. Single-metric variants carry a `weight`;
/// `Composite` and `Pareto` hold a list of nested objectives. How weights and
/// nested objectives are combined is decided by the optimizer, not here.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ObjectiveFunction {
    /// Recall at cutoff `k`.
    Recall { k: usize, weight: f64 },
    /// Latency at the given percentile.
    /// NOTE(review): percentile scale (0–1 vs. 0–100) is not visible here.
    Latency { percentile: f64, weight: f64 },
    /// Throughput.
    Throughput { weight: f64 },
    /// Memory usage.
    MemoryUsage { weight: f64 },
    /// A combination of several objectives.
    Composite { objectives: Vec<ObjectiveFunction> },
    /// A set of objectives for Pareto-style treatment.
    Pareto { objectives: Vec<ObjectiveFunction> },
}
565
566impl Default for AdvancedBenchmarkConfig {
567 fn default() -> Self {
568 Self::new()
569 }
570}
571
572impl AdvancedBenchmarkConfig {
573 pub fn new() -> Self {
574 Self {
575 base_config: BenchmarkConfig::default(),
576 confidence_level: 0.95,
577 min_runs: 10,
578 max_cv: 0.05, memory_profiling: true,
580 latency_distribution: true,
581 throughput_testing: true,
582 quality_degradation: true,
583 hyperparameter_optimization: false,
584 comparative_analysis: true,
585 ann_benchmarks_mode: false,
586 export_traces: false,
587 parallel_config: ParallelBenchmarkConfig {
588 num_threads: num_cpus::get(),
589 numa_aware: false,
590 thread_affinity: false,
591 memory_bandwidth_test: false,
592 },
593 }
594 }
595
596 pub fn ann_benchmarks_compatible() -> Self {
597 let mut config = Self::new();
598 config.ann_benchmarks_mode = true;
599 config.base_config.output_format = BenchmarkOutputFormat::AnnBenchmarks;
600 config.base_config.quality_metrics = true;
601 config.comparative_analysis = false;
602 config
603 }
604}