//! Benchmarking utilities for comparing optimization strategies across input sizes.

use crate::performance_optimization::{AdaptiveOptimizer, OptimizationStrategy};
use std::collections::HashMap;
use std::time::{Duration, Instant};

/// Configuration for a benchmark run.
#[derive(Debug, Clone)]
pub struct BenchmarkConfig {
    /// Number of warmup iterations run before timing starts.
    pub warmup_iterations: usize,
    /// Number of timed iterations per (input size, strategy) pair.
    pub measurement_iterations: usize,
    /// Minimum duration for the benchmark run.
    pub min_duration: Duration,
    /// Maximum duration for the benchmark run.
    pub max_duration: Duration,
    /// Input sizes (element counts) to benchmark.
    pub sample_sizes: Vec<usize>,
    /// Optimization strategies to compare.
    pub strategies: Vec<OptimizationStrategy>,
}

impl Default for BenchmarkConfig {
    fn default() -> Self {
        Self {
            warmup_iterations: 5,
            measurement_iterations: 20,
            min_duration: Duration::from_millis(100),
            max_duration: Duration::from_secs(30),
            sample_sizes: vec![100, 1_000, 10_000, 100_000, 1_000_000],
            strategies: vec![
                OptimizationStrategy::Scalar,
                OptimizationStrategy::Simd,
                OptimizationStrategy::Parallel,
            ],
        }
    }
}

/// A single timed measurement for one (strategy, input size) pair.
#[derive(Debug, Clone)]
pub struct BenchmarkMeasurement {
    /// Strategy used for this measurement.
    pub strategy: OptimizationStrategy,
    /// Number of input elements processed.
    pub input_size: usize,
    /// Average measured duration.
    pub duration: Duration,
    /// Throughput in elements per second.
    pub throughput: f64,
    /// Estimated memory usage in bytes.
    pub memory_usage: usize,
    /// Additional, operation-specific metrics.
    pub custom_metrics: HashMap<String, f64>,
}

/// Aggregated results of a benchmark run.
#[derive(Debug, Clone)]
pub struct BenchmarkResults {
    /// Name of the benchmarked operation.
    pub operation_name: String,
    /// All individual measurements.
    pub measurements: Vec<BenchmarkMeasurement>,
    /// Per-strategy performance summary.
    pub strategy_summary: HashMap<OptimizationStrategy, StrategyPerformance>,
    /// Scalability analysis across input sizes.
    pub scalability_analysis: ScalabilityAnalysis,
    /// Human-readable tuning recommendations.
    pub recommendations: Vec<String>,
    /// Wall-clock time spent running the whole benchmark.
    pub total_duration: Duration,
}

/// Summary statistics for a single optimization strategy.
#[derive(Debug, Clone)]
pub struct StrategyPerformance {
    /// Mean throughput across all input sizes (elements per second).
    pub avg_throughput: f64,
    /// Standard deviation of the throughput.
    pub throughput_stddev: f64,
    /// Mean estimated memory usage in bytes.
    pub avg_memory_usage: f64,
    /// Input size at which this strategy achieved its best throughput.
    pub optimal_size: usize,
    /// Throughput-per-memory efficiency score, clamped to [0, 1].
    pub efficiency_score: f64,
}

/// Analysis of how performance scales with input size.
#[derive(Debug, Clone)]
pub struct ScalabilityAnalysis {
    /// Parallel efficiency (0.0..=1.0) per input size.
    pub parallel_efficiency: HashMap<usize, f64>,
    /// Linear model of memory usage versus input size.
    pub memory_scaling: MemoryScaling,
    /// Detected performance bottlenecks.
    pub bottlenecks: Vec<PerformanceBottleneck>,
}

/// Linear model of memory usage as a function of input size.
#[derive(Debug, Clone)]
pub struct MemoryScaling {
    /// Slope: bytes per additional input element.
    pub linear_coefficient: f64,
    /// Intercept: constant memory overhead in bytes.
    pub constant_coefficient: f64,
    /// Goodness of fit (coefficient of determination).
    pub r_squared: f64,
}

/// A detected performance bottleneck.
#[derive(Debug, Clone)]
pub struct PerformanceBottleneck {
    /// Kind of bottleneck.
    pub bottleneck_type: BottleneckType,
    /// Input-size range (inclusive) in which the bottleneck was observed.
    pub size_range: (usize, usize),
    /// Relative impact on throughput (0.0..=1.0).
    pub impact: f64,
    /// Suggested mitigation.
    pub mitigation: String,
}

/// Categories of performance bottlenecks.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BottleneckType {
    MemoryBandwidth,
    CacheLatency,
    ComputeBound,
    SynchronizationOverhead,
    AlgorithmicComplexity,
}

/// Runs benchmarks across input sizes and optimization strategies.
#[allow(dead_code)]
pub struct BenchmarkRunner {
    config: BenchmarkConfig,
    #[allow(dead_code)]
    optimizer: AdaptiveOptimizer,
}

impl BenchmarkRunner {
    pub fn new(config: BenchmarkConfig) -> Self {
        Self {
            config,
            optimizer: AdaptiveOptimizer::new(),
        }
    }

    /// Benchmarks `operation` across every configured input size and strategy.
    ///
    /// The closure receives the input slice and the strategy to use, and must
    /// return the measured duration together with its output values.
    pub fn benchmark_operation<F>(&self, operation_name: &str, operation: F) -> BenchmarkResults
    where
        F: Fn(&[f64], OptimizationStrategy) -> (Duration, Vec<f64>) + Send + Sync,
    {
        let start_time = Instant::now();
        let mut measurements = Vec::new();

        for &size in &self.config.sample_sizes {
            // Synthetic input data for this size.
            let input_data: Vec<f64> = (0..size).map(|i| i as f64).collect();

            for &strategy in &self.config.strategies {
                // Warmup iterations; results are discarded.
                for _ in 0..self.config.warmup_iterations {
                    let _ = operation(&input_data, strategy);
                }

                // Timed iterations: record the duration reported by the operation.
                let mut durations = Vec::new();
                for _ in 0..self.config.measurement_iterations {
                    let (duration, _result) = operation(&input_data, strategy);
                    durations.push(duration);
                }

                let avg_duration = Duration::from_nanos(
                    (durations.iter().map(|d| d.as_nanos()).sum::<u128>() / durations.len() as u128)
                        as u64,
                );

                let throughput = if avg_duration.as_nanos() > 0 {
                    (size as f64) / (avg_duration.as_secs_f64())
                } else {
                    0.0
                };

                let memory_usage = self.estimate_memory_usage(size, strategy);

                measurements.push(BenchmarkMeasurement {
                    strategy,
                    input_size: size,
                    duration: avg_duration,
                    throughput,
                    memory_usage,
                    custom_metrics: HashMap::new(),
                });
            }
        }

        let strategy_summary = self.analyze_strategy_performance(&measurements);
        let scalability_analysis = self.analyze_scalability(&measurements);
        let recommendations = self.generate_recommendations(&measurements, &strategy_summary);

        BenchmarkResults {
            operation_name: operation_name.to_string(),
            measurements,
            strategy_summary,
            scalability_analysis,
            recommendations,
            total_duration: start_time.elapsed(),
        }
    }

    fn analyze_strategy_performance(
        &self,
        measurements: &[BenchmarkMeasurement],
    ) -> HashMap<OptimizationStrategy, StrategyPerformance> {
        let mut strategy_map: HashMap<OptimizationStrategy, Vec<&BenchmarkMeasurement>> =
            HashMap::new();

        for measurement in measurements {
            strategy_map
                .entry(measurement.strategy)
                .or_default()
                .push(measurement);
        }

        let mut summary = HashMap::new();
        for (strategy, strategy_measurements) in strategy_map {
            let throughputs: Vec<f64> =
                strategy_measurements.iter().map(|m| m.throughput).collect();
            let memory_usages: Vec<f64> = strategy_measurements
                .iter()
                .map(|m| m.memory_usage as f64)
                .collect();

            let avg_throughput = throughputs.iter().sum::<f64>() / throughputs.len() as f64;
            let throughput_variance = throughputs
                .iter()
                .map(|&x| (x - avg_throughput).powi(2))
                .sum::<f64>()
                / throughputs.len() as f64;
            let throughput_stddev = throughput_variance.sqrt();

            let avg_memory_usage = memory_usages.iter().sum::<f64>() / memory_usages.len() as f64;

            let optimal_size = strategy_measurements
                .iter()
                .max_by(|a, b| {
                    a.throughput
                        .partial_cmp(&b.throughput)
                        .expect("Operation failed")
                })
                .map(|m| m.input_size)
                .unwrap_or(0);

            let efficiency_score = if avg_memory_usage > 0.0 {
                (avg_throughput / avg_memory_usage * 1e6).min(1.0)
            } else {
                0.0
            };

            summary.insert(
                strategy,
                StrategyPerformance {
                    avg_throughput,
                    throughput_stddev,
                    avg_memory_usage,
                    optimal_size,
                    efficiency_score,
                },
            );
        }

        summary
    }

    fn analyze_scalability(&self, measurements: &[BenchmarkMeasurement]) -> ScalabilityAnalysis {
        let mut parallel_efficiency = HashMap::new();
        let mut memory_sizes = Vec::new();
        let mut memory_usages = Vec::new();

        for &size in &self.config.sample_sizes {
            let scalar_throughput = measurements
                .iter()
                .find(|m| m.input_size == size && m.strategy == OptimizationStrategy::Scalar)
                .map(|m| m.throughput)
                .unwrap_or(0.0);

            let parallel_throughput = measurements
                .iter()
                .find(|m| m.input_size == size && m.strategy == OptimizationStrategy::Parallel)
                .map(|m| m.throughput)
                .unwrap_or(0.0);

            if scalar_throughput > 0.0 {
                // Efficiency relative to an assumed ideal 4x parallel speedup.
                let efficiency = parallel_throughput / (scalar_throughput * 4.0);
                parallel_efficiency.insert(size, efficiency.min(1.0));
            }

            memory_sizes.push(size as f64);
            if let Some(measurement) = measurements.iter().find(|m| m.input_size == size) {
                memory_usages.push(measurement.memory_usage as f64);
            }
        }

        let memory_scaling = self.fit_linear_model(&memory_sizes, &memory_usages);

        let bottlenecks = self.identify_bottlenecks(measurements);

        ScalabilityAnalysis {
            parallel_efficiency,
            memory_scaling,
            bottlenecks,
        }
    }

    /// Ordinary least-squares fit of `y = slope * x + intercept`, with R-squared goodness of fit.
    fn fit_linear_model(&self, x: &[f64], y: &[f64]) -> MemoryScaling {
        if x.len() != y.len() || x.is_empty() {
            return MemoryScaling {
                linear_coefficient: 0.0,
                constant_coefficient: 0.0,
                r_squared: 0.0,
            };
        }

        let n = x.len() as f64;
        let sum_x = x.iter().sum::<f64>();
        let sum_y = y.iter().sum::<f64>();
        let sum_xy = x.iter().zip(y.iter()).map(|(xi, yi)| xi * yi).sum::<f64>();
        let sum_x2 = x.iter().map(|xi| xi * xi).sum::<f64>();

        let slope = (n * sum_xy - sum_x * sum_y) / (n * sum_x2 - sum_x * sum_x);
        let intercept = (sum_y - slope * sum_x) / n;

        let y_mean = sum_y / n;
        let ss_tot = y.iter().map(|yi| (yi - y_mean).powi(2)).sum::<f64>();
        let ss_res = x
            .iter()
            .zip(y.iter())
            .map(|(xi, yi)| (yi - (slope * xi + intercept)).powi(2))
            .sum::<f64>();

        let r_squared = if ss_tot > 0.0 {
            1.0 - (ss_res / ss_tot)
        } else {
            0.0
        };

        MemoryScaling {
            linear_coefficient: slope,
            constant_coefficient: intercept,
            r_squared,
        }
    }

    fn identify_bottlenecks(
        &self,
        measurements: &[BenchmarkMeasurement],
    ) -> Vec<PerformanceBottleneck> {
        let mut bottlenecks = Vec::new();

        let mut size_groups: HashMap<usize, Vec<&BenchmarkMeasurement>> = HashMap::new();
        for measurement in measurements {
            size_groups
                .entry(measurement.input_size)
                .or_default()
                .push(measurement);
        }

        for (&size, group) in &size_groups {
            let max_throughput = group.iter().map(|m| m.throughput).fold(0.0f64, f64::max);
            let min_throughput = group
                .iter()
                .map(|m| m.throughput)
                .fold(f64::INFINITY, f64::min);

            if max_throughput > 0.0 && (max_throughput - min_throughput) / max_throughput > 0.5 {
                let impact = (max_throughput - min_throughput) / max_throughput;
                bottlenecks.push(PerformanceBottleneck {
                    bottleneck_type: BottleneckType::MemoryBandwidth,
                    size_range: (size, size),
                    impact,
                    mitigation: "Consider cache-friendly data layouts or memory prefetching"
                        .to_string(),
                });
            }

            let scalar_perf = group
                .iter()
                .find(|m| m.strategy == OptimizationStrategy::Scalar)
                .map(|m| m.throughput)
                .unwrap_or(0.0);

            let parallel_perf = group
                .iter()
                .find(|m| m.strategy == OptimizationStrategy::Parallel)
                .map(|m| m.throughput)
                .unwrap_or(0.0);

            if scalar_perf > 0.0 && parallel_perf / scalar_perf < 2.0 {
                let impact = 1.0 - (parallel_perf / (scalar_perf * 4.0));
                bottlenecks.push(PerformanceBottleneck {
                    bottleneck_type: BottleneckType::SynchronizationOverhead,
                    size_range: (size, size),
                    impact,
                    mitigation: "Reduce synchronization points or increase work per thread"
                        .to_string(),
                });
            }
        }

        bottlenecks
    }

    fn generate_recommendations(
        &self,
        measurements: &[BenchmarkMeasurement],
        strategy_summary: &HashMap<OptimizationStrategy, StrategyPerformance>,
    ) -> Vec<String> {
        let mut recommendations = Vec::new();

        let best_strategy = strategy_summary
            .iter()
            .max_by(|(_, a), (_, b)| {
                a.avg_throughput
                    .partial_cmp(&b.avg_throughput)
                    .expect("Operation failed")
            })
            .map(|(strategy, _)| *strategy);

        if let Some(strategy) = best_strategy {
            recommendations.push(format!("Best overall strategy: {strategy:?}"));
        }

        let large_size_threshold = 50_000;
        let large_measurements: Vec<_> = measurements
            .iter()
            .filter(|m| m.input_size >= large_size_threshold)
            .collect();

        if !large_measurements.is_empty() {
            let best_large_strategy = large_measurements
                .iter()
                .max_by(|a, b| {
                    a.throughput
                        .partial_cmp(&b.throughput)
                        .expect("Operation failed")
                })
                .map(|m| m.strategy);

            if let Some(strategy) = best_large_strategy {
                recommendations.push(format!(
                    "For large datasets (>{large_size_threshold}): Use {strategy:?}"
                ));
            }
        }

        let most_efficient = strategy_summary
            .iter()
            .max_by(|(_, a), (_, b)| {
                a.efficiency_score
                    .partial_cmp(&b.efficiency_score)
                    .expect("Operation failed")
            })
            .map(|(strategy, perf)| (*strategy, perf.efficiency_score));

        if let Some((strategy, score)) = most_efficient {
            if score > 0.8 {
                recommendations.push(format!(
                    "Most memory-efficient strategy: {strategy:?} (efficiency: {score:.2})"
                ));
            }
        }

        let parallel_measurements: Vec<_> = measurements
            .iter()
            .filter(|m| m.strategy == OptimizationStrategy::Parallel)
            .collect();

        if parallel_measurements.len() >= 2 {
            let throughput_growth = parallel_measurements
                .last()
                .expect("Operation failed")
                .throughput
                / parallel_measurements
                    .first()
                    .expect("Operation failed")
                    .throughput;
            if throughput_growth < 2.0 {
                recommendations.push(
                    "Parallel strategy shows poor scalability - consider algorithmic improvements"
                        .to_string(),
                );
            }
        }

        if recommendations.is_empty() {
            recommendations.push(
                "Performance analysis complete - all strategies show similar characteristics"
                    .to_string(),
            );
        }

        recommendations
    }

    /// Rough estimate of peak memory use, in bytes, for `size` elements under `strategy`.
    fn estimate_memory_usage(&self, size: usize, strategy: OptimizationStrategy) -> usize {
        let base_memory = size * std::mem::size_of::<f64>();
        match strategy {
            OptimizationStrategy::Scalar => base_memory,
            // SIMD adds a small fixed overhead for temporaries.
            OptimizationStrategy::Simd => base_memory + 1024,
            // Parallel execution roughly doubles the working set.
            OptimizationStrategy::Parallel => base_memory + size * std::mem::size_of::<f64>(),
            // GPU execution keeps an additional device-side copy.
            OptimizationStrategy::Gpu => base_memory * 2,
            _ => base_memory,
        }
    }
}

/// Ready-made benchmark configurations for common workload classes.
pub mod presets {
    use super::*;

    /// Configuration for element-wise array operations.
    pub fn array_operations() -> BenchmarkConfig {
        BenchmarkConfig {
            warmup_iterations: 3,
            measurement_iterations: 10,
            min_duration: Duration::from_millis(50),
            max_duration: Duration::from_secs(10),
            sample_sizes: vec![100, 1_000, 10_000, 100_000],
            strategies: {
                let mut set = std::collections::HashSet::new();
                set.insert(OptimizationStrategy::Scalar);
                set.insert(OptimizationStrategy::Simd);
                set.insert(OptimizationStrategy::Parallel);
                set.insert(OptimizationStrategy::ModernArchOptimized);
                set.insert(OptimizationStrategy::VectorOptimized);
                set.insert(OptimizationStrategy::EnergyEfficient);
                set.into_iter().collect::<Vec<_>>()
            },
        }
    }

    /// Configuration for matrix operations.
    pub fn matrix_operations() -> BenchmarkConfig {
        BenchmarkConfig {
            warmup_iterations: 5,
            measurement_iterations: 15,
            min_duration: Duration::from_millis(100),
            max_duration: Duration::from_secs(30),
            sample_sizes: vec![64, 128, 256, 512, 1024],
            strategies: {
                let mut set = std::collections::HashSet::new();
                set.insert(OptimizationStrategy::Scalar);
                set.insert(OptimizationStrategy::Simd);
                set.insert(OptimizationStrategy::Parallel);
                set.insert(OptimizationStrategy::CacheOptimized);
                set.insert(OptimizationStrategy::ModernArchOptimized);
                set.insert(OptimizationStrategy::VectorOptimized);
                set.insert(OptimizationStrategy::HighThroughput);
                set.into_iter().collect::<Vec<_>>()
            },
        }
    }

    /// Configuration for memory-intensive workloads.
    pub fn memory_intensive() -> BenchmarkConfig {
        BenchmarkConfig {
            warmup_iterations: 2,
            measurement_iterations: 8,
            min_duration: Duration::from_millis(200),
            max_duration: Duration::from_secs(20),
            sample_sizes: vec![1_000, 10_000, 100_000, 1_000_000, 10_000_000],
            strategies: {
                let mut set = std::collections::HashSet::new();
                set.insert(OptimizationStrategy::Scalar);
                set.insert(OptimizationStrategy::MemoryBound);
                set.insert(OptimizationStrategy::CacheOptimized);
                set.insert(OptimizationStrategy::ModernArchOptimized);
                set.insert(OptimizationStrategy::HighThroughput);
                set.insert(OptimizationStrategy::EnergyEfficient);
                set.into_iter().collect::<Vec<_>>()
            },
        }
    }

    /// Exhaustive configuration covering all strategies and a wide range of sizes.
    pub fn advanced_comprehensive() -> BenchmarkConfig {
        BenchmarkConfig {
            warmup_iterations: 10,
            measurement_iterations: 25,
            min_duration: Duration::from_millis(100),
            max_duration: Duration::from_secs(60),
            sample_sizes: vec![
                100, 500, 1_000, 5_000, 10_000, 50_000, 100_000, 500_000, 1_000_000, 5_000_000,
            ],
            strategies: {
                let mut set = std::collections::HashSet::new();
                set.insert(OptimizationStrategy::Scalar);
                set.insert(OptimizationStrategy::Simd);
                set.insert(OptimizationStrategy::Parallel);
                set.insert(OptimizationStrategy::Gpu);
                set.insert(OptimizationStrategy::Hybrid);
                set.insert(OptimizationStrategy::CacheOptimized);
                set.insert(OptimizationStrategy::MemoryBound);
                set.insert(OptimizationStrategy::ComputeBound);
                set.insert(OptimizationStrategy::ModernArchOptimized);
                set.insert(OptimizationStrategy::VectorOptimized);
                set.insert(OptimizationStrategy::EnergyEfficient);
                set.insert(OptimizationStrategy::HighThroughput);
                set.into_iter().collect::<Vec<_>>()
            },
        }
    }

    /// Configuration focused on strategies tuned for modern architectures.
    pub fn modern_architectures() -> BenchmarkConfig {
        BenchmarkConfig {
            warmup_iterations: 5,
            measurement_iterations: 15,
            min_duration: Duration::from_millis(50),
            max_duration: Duration::from_secs(30),
            sample_sizes: vec![1_000, 10_000, 100_000, 1_000_000],
            strategies: {
                let mut set = std::collections::HashSet::new();
                set.insert(OptimizationStrategy::ModernArchOptimized);
                set.insert(OptimizationStrategy::VectorOptimized);
                set.insert(OptimizationStrategy::HighThroughput);
                set.insert(OptimizationStrategy::EnergyEfficient);
                set.into_iter().collect::<Vec<_>>()
            },
        }
    }
}


#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_benchmark_config_default() {
        let config = BenchmarkConfig::default();
        assert!(config.warmup_iterations > 0);
        assert!(config.measurement_iterations > 0);
        assert!(!config.sample_sizes.is_empty());
        assert!(!config.strategies.is_empty());
    }
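
    // A minimal end-to-end sketch of `benchmark_operation`, added for
    // illustration: the closure simply sums its input and times itself, it is
    // not one of the crate's real optimized kernels, and the tiny iteration
    // counts are chosen only to keep the test fast.
    #[test]
    fn test_benchmark_operation_smoke() {
        let config = BenchmarkConfig {
            warmup_iterations: 1,
            measurement_iterations: 2,
            sample_sizes: vec![100, 1_000],
            strategies: vec![OptimizationStrategy::Scalar],
            ..BenchmarkConfig::default()
        };
        let runner = BenchmarkRunner::new(config);

        let results = runner.benchmark_operation("sum", |data, _strategy| {
            let start = std::time::Instant::now();
            let sum: f64 = data.iter().sum();
            (start.elapsed(), vec![sum])
        });

        assert_eq!(results.operation_name, "sum");
        // One measurement per (sample size, strategy) pair.
        assert_eq!(results.measurements.len(), 2);
        assert!(!results.recommendations.is_empty());
        assert!(results
            .strategy_summary
            .contains_key(&OptimizationStrategy::Scalar));
    }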

    #[test]
    fn test_bottleneck_type_enum() {
        let bottleneck_types = [
            BottleneckType::MemoryBandwidth,
            BottleneckType::CacheLatency,
            BottleneckType::ComputeBound,
            BottleneckType::SynchronizationOverhead,
            BottleneckType::AlgorithmicComplexity,
        ];

        for bt in &bottleneck_types {
            assert!(!format!("{bt:?}").is_empty());
        }

        assert_eq!(
            BottleneckType::MemoryBandwidth,
            BottleneckType::MemoryBandwidth
        );
        assert_ne!(
            BottleneckType::MemoryBandwidth,
            BottleneckType::CacheLatency
        );
    }
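
    // A small check of the private least-squares helper, added for
    // illustration: on the exact line y = 2x + 3 the fit should recover both
    // coefficients and report an R-squared of 1. Private methods are callable
    // here because `tests` is a child module.
    #[test]
    fn test_fit_linear_model_exact_line() {
        let runner = BenchmarkRunner::new(BenchmarkConfig::default());
        let x = [1.0, 2.0, 3.0, 4.0];
        let y: Vec<f64> = x.iter().map(|xi| 2.0 * xi + 3.0).collect();

        let fit = runner.fit_linear_model(&x, &y);
        assert!((fit.linear_coefficient - 2.0).abs() < 1e-9);
        assert!((fit.constant_coefficient - 3.0).abs() < 1e-9);
        assert!((fit.r_squared - 1.0).abs() < 1e-9);
    }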

    #[test]
    fn test_benchmark_presets() {
        let configs = [
            presets::array_operations(),
            presets::matrix_operations(),
            presets::memory_intensive(),
            presets::advanced_comprehensive(),
            presets::modern_architectures(),
        ];

        for config in &configs {
            assert!(config.warmup_iterations > 0);
            assert!(config.measurement_iterations > 0);
            assert!(!config.sample_sizes.is_empty());
            assert!(!config.strategies.is_empty());
        }
    }
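
    // A sketch exercising the memory-usage heuristic: the scalar estimate is
    // just the input buffer itself, and the other strategies checked here
    // should never report less than that. The exact overheads are treated as
    // implementation details, so only the relative ordering is asserted.
    #[test]
    fn test_estimate_memory_usage_ordering() {
        let runner = BenchmarkRunner::new(BenchmarkConfig::default());
        let size = 1_000;
        let scalar = runner.estimate_memory_usage(size, OptimizationStrategy::Scalar);
        assert_eq!(scalar, size * std::mem::size_of::<f64>());

        for &strategy in &[
            OptimizationStrategy::Simd,
            OptimizationStrategy::Parallel,
            OptimizationStrategy::Gpu,
        ] {
            assert!(runner.estimate_memory_usage(size, strategy) >= scalar);
        }
    }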
}