//! Benchmarking utilities for measuring and comparing optimization strategies
//! across a range of input sizes.

use crate::performance_optimization::{AdaptiveOptimizer, OptimizationStrategy};
use std::collections::HashMap;
use std::time::{Duration, Instant};

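/// Configuration for a benchmark run: warmup and measurement iteration counts,
/// duration bounds, the input sizes to sweep, and the strategies to compare.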
#[derive(Debug, Clone)]
pub struct BenchmarkConfig {
    pub warmup_iterations: usize,
    pub measurement_iterations: usize,
    pub min_duration: Duration,
    pub max_duration: Duration,
    pub sample_sizes: Vec<usize>,
    pub strategies: Vec<OptimizationStrategy>,
}

impl Default for BenchmarkConfig {
    fn default() -> Self {
        Self {
            warmup_iterations: 5,
            measurement_iterations: 20,
            min_duration: Duration::from_millis(100),
            max_duration: Duration::from_secs(30),
            sample_sizes: vec![100, 1_000, 10_000, 100_000, 1_000_000],
            strategies: vec![
                OptimizationStrategy::Scalar,
                OptimizationStrategy::Simd,
                OptimizationStrategy::Parallel,
            ],
        }
    }
}

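/// A single timed measurement for one strategy at one input size.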
#[derive(Debug, Clone)]
pub struct BenchmarkMeasurement {
    pub strategy: OptimizationStrategy,
    pub input_size: usize,
    pub duration: Duration,
    pub throughput: f64,
    pub memory_usage: usize,
    pub custom_metrics: HashMap<String, f64>,
}

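/// Aggregated results for one benchmarked operation: raw measurements,
/// per-strategy summaries, scalability analysis, and textual recommendations.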
#[derive(Debug, Clone)]
pub struct BenchmarkResults {
    pub operation_name: String,
    pub measurements: Vec<BenchmarkMeasurement>,
    pub strategy_summary: HashMap<OptimizationStrategy, StrategyPerformance>,
    pub scalability_analysis: ScalabilityAnalysis,
    pub recommendations: Vec<String>,
    pub total_duration: Duration,
}

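/// Summary statistics for one optimization strategy across all measured sizes.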
#[derive(Debug, Clone)]
pub struct StrategyPerformance {
    pub avg_throughput: f64,
    pub throughput_stddev: f64,
    pub avg_memory_usage: f64,
    pub optimal_size: usize,
    pub efficiency_score: f64,
}

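/// How performance scales with input size: parallel efficiency per size,
/// a linear model of memory growth, and any detected bottlenecks.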
#[derive(Debug, Clone)]
pub struct ScalabilityAnalysis {
    pub parallel_efficiency: HashMap<usize, f64>,
    pub memory_scaling: MemoryScaling,
    pub bottlenecks: Vec<PerformanceBottleneck>,
}

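/// Linear fit of memory usage against input size
/// (usage ~ linear_coefficient * size + constant_coefficient).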
#[derive(Debug, Clone)]
pub struct MemoryScaling {
    pub linear_coefficient: f64,
    pub constant_coefficient: f64,
    pub r_squared: f64,
}

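/// A detected performance bottleneck, its affected size range, relative impact,
/// and a suggested mitigation.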
#[derive(Debug, Clone)]
pub struct PerformanceBottleneck {
    pub bottleneck_type: BottleneckType,
    pub size_range: (usize, usize),
    pub impact: f64,
    pub mitigation: String,
}

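/// Categories of bottleneck the analysis can report.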
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BottleneckType {
    MemoryBandwidth,
    CacheLatency,
    ComputeBound,
    SynchronizationOverhead,
    AlgorithmicComplexity,
}

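/// Runs benchmarks according to a `BenchmarkConfig` and analyzes the collected measurements.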
#[allow(dead_code)]
pub struct BenchmarkRunner {
    config: BenchmarkConfig,
    #[allow(dead_code)]
    optimizer: AdaptiveOptimizer,
}

impl BenchmarkRunner {
    pub fn new(config: BenchmarkConfig) -> Self {
        Self {
            config,
            optimizer: AdaptiveOptimizer::new(),
        }
    }

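    /// Benchmarks `operation` for every configured input size and strategy.
    /// The closure receives the input slice and the strategy to apply, and must
    /// return the measured duration together with its output values.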
    pub fn benchmark_operation<F>(&self, operation_name: &str, operation: F) -> BenchmarkResults
    where
        F: Fn(&[f64], OptimizationStrategy) -> (Duration, Vec<f64>) + Send + Sync,
    {
        let start_time = Instant::now();
        let mut measurements = Vec::new();

        for &size in &self.config.sample_sizes {
            let input_data: Vec<f64> = (0..size).map(|i| i as f64).collect();

            for &strategy in &self.config.strategies {
                for _ in 0..self.config.warmup_iterations {
                    let _ = operation(&input_data, strategy);
                }

                let mut durations = Vec::new();
                for _ in 0..self.config.measurement_iterations {
                    // Record the duration reported by the operation itself.
                    let (duration, _result) = operation(&input_data, strategy);
                    durations.push(duration);
                }

                let avg_duration = Duration::from_nanos(
                    (durations.iter().map(|d| d.as_nanos()).sum::<u128>()
                        / durations.len() as u128) as u64,
                );

                let throughput = if avg_duration.as_nanos() > 0 {
                    (size as f64) / (avg_duration.as_secs_f64())
                } else {
                    0.0
                };

                let memory_usage = self.estimate_memory_usage(size, strategy);

                measurements.push(BenchmarkMeasurement {
                    strategy,
                    input_size: size,
                    duration: avg_duration,
                    throughput,
                    memory_usage,
                    custom_metrics: HashMap::new(),
                });
            }
        }

        let strategy_summary = self.analyze_strategy_performance(&measurements);
        let scalability_analysis = self.analyze_scalability(&measurements);
        let recommendations = self.generate_recommendations(&measurements, &strategy_summary);

        BenchmarkResults {
            operation_name: operation_name.to_string(),
            measurements,
            strategy_summary,
            scalability_analysis,
            recommendations,
            total_duration: start_time.elapsed(),
        }
    }

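    /// Groups measurements by strategy and computes per-strategy summary statistics
    /// (mean and standard deviation of throughput, mean memory usage, the input size
    /// with the highest throughput, and a memory-efficiency score).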
    fn analyze_strategy_performance(
        &self,
        measurements: &[BenchmarkMeasurement],
    ) -> HashMap<OptimizationStrategy, StrategyPerformance> {
        let mut strategy_map: HashMap<OptimizationStrategy, Vec<&BenchmarkMeasurement>> =
            HashMap::new();

        for measurement in measurements {
            strategy_map
                .entry(measurement.strategy)
                .or_default()
                .push(measurement);
        }

        let mut summary = HashMap::new();
        for (strategy, strategy_measurements) in strategy_map {
            let throughputs: Vec<f64> =
                strategy_measurements.iter().map(|m| m.throughput).collect();
            let memory_usages: Vec<f64> = strategy_measurements
                .iter()
                .map(|m| m.memory_usage as f64)
                .collect();

            let avg_throughput = throughputs.iter().sum::<f64>() / throughputs.len() as f64;
            let throughput_variance = throughputs
                .iter()
                .map(|&x| (x - avg_throughput).powi(2))
                .sum::<f64>()
                / throughputs.len() as f64;
            let throughput_stddev = throughput_variance.sqrt();

            let avg_memory_usage = memory_usages.iter().sum::<f64>() / memory_usages.len() as f64;

            let optimal_size = strategy_measurements
                .iter()
                .max_by(|a, b| a.throughput.partial_cmp(&b.throughput).unwrap())
                .map(|m| m.input_size)
                .unwrap_or(0);

            let efficiency_score = if avg_memory_usage > 0.0 {
                (avg_throughput / avg_memory_usage * 1e6).min(1.0)
            } else {
                0.0
            };

            summary.insert(
                strategy,
                StrategyPerformance {
                    avg_throughput,
                    throughput_stddev,
                    avg_memory_usage,
                    optimal_size,
                    efficiency_score,
                },
            );
        }

        summary
    }

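    /// Derives parallel efficiency per input size, fits a linear model of memory usage
    /// versus size, and collects detected bottlenecks.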
    fn analyze_scalability(&self, measurements: &[BenchmarkMeasurement]) -> ScalabilityAnalysis {
        let mut parallel_efficiency = HashMap::new();
        let mut memory_sizes = Vec::new();
        let mut memory_usages = Vec::new();

        for &size in &self.config.sample_sizes {
            let scalar_throughput = measurements
                .iter()
                .find(|m| m.input_size == size && m.strategy == OptimizationStrategy::Scalar)
                .map(|m| m.throughput)
                .unwrap_or(0.0);

            let parallel_throughput = measurements
                .iter()
                .find(|m| m.input_size == size && m.strategy == OptimizationStrategy::Parallel)
                .map(|m| m.throughput)
                .unwrap_or(0.0);

            if scalar_throughput > 0.0 {
                // Efficiency relative to an assumed four-way parallel speedup, capped at 1.0.
                let efficiency = parallel_throughput / (scalar_throughput * 4.0);
                parallel_efficiency.insert(size, efficiency.min(1.0));
            }

            memory_sizes.push(size as f64);
            if let Some(measurement) = measurements.iter().find(|m| m.input_size == size) {
                memory_usages.push(measurement.memory_usage as f64);
            }
        }

        let memory_scaling = self.fit_linear_model(&memory_sizes, &memory_usages);

        let bottlenecks = self.identify_bottlenecks(measurements);

        ScalabilityAnalysis {
            parallel_efficiency,
            memory_scaling,
            bottlenecks,
        }
    }

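    /// Ordinary least-squares fit of `y` on `x`, returning slope, intercept, and R^2.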
    fn fit_linear_model(&self, x: &[f64], y: &[f64]) -> MemoryScaling {
        if x.len() != y.len() || x.is_empty() {
            return MemoryScaling {
                linear_coefficient: 0.0,
                constant_coefficient: 0.0,
                r_squared: 0.0,
            };
        }

        let n = x.len() as f64;
        let sum_x = x.iter().sum::<f64>();
        let sum_y = y.iter().sum::<f64>();
        let sum_xy = x.iter().zip(y.iter()).map(|(xi, yi)| xi * yi).sum::<f64>();
        let sum_x2 = x.iter().map(|xi| xi * xi).sum::<f64>();

        let slope = (n * sum_xy - sum_x * sum_y) / (n * sum_x2 - sum_x * sum_x);
        let intercept = (sum_y - slope * sum_x) / n;

        let y_mean = sum_y / n;
        let ss_tot = y.iter().map(|yi| (yi - y_mean).powi(2)).sum::<f64>();
        let ss_res = x
            .iter()
            .zip(y.iter())
            .map(|(xi, yi)| (yi - (slope * xi + intercept)).powi(2))
            .sum::<f64>();

        let r_squared = if ss_tot > 0.0 {
            1.0 - (ss_res / ss_tot)
        } else {
            0.0
        };

        MemoryScaling {
            linear_coefficient: slope,
            constant_coefficient: intercept,
            r_squared,
        }
    }

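    /// Flags likely bottlenecks per input size using simple heuristics on the spread
    /// between strategies and on the parallel speedup over scalar.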
    fn identify_bottlenecks(
        &self,
        measurements: &[BenchmarkMeasurement],
    ) -> Vec<PerformanceBottleneck> {
        let mut bottlenecks = Vec::new();

        let mut size_groups: HashMap<usize, Vec<&BenchmarkMeasurement>> = HashMap::new();
        for measurement in measurements {
            size_groups
                .entry(measurement.input_size)
                .or_default()
                .push(measurement);
        }

        for (&size, group) in &size_groups {
            let max_throughput = group.iter().map(|m| m.throughput).fold(0.0f64, f64::max);
            let min_throughput = group
                .iter()
                .map(|m| m.throughput)
                .fold(f64::INFINITY, f64::min);

            // A throughput spread of more than 50% across strategies at the same size
            // is treated as a memory-bandwidth limitation.
            if max_throughput > 0.0 && (max_throughput - min_throughput) / max_throughput > 0.5 {
                let impact = (max_throughput - min_throughput) / max_throughput;
                bottlenecks.push(PerformanceBottleneck {
                    bottleneck_type: BottleneckType::MemoryBandwidth,
                    size_range: (size, size),
                    impact,
                    mitigation: "Consider cache-friendly data layouts or memory prefetching"
                        .to_string(),
                });
            }

            let scalar_perf = group
                .iter()
                .find(|m| m.strategy == OptimizationStrategy::Scalar)
                .map(|m| m.throughput)
                .unwrap_or(0.0);

            let parallel_perf = group
                .iter()
                .find(|m| m.strategy == OptimizationStrategy::Parallel)
                .map(|m| m.throughput)
                .unwrap_or(0.0);

            // Less than a 2x parallel speedup is flagged as synchronization overhead,
            // with impact measured against an assumed four-way speedup.
            if scalar_perf > 0.0 && parallel_perf / scalar_perf < 2.0 {
                let impact = 1.0 - (parallel_perf / (scalar_perf * 4.0));
                bottlenecks.push(PerformanceBottleneck {
                    bottleneck_type: BottleneckType::SynchronizationOverhead,
                    size_range: (size, size),
                    impact,
                    mitigation: "Reduce synchronization points or increase work per thread"
                        .to_string(),
                });
            }
        }

        bottlenecks
    }

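    /// Builds human-readable recommendations from the per-strategy summaries and the
    /// raw measurements (best overall strategy, best strategy for large inputs,
    /// memory efficiency, and parallel scalability).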
    fn generate_recommendations(
        &self,
        measurements: &[BenchmarkMeasurement],
        strategy_summary: &HashMap<OptimizationStrategy, StrategyPerformance>,
    ) -> Vec<String> {
        let mut recommendations = Vec::new();

        let best_strategy = strategy_summary
            .iter()
            .max_by(|(_, a), (_, b)| a.avg_throughput.partial_cmp(&b.avg_throughput).unwrap())
            .map(|(strategy, _)| *strategy);

        if let Some(strategy) = best_strategy {
            recommendations.push(format!("Best overall strategy: {strategy:?}"));
        }

        let large_size_threshold = 50_000;
        let large_measurements: Vec<_> = measurements
            .iter()
            .filter(|m| m.input_size >= large_size_threshold)
            .collect();

        if !large_measurements.is_empty() {
            let best_large_strategy = large_measurements
                .iter()
                .max_by(|a, b| a.throughput.partial_cmp(&b.throughput).unwrap())
                .map(|m| m.strategy);

            if let Some(strategy) = best_large_strategy {
                recommendations.push(format!(
                    "For large datasets (>={large_size_threshold}): Use {strategy:?}"
                ));
            }
        }

        let most_efficient = strategy_summary
            .iter()
            .max_by(|(_, a), (_, b)| {
                a.efficiency_score.partial_cmp(&b.efficiency_score).unwrap()
            })
            .map(|(strategy, perf)| (*strategy, perf.efficiency_score));

        if let Some((strategy, score)) = most_efficient {
            if score > 0.8 {
                recommendations.push(format!(
                    "Most memory-efficient strategy: {strategy:?} (efficiency: {score:.2})"
                ));
            }
        }

        let parallel_measurements: Vec<_> = measurements
            .iter()
            .filter(|m| m.strategy == OptimizationStrategy::Parallel)
            .collect();

        if parallel_measurements.len() >= 2 {
            let throughput_growth = parallel_measurements.last().unwrap().throughput
                / parallel_measurements.first().unwrap().throughput;
            if throughput_growth < 2.0 {
                recommendations.push(
                    "Parallel strategy shows poor scalability - consider algorithmic improvements"
                        .to_string(),
                );
            }
        }

        if recommendations.is_empty() {
            recommendations.push(
                "Performance analysis complete - all strategies show similar characteristics"
                    .to_string(),
            );
        }

        recommendations
    }

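    /// Roughly estimates peak memory usage in bytes for `size` f64 elements under the
    /// given strategy, adding fixed or proportional overheads for SIMD, parallel, and GPU paths.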
    fn estimate_memory_usage(&self, size: usize, strategy: OptimizationStrategy) -> usize {
        let base_memory = size * std::mem::size_of::<f64>();
        match strategy {
            OptimizationStrategy::Scalar => base_memory,
            OptimizationStrategy::Simd => base_memory + 1024,
            OptimizationStrategy::Parallel => base_memory + size * std::mem::size_of::<f64>(),
            OptimizationStrategy::Gpu => base_memory * 2,
            _ => base_memory,
        }
    }
}

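/// Ready-made `BenchmarkConfig` presets for common workload profiles.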
pub mod presets {
    use super::*;

    pub fn array_operations() -> BenchmarkConfig {
        BenchmarkConfig {
            warmup_iterations: 3,
            measurement_iterations: 10,
            min_duration: Duration::from_millis(50),
            max_duration: Duration::from_secs(10),
            sample_sizes: vec![100, 1_000, 10_000, 100_000],
            strategies: {
                let mut set = std::collections::HashSet::new();
                set.insert(OptimizationStrategy::Scalar);
                set.insert(OptimizationStrategy::Simd);
                set.insert(OptimizationStrategy::Parallel);
                set.insert(OptimizationStrategy::ModernArchOptimized);
                set.insert(OptimizationStrategy::VectorOptimized);
                set.insert(OptimizationStrategy::EnergyEfficient);
                set.into_iter().collect::<Vec<_>>()
            },
        }
    }

    pub fn matrix_operations() -> BenchmarkConfig {
        BenchmarkConfig {
            warmup_iterations: 5,
            measurement_iterations: 15,
            min_duration: Duration::from_millis(100),
            max_duration: Duration::from_secs(30),
            sample_sizes: vec![64, 128, 256, 512, 1024],
            strategies: {
                let mut set = std::collections::HashSet::new();
                set.insert(OptimizationStrategy::Scalar);
                set.insert(OptimizationStrategy::Simd);
                set.insert(OptimizationStrategy::Parallel);
                set.insert(OptimizationStrategy::CacheOptimized);
                set.insert(OptimizationStrategy::ModernArchOptimized);
                set.insert(OptimizationStrategy::VectorOptimized);
                set.insert(OptimizationStrategy::HighThroughput);
                set.into_iter().collect::<Vec<_>>()
            },
        }
    }

    pub fn memory_intensive() -> BenchmarkConfig {
        BenchmarkConfig {
            warmup_iterations: 2,
            measurement_iterations: 8,
            min_duration: Duration::from_millis(200),
            max_duration: Duration::from_secs(20),
            sample_sizes: vec![1_000, 10_000, 100_000, 1_000_000, 10_000_000],
            strategies: {
                let mut set = std::collections::HashSet::new();
                set.insert(OptimizationStrategy::Scalar);
                set.insert(OptimizationStrategy::MemoryBound);
                set.insert(OptimizationStrategy::CacheOptimized);
                set.insert(OptimizationStrategy::ModernArchOptimized);
                set.insert(OptimizationStrategy::HighThroughput);
                set.insert(OptimizationStrategy::EnergyEfficient);
                set.into_iter().collect::<Vec<_>>()
            },
        }
    }

    pub fn advanced_comprehensive() -> BenchmarkConfig {
        BenchmarkConfig {
            warmup_iterations: 10,
            measurement_iterations: 25,
            min_duration: Duration::from_millis(100),
            max_duration: Duration::from_secs(60),
            sample_sizes: vec![
                100, 500, 1_000, 5_000, 10_000, 50_000, 100_000, 500_000, 1_000_000, 5_000_000,
            ],
            strategies: {
                let mut set = std::collections::HashSet::new();
                set.insert(OptimizationStrategy::Scalar);
                set.insert(OptimizationStrategy::Simd);
                set.insert(OptimizationStrategy::Parallel);
                set.insert(OptimizationStrategy::Gpu);
                set.insert(OptimizationStrategy::Hybrid);
                set.insert(OptimizationStrategy::CacheOptimized);
                set.insert(OptimizationStrategy::MemoryBound);
                set.insert(OptimizationStrategy::ComputeBound);
                set.insert(OptimizationStrategy::ModernArchOptimized);
                set.insert(OptimizationStrategy::VectorOptimized);
                set.insert(OptimizationStrategy::EnergyEfficient);
                set.insert(OptimizationStrategy::HighThroughput);
                set.into_iter().collect::<Vec<_>>()
            },
        }
    }

    pub fn modern_architectures() -> BenchmarkConfig {
        BenchmarkConfig {
            warmup_iterations: 5,
            measurement_iterations: 15,
            min_duration: Duration::from_millis(50),
            max_duration: Duration::from_secs(30),
            sample_sizes: vec![1_000, 10_000, 100_000, 1_000_000],
            strategies: {
                let mut set = std::collections::HashSet::new();
                set.insert(OptimizationStrategy::ModernArchOptimized);
                set.insert(OptimizationStrategy::VectorOptimized);
                set.insert(OptimizationStrategy::HighThroughput);
                set.insert(OptimizationStrategy::EnergyEfficient);
                set.into_iter().collect::<Vec<_>>()
            },
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_benchmark_config_default() {
        let config = BenchmarkConfig::default();
        assert!(config.warmup_iterations > 0);
        assert!(config.measurement_iterations > 0);
        assert!(!config.sample_sizes.is_empty());
        assert!(!config.strategies.is_empty());
    }

    #[test]
    fn test_bottleneck_type_enum() {
        let bottleneck_types = [
            BottleneckType::MemoryBandwidth,
            BottleneckType::CacheLatency,
            BottleneckType::ComputeBound,
            BottleneckType::SynchronizationOverhead,
            BottleneckType::AlgorithmicComplexity,
        ];

        for bt in &bottleneck_types {
            assert!(!format!("{bt:?}").is_empty());
        }

        assert_eq!(
            BottleneckType::MemoryBandwidth,
            BottleneckType::MemoryBandwidth
        );
        assert_ne!(
            BottleneckType::MemoryBandwidth,
            BottleneckType::CacheLatency
        );
    }

    #[test]
    fn test_benchmark_presets() {
        let configs = [
            presets::array_operations(),
            presets::matrix_operations(),
            presets::memory_intensive(),
            presets::advanced_comprehensive(),
            presets::modern_architectures(),
        ];

        for config in &configs {
            assert!(config.warmup_iterations > 0);
            assert!(config.measurement_iterations > 0);
            assert!(!config.sample_sizes.is_empty());
            assert!(!config.strategies.is_empty());
        }
    }
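
    // Minimal, illustrative smoke test: runs the harness end to end on a trivial
    // summation closure. The tiny iteration counts, single sample size, and the
    // "sum" operation name are arbitrary choices made only to keep the test fast.
    #[test]
    fn test_benchmark_runner_trivial_operation() {
        let config = BenchmarkConfig {
            warmup_iterations: 1,
            measurement_iterations: 2,
            sample_sizes: vec![100],
            strategies: vec![OptimizationStrategy::Scalar],
            ..Default::default()
        };
        let runner = BenchmarkRunner::new(config);

        let results = runner.benchmark_operation("sum", |data, _strategy| {
            let start = std::time::Instant::now();
            let sum: f64 = data.iter().sum();
            (start.elapsed(), vec![sum])
        });

        assert_eq!(results.operation_name, "sum");
        assert_eq!(results.measurements.len(), 1);
        assert!(results.measurements[0].throughput >= 0.0);
    }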
}