1#[cfg(feature = "no-std")]
15extern crate alloc;
16
17#[cfg(feature = "no-std")]
18use alloc::{
19 format,
20 string::{String, ToString},
21 vec::Vec,
22};
23
24use crate::SimdCapabilities;
25
26#[cfg(feature = "no-std")]
27use alloc::collections::BTreeMap as HashMap;
28#[cfg(not(feature = "no-std"))]
29use std::collections::HashMap;
30#[cfg(not(feature = "no-std"))]
31use std::string::ToString;
32#[cfg(not(feature = "no-std"))]
33pub use std::time::Duration;
34
35#[cfg(not(feature = "no-std"))]
36use std::time::Instant;
37
/// Minimal stand-in for `std::time::Duration`, compiled only when the `no-std`
/// feature is enabled.
///
/// The span is stored as a whole number of nanoseconds in a `u64`, so the
/// largest representable value is `u64::MAX` nanoseconds.
#[cfg(feature = "no-std")]
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Duration(u64);

#[cfg(feature = "no-std")]
impl Duration {
    /// Nanoseconds in one millisecond.
    const NANOS_PER_MILLI: u64 = 1_000_000;
    /// Nanoseconds in one second.
    const NANOS_PER_SEC: u64 = 1_000_000_000;

    /// Creates a duration from a whole number of nanoseconds.
    pub const fn from_nanos(nanos: u64) -> Self {
        Duration(nanos)
    }

    /// Creates a duration from a whole number of milliseconds.
    ///
    /// Inputs too large for the nanosecond counter saturate at `u64::MAX`
    /// nanoseconds instead of overflowing (which would panic in debug builds
    /// and silently wrap to a short duration in release builds).
    pub const fn from_millis(millis: u64) -> Self {
        Duration(millis.saturating_mul(Self::NANOS_PER_MILLI))
    }

    /// Creates a duration from a whole number of seconds.
    ///
    /// Saturates at `u64::MAX` nanoseconds, like [`Duration::from_millis`].
    pub const fn from_secs(secs: u64) -> Self {
        Duration(secs.saturating_mul(Self::NANOS_PER_SEC))
    }

    /// Returns the duration in nanoseconds.
    pub const fn as_nanos(&self) -> u128 {
        self.0 as u128
    }

    /// Returns the number of whole milliseconds, discarding any remainder.
    pub const fn as_millis(&self) -> u128 {
        (self.0 / Self::NANOS_PER_MILLI) as u128
    }

    /// Returns the number of whole seconds, discarding any remainder.
    pub const fn as_secs(&self) -> u64 {
        self.0 / Self::NANOS_PER_SEC
    }

    /// Returns the duration in seconds as a floating-point value.
    pub fn as_secs_f64(&self) -> f64 {
        self.0 as f64 / 1_000_000_000.0
    }
}
73
/// Measurements recorded for one benchmark run.
#[derive(Debug, Clone)]
pub struct BenchmarkResult {
    /// Label the benchmark was run under.
    pub name: String,
    /// Measured time; `BenchmarkSuite::benchmark` stores the time taken by the
    /// whole timed loop, not a per-iteration figure.
    pub duration: Duration,
    /// Iterations per second, or `None` when no timing was available.
    pub throughput: Option<f64>,
    /// f32 SIMD width reported for the machine the benchmark ran on.
    pub simd_width: usize,
    /// Name of the instruction set reported for the machine.
    pub architecture: String,
    /// Number of timed iterations.
    pub iterations: u64,
}
87
88#[derive(Debug, Clone)]
90pub struct CrossPlatformResult {
91 pub operation: String,
92 pub results: HashMap<String, BenchmarkResult>,
93 pub best_performance: String,
94 pub speedup_ratios: HashMap<String, f64>,
95}
96
97#[derive(Debug)]
99pub struct RegressionDetector {
100 baseline_results: HashMap<String, BenchmarkResult>,
101 threshold: f64, }
103
104impl RegressionDetector {
105 pub fn new(threshold_percent: f64) -> Self {
107 Self {
108 baseline_results: HashMap::new(),
109 threshold: threshold_percent / 100.0,
110 }
111 }
112
113 pub fn set_baseline(&mut self, results: Vec<BenchmarkResult>) {
115 for result in results {
116 self.baseline_results.insert(result.name.clone(), result);
117 }
118 }
119
120 pub fn check_regression(&self, current_results: &[BenchmarkResult]) -> Vec<RegressionReport> {
122 let mut regressions = Vec::new();
123
124 for current in current_results {
125 if let Some(baseline) = self.baseline_results.get(¤t.name) {
126 let baseline_ns = baseline.duration.as_nanos() as f64;
127 let current_ns = current.duration.as_nanos() as f64;
128 let change_ratio = (current_ns - baseline_ns) / baseline_ns;
129
130 if change_ratio > self.threshold {
131 regressions.push(RegressionReport {
132 operation: current.name.clone(),
133 baseline_duration: baseline.duration,
134 current_duration: current.duration,
135 regression_percent: change_ratio * 100.0,
136 severity: if change_ratio > 0.2 {
137 Severity::Critical
138 } else if change_ratio > 0.1 {
139 Severity::High
140 } else {
141 Severity::Medium
142 },
143 });
144 }
145 }
146 }
147
148 regressions
149 }
150}
151
152#[derive(Debug)]
154pub struct RegressionReport {
155 pub operation: String,
156 pub baseline_duration: Duration,
157 pub current_duration: Duration,
158 pub regression_percent: f64,
159 pub severity: Severity,
160}
161
/// Severity assigned to a detected regression.
///
/// Variants are declared from least to most severe, so the derived ordering
/// gives `Medium < High < Critical`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum Severity {
    /// Slowdown above the detection threshold but below the `High` cut-off.
    Medium,
    /// Slowdown above the `High` cut-off.
    High,
    /// Slowdown above the `Critical` cut-off.
    Critical,
}
168
169pub struct BenchmarkSuite {
175 capabilities: SimdCapabilities,
176 results: Vec<BenchmarkResult>,
177}
178
179impl Default for BenchmarkSuite {
180 fn default() -> Self {
181 Self::new()
182 }
183}
184
185impl BenchmarkSuite {
186 pub fn new() -> Self {
188 Self {
189 capabilities: SimdCapabilities::detect(),
190 results: Vec::new(),
191 }
192 }
193
194 pub fn benchmark<F>(&mut self, name: &str, iterations: u64, mut operation: F) -> BenchmarkResult
199 where
200 F: FnMut(),
201 {
202 for _ in 0..10 {
204 operation();
205 }
206
207 #[cfg(not(feature = "no-std"))]
208 let (duration, throughput) = {
209 let start = Instant::now();
210 for _ in 0..iterations {
211 operation();
212 }
213 let duration = start.elapsed();
214 let throughput = Some(iterations as f64 / duration.as_secs_f64());
215 (duration, throughput)
216 };
217
218 #[cfg(feature = "no-std")]
219 let (duration, throughput) = {
220 for _ in 0..iterations {
222 operation();
223 }
224 (Duration::from_nanos(1), None)
226 };
227
228 let result = BenchmarkResult {
229 name: name.to_string(),
230 duration,
231 throughput,
232 simd_width: self.capabilities.best_f32_width(),
233 architecture: self.get_architecture_name(),
234 iterations,
235 };
236
237 self.results.push(result.clone());
238 result
239 }
240
241 pub fn cross_platform_benchmark<F>(
243 &mut self,
244 operation_name: &str,
245 data_size: usize,
246 operation: F,
247 ) -> CrossPlatformResult
248 where
249 F: Fn(&[f32]) -> f32 + Copy,
250 {
251 let test_data: Vec<f32> = (0..data_size).map(|i| i as f32).collect();
252 let mut results = HashMap::new();
253
254 let scalar_result = self.benchmark(&format!("{}_scalar", operation_name), 1000, || {
256 let _ = operation(&test_data);
257 });
258 results.insert("scalar".to_string(), scalar_result);
259
260 if self.capabilities.sse2 {
262 let sse2_result = self.benchmark(&format!("{}_sse2", operation_name), 1000, || {
263 let _ = operation(&test_data);
264 });
265 results.insert("sse2".to_string(), sse2_result);
266 }
267
268 if self.capabilities.avx2 {
269 let avx2_result = self.benchmark(&format!("{}_avx2", operation_name), 1000, || {
270 let _ = operation(&test_data);
271 });
272 results.insert("avx2".to_string(), avx2_result);
273 }
274
275 if self.capabilities.avx512 {
276 let avx512_result = self.benchmark(&format!("{}_avx512", operation_name), 1000, || {
277 let _ = operation(&test_data);
278 });
279 results.insert("avx512".to_string(), avx512_result);
280 }
281
282 if self.capabilities.neon {
283 let neon_result = self.benchmark(&format!("{}_neon", operation_name), 1000, || {
284 let _ = operation(&test_data);
285 });
286 results.insert("neon".to_string(), neon_result);
287 }
288
289 let best_duration = results
291 .values()
292 .map(|r| r.duration)
293 .min()
294 .unwrap_or(Duration::from_secs(1));
295
296 let best_performance = results
297 .iter()
298 .min_by_key(|(_, result)| result.duration)
299 .map(|(name, _)| name.clone())
300 .unwrap_or_else(|| "unknown".to_string());
301
302 let mut speedup_ratios = HashMap::new();
303 let baseline_duration = results
304 .get("scalar")
305 .map(|r| r.duration)
306 .unwrap_or(best_duration);
307
308 for (name, result) in &results {
309 let speedup = baseline_duration.as_nanos() as f64 / result.duration.as_nanos() as f64;
310 speedup_ratios.insert(name.clone(), speedup);
311 }
312
313 CrossPlatformResult {
314 operation: operation_name.to_string(),
315 results,
316 best_performance,
317 speedup_ratios,
318 }
319 }
320
321 pub fn get_results(&self) -> &[BenchmarkResult] {
323 &self.results
324 }
325
326 pub fn generate_report(&self) -> BenchmarkReport {
328 let total_benchmarks = self.results.len();
329 let avg_duration = if total_benchmarks > 0 {
330 let total_nanos: u128 = self.results.iter().map(|r| r.duration.as_nanos()).sum();
331 Duration::from_nanos((total_nanos / total_benchmarks as u128) as u64)
332 } else {
333 Duration::from_secs(0)
334 };
335
336 let fastest = self.results.iter().min_by_key(|r| r.duration).cloned();
337 let slowest = self.results.iter().max_by_key(|r| r.duration).cloned();
338
339 BenchmarkReport {
340 total_benchmarks,
341 avg_duration,
342 fastest,
343 slowest,
344 architecture: self.get_architecture_name(),
345 simd_width: self.capabilities.best_f32_width(),
346 capabilities: self.capabilities,
347 }
348 }
349
350 fn get_architecture_name(&self) -> String {
351 if self.capabilities.avx512 {
352 "AVX-512".to_string()
353 } else if self.capabilities.avx2 {
354 "AVX2".to_string()
355 } else if self.capabilities.avx {
356 "AVX".to_string()
357 } else if self.capabilities.sse42 {
358 "SSE4.2".to_string()
359 } else if self.capabilities.sse2 {
360 "SSE2".to_string()
361 } else if self.capabilities.neon {
362 "NEON".to_string()
363 } else {
364 "Scalar".to_string()
365 }
366 }
367}
368
369#[derive(Debug)]
371pub struct BenchmarkReport {
372 pub total_benchmarks: usize,
373 pub avg_duration: Duration,
374 pub fastest: Option<BenchmarkResult>,
375 pub slowest: Option<BenchmarkResult>,
376 pub architecture: String,
377 pub simd_width: usize,
378 pub capabilities: SimdCapabilities,
379}
380
381impl BenchmarkReport {
382 pub fn format_report(&self) -> String {
384 let mut report = String::new();
385
386 report.push_str("=== SIMD Performance Benchmark Report ===\n");
387 report.push_str(&format!("Architecture: {}\n", self.architecture));
388 report.push_str(&format!("SIMD Width (f32): {}\n", self.simd_width));
389 report.push_str(&format!("Total Benchmarks: {}\n", self.total_benchmarks));
390 report.push_str(&format!("Average Duration: {:?}\n", self.avg_duration));
391
392 report.push_str("\nCapabilities:\n");
393 report.push_str(&format!(" SSE2: {}\n", self.capabilities.sse2));
394 report.push_str(&format!(" AVX2: {}\n", self.capabilities.avx2));
395 report.push_str(&format!(" AVX-512: {}\n", self.capabilities.avx512));
396 report.push_str(&format!(" NEON: {}\n", self.capabilities.neon));
397
398 if let Some(fastest) = &self.fastest {
399 report.push_str(&format!(
400 "\nFastest Operation: {} ({:?})\n",
401 fastest.name, fastest.duration
402 ));
403 }
404
405 if let Some(slowest) = &self.slowest {
406 report.push_str(&format!(
407 "Slowest Operation: {} ({:?})\n",
408 slowest.name, slowest.duration
409 ));
410 }
411
412 report.push_str("\n=== End Report ===\n");
413 report
414 }
415}
416
417pub struct OptimizationAdvisor {
419 results: Vec<CrossPlatformResult>,
420}
421
422impl Default for OptimizationAdvisor {
423 fn default() -> Self {
424 Self::new()
425 }
426}
427
428impl OptimizationAdvisor {
429 pub fn new() -> Self {
431 Self {
432 results: Vec::new(),
433 }
434 }
435
436 pub fn add_results(&mut self, result: CrossPlatformResult) {
438 self.results.push(result);
439 }
440
441 pub fn generate_recommendations(&self) -> Vec<OptimizationRecommendation> {
443 let mut recommendations = Vec::new();
444
445 for result in &self.results {
446 if let Some(scalar_speedup) = result.speedup_ratios.get("scalar") {
448 if *scalar_speedup < 1.5 {
449 recommendations.push(OptimizationRecommendation {
450 operation: result.operation.clone(),
451 recommendation_type: RecommendationType::AlgorithmOptimization,
452 description: format!(
453 "SIMD implementation for {} shows minimal speedup ({}x). Consider algorithm optimization or data layout changes.",
454 result.operation, scalar_speedup
455 ),
456 priority: Priority::Medium,
457 });
458 }
459 }
460
461 let best_speedup = result.speedup_ratios.values().cloned().fold(0.0, f64::max);
463 if best_speedup < 2.0 {
464 recommendations.push(OptimizationRecommendation {
465 operation: result.operation.clone(),
466 recommendation_type: RecommendationType::MemoryOptimization,
467 description: format!(
468 "Operation {} may be memory-bound. Consider cache optimization, prefetching, or data layout improvements.",
469 result.operation
470 ),
471 priority: Priority::High,
472 });
473 }
474
475 if result.best_performance == "sse2" && result.speedup_ratios.contains_key("avx2") {
477 recommendations.push(OptimizationRecommendation {
478 operation: result.operation.clone(),
479 recommendation_type: RecommendationType::SimdWidthOptimization,
480 description: format!(
481 "Operation {} performs better with SSE2 than AVX2. Consider optimizing for wider SIMD or checking for overhead.",
482 result.operation
483 ),
484 priority: Priority::Medium,
485 });
486 }
487 }
488
489 recommendations
490 }
491}
492
493#[derive(Debug)]
495pub struct OptimizationRecommendation {
496 pub operation: String,
497 pub recommendation_type: RecommendationType,
498 pub description: String,
499 pub priority: Priority,
500}
501
/// Category of an optimization recommendation.
///
/// The enum is fieldless, so it is `Copy` and comparable; callers can filter
/// recommendations by kind with `==`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RecommendationType {
    /// Rework the algorithm or data layout.
    AlgorithmOptimization,
    /// Improve memory behaviour (caching, prefetching, layout).
    MemoryOptimization,
    /// Revisit the choice of SIMD width.
    SimdWidthOptimization,
    /// Adjust how the code is compiled.
    CompilerOptimization,
}
509
/// Priority of an optimization recommendation.
///
/// Variants are declared from least to most urgent, so the derived ordering
/// gives `Low < Medium < High < Critical`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum Priority {
    /// Least urgent.
    Low,
    /// Above `Low`, below `High`.
    Medium,
    /// Above `Medium`, below `Critical`.
    High,
    /// Most urgent.
    Critical,
}
517
#[cfg(all(test, not(feature = "no-std")))]
mod tests {
    use super::*;

    /// Builds a `test_op` result that differs between calls only in duration.
    fn test_op_result(millis: u64) -> BenchmarkResult {
        BenchmarkResult {
            name: "test_op".to_string(),
            duration: Duration::from_millis(millis),
            throughput: None,
            simd_width: 4,
            architecture: "test".to_string(),
            iterations: 1000,
        }
    }

    #[test]
    fn test_benchmark_suite_creation() {
        let suite = BenchmarkSuite::new();
        assert_eq!(suite.results.len(), 0);
    }

    #[test]
    fn test_simple_benchmark() {
        let mut suite = BenchmarkSuite::new();
        let result = suite.benchmark("test_op", 100, || {
            let _sum: f32 = (0..1000).map(|i| i as f32).sum();
        });

        assert_eq!(result.name, "test_op");
        assert_eq!(result.iterations, 100);
        assert!(result.duration > Duration::from_nanos(0));
    }

    #[test]
    fn test_regression_detector() {
        // 10% tolerated slowdown against a 100 ms baseline.
        let mut detector = RegressionDetector::new(10.0);
        detector.set_baseline(vec![test_op_result(100)]);

        // 5% slower: inside the tolerance, so nothing is reported.
        let current = vec![test_op_result(105)];
        let regressions = detector.check_regression(&current);
        assert_eq!(regressions.len(), 0);

        // 20% slower: outside the tolerance.
        let current_regressed = vec![test_op_result(120)];
        let regressions = detector.check_regression(&current_regressed);
        assert_eq!(regressions.len(), 1);
        assert_eq!(regressions[0].operation, "test_op");
        assert!(regressions[0].regression_percent > 10.0);
        // Exactly 20% does not exceed the Critical cut-off, so it rates High.
        assert!(matches!(regressions[0].severity, Severity::High));
    }

    #[test]
    fn test_optimization_advisor() {
        let mut advisor = OptimizationAdvisor::new();

        let mut speedup_ratios = HashMap::new();
        speedup_ratios.insert("scalar".to_string(), 1.2);

        let result = CrossPlatformResult {
            operation: "slow_op".to_string(),
            results: HashMap::new(),
            best_performance: "sse2".to_string(),
            speedup_ratios,
        };

        advisor.add_results(result);
        let recommendations = advisor.generate_recommendations();

        assert!(!recommendations.is_empty());
        assert!(recommendations.iter().any(|r| r.operation == "slow_op"));
    }

    #[test]
    fn test_benchmark_report_formatting() {
        let report = BenchmarkReport {
            total_benchmarks: 5,
            avg_duration: Duration::from_millis(10),
            fastest: None,
            slowest: None,
            architecture: "AVX2".to_string(),
            simd_width: 8,
            capabilities: SimdCapabilities::detect(),
        };

        let formatted = report.format_report();
        assert!(formatted.contains("Architecture: AVX2"));
        assert!(formatted.contains("SIMD Width (f32): 8"));
        assert!(formatted.contains("Total Benchmarks: 5"));
    }
}