1use crate::gpu::{GpuBackend, GpuContext, GpuError};
7use std::collections::HashMap;
8use std::time::{Duration, Instant};
9use thiserror::Error;
10
11#[derive(Error, Debug)]
13pub enum BenchmarkError {
14 #[error("Benchmark setup failed: {0}")]
16 SetupFailed(String),
17
18 #[error("Benchmark execution failed: {0}")]
20 ExecutionFailed(String),
21
22 #[error("Invalid benchmark configuration: {0}")]
24 InvalidConfiguration(String),
25
26 #[error("Results comparison failed: {0}")]
28 ComparisonFailed(String),
29
30 #[error("GPU error: {0}")]
32 GpuError(#[from] GpuError),
33}
34
/// The kinds of workload the benchmark suite knows about.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum BenchmarkOperation {
    /// Matrix multiplication.
    MatrixMultiply,
    /// Vector operations.
    VectorOperations,
    /// Fast Fourier transform.
    FastFourierTransform,
    /// Convolution.
    Convolution,
    /// Reduction.
    Reduction,
    /// Sorting.
    Sorting,
    /// Random number generation.
    RandomGeneration,
    /// Image processing.
    ImageProcessing,
    /// Signal processing.
    SignalProcessing,
    /// Statistics.
    Statistics,
    /// Linear algebra.
    LinearAlgebra,
    /// Sparse matrix operations.
    SparseMatrix,
}

impl BenchmarkOperation {
    /// Returns the human-readable display name of this operation.
    pub const fn name(&self) -> &'static str {
        match *self {
            Self::MatrixMultiply => "Matrix Multiplication",
            Self::VectorOperations => "Vector Operations",
            Self::FastFourierTransform => "Fast Fourier Transform",
            Self::Convolution => "Convolution",
            Self::Reduction => "Reduction",
            Self::Sorting => "Sorting",
            Self::RandomGeneration => "Random Generation",
            Self::ImageProcessing => "Image Processing",
            Self::SignalProcessing => "Signal Processing",
            Self::Statistics => "Statistics",
            Self::LinearAlgebra => "Linear Algebra",
            Self::SparseMatrix => "Sparse Matrix",
        }
    }

    /// Returns the broader [`BenchmarkCategory`] this operation is grouped under.
    pub fn category(&self) -> BenchmarkCategory {
        match *self {
            Self::MatrixMultiply | Self::LinearAlgebra | Self::SparseMatrix => {
                BenchmarkCategory::LinearAlgebra
            }
            Self::VectorOperations | Self::Reduction => BenchmarkCategory::ElementWise,
            Self::FastFourierTransform | Self::Convolution | Self::SignalProcessing => {
                BenchmarkCategory::SignalProcessing
            }
            Self::ImageProcessing => BenchmarkCategory::ImageProcessing,
            Self::Sorting | Self::RandomGeneration | Self::Statistics => {
                BenchmarkCategory::GeneralCompute
            }
        }
    }
}

/// Coarse grouping of [`BenchmarkOperation`]s, as assigned by
/// [`BenchmarkOperation::category`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum BenchmarkCategory {
    /// Matrix multiplication, general linear algebra and sparse matrices.
    LinearAlgebra,
    /// Vector operations and reductions.
    ElementWise,
    /// FFT, convolution and signal processing.
    SignalProcessing,
    /// Image processing.
    ImageProcessing,
    /// Sorting, random generation and statistics.
    GeneralCompute,
}
121
122#[derive(Debug, Clone)]
124pub struct BenchmarkConfig {
125 pub operations: Vec<BenchmarkOperation>,
127 pub problemsizes: Vec<ProblemSize>,
129 pub warmup_iterations: usize,
131 pub benchmark_iterations: usize,
133 pub datatypes: Vec<DataType>,
135 pub gpu_backends: Vec<GpuBackend>,
137 pub verify_correctness: bool,
139 pub tolerance: f64,
141}
142
143impl Default for BenchmarkConfig {
144 fn default() -> Self {
145 Self {
146 operations: vec![
147 BenchmarkOperation::MatrixMultiply,
148 BenchmarkOperation::VectorOperations,
149 BenchmarkOperation::Reduction,
150 ],
151 problemsizes: vec![ProblemSize::Small, ProblemSize::Medium, ProblemSize::Large],
152 warmup_iterations: 3,
153 benchmark_iterations: 10,
154 datatypes: vec![DataType::Float32, DataType::Float64],
155 gpu_backends: vec![GpuBackend::Cuda, GpuBackend::Rocm],
156 verify_correctness: true,
157 tolerance: 1e-6,
158 }
159 }
160}
161
/// Named problem sizes, plus an escape hatch for an explicit size.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ProblemSize {
    /// Smallest preset.
    Small,
    /// Medium preset.
    Medium,
    /// Large preset.
    Large,
    /// Largest preset.
    ExtraLarge,
    /// Caller-chosen size, returned unchanged by both size accessors.
    Custom(usize),
}

impl ProblemSize {
    /// Returns the matrix size `n` for this problem size (other code in this
    /// file uses it as the dimension of `n × n` matrices).
    pub fn matrix_size(&self) -> usize {
        match *self {
            Self::Small => 64,
            Self::Medium => 512,
            Self::Large => 2048,
            Self::ExtraLarge => 8192,
            Self::Custom(dimension) => dimension,
        }
    }

    /// Returns the vector length (element count) for this problem size.
    pub fn vector_size(&self) -> usize {
        const KIB: usize = 1024;
        const MIB: usize = KIB * KIB;

        match *self {
            Self::Small => KIB,
            Self::Medium => MIB,
            Self::Large => 64 * MIB,
            Self::ExtraLarge => 512 * MIB,
            Self::Custom(length) => length,
        }
    }
}
200
/// Element types a benchmark can be run with.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DataType {
    /// 32-bit floating point.
    Float32,
    /// 64-bit floating point.
    Float64,
    /// 16-bit floating point.
    Float16,
    /// 32-bit signed integer.
    Int32,
    /// 32-bit unsigned integer.
    UInt32,
}

impl DataType {
    /// Returns the size of one element of this type, in bytes.
    pub fn size_bytes(&self) -> usize {
        match *self {
            Self::Float16 => 2,
            Self::Float32 | Self::Int32 | Self::UInt32 => 4,
            Self::Float64 => 8,
        }
    }

    /// Returns the short type name (`"f32"`, `"f64"`, `"f16"`, `"i32"`, `"u32"`).
    pub const fn name(&self) -> &'static str {
        match *self {
            Self::Float32 => "f32",
            Self::Float64 => "f64",
            Self::Float16 => "f16",
            Self::Int32 => "i32",
            Self::UInt32 => "u32",
        }
    }
}
237
238#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
240pub enum ComputePlatform {
241 Cpu,
243 Gpu(GpuBackend),
245}
246
247impl ComputePlatform {
248 pub fn name(&self) -> String {
250 match self {
251 ComputePlatform::Cpu => "CPU".to_string(),
252 ComputePlatform::Gpu(backend) => format!("GPU ({backend})"),
253 }
254 }
255}
256
257#[derive(Debug, Clone)]
259pub struct BenchmarkResult {
260 pub operation: BenchmarkOperation,
262 pub platform: ComputePlatform,
264 pub problemsize: ProblemSize,
266 pub datatype: DataType,
268 pub execution_time: Duration,
270 pub time_stddev: Duration,
272 pub throughput: f64,
274 pub memorybandwidth: f64,
276 pub energy_efficiency: Option<f64>,
278 pub peak_memory_usage: usize,
280 pub correctness_verified: bool,
282}
283
284#[derive(Debug, Clone)]
286pub struct BenchmarkComparison {
287 pub operation: BenchmarkOperation,
289 pub problemsize: ProblemSize,
291 pub datatype: DataType,
293 pub platform_results: HashMap<ComputePlatform, BenchmarkResult>,
295 pub speedups: HashMap<GpuBackend, f64>,
297 pub energy_comparison: HashMap<ComputePlatform, f64>,
299 pub recommendation: PlatformRecommendation,
301}
302
303#[derive(Debug, Clone)]
305pub enum PlatformRecommendation {
306 Cpu { reason: String },
308 Gpu { backend: GpuBackend, reason: String },
310 Depends { factors: Vec<String> },
312}
313
314pub struct BenchmarkSuite {
316 config: BenchmarkConfig,
317 results: Vec<BenchmarkResult>,
318 comparisons: Vec<BenchmarkComparison>,
319}
320
321impl BenchmarkSuite {
322 pub fn new(config: BenchmarkConfig) -> Self {
324 Self {
325 config,
326 results: Vec::new(),
327 comparisons: Vec::new(),
328 }
329 }
330
331 pub fn run_all(&mut self) -> Result<(), BenchmarkError> {
333 let operations = self.config.operations.clone();
334 let problemsizes = self.config.problemsizes.clone();
335 let datatypes = self.config.datatypes.clone();
336
337 for operation in operations {
338 for problemsize in problemsizes.iter() {
339 for datatype in datatypes.iter() {
340 self.run_operation_benchmark(operation, *problemsize, *datatype)?;
341 }
342 }
343 }
344
345 self.generate_comparisons()?;
346 Ok(())
347 }
348
349 fn run_operation_benchmark(
351 &mut self,
352 operation: BenchmarkOperation,
353 problemsize: ProblemSize,
354 datatype: DataType,
355 ) -> Result<(), BenchmarkError> {
356 let cpu_result = self.run_cpu_benchmark(operation, problemsize, datatype)?;
358 self.results.push(cpu_result);
359
360 for &backend in &self.config.gpu_backends {
362 if backend.is_available() {
363 match self.run_gpu_benchmark(operation, problemsize, datatype, backend) {
364 Ok(gpu_result) => self.results.push(gpu_result),
365 Err(e) => {
366 eprintln!("GPU benchmark failed for {backend}: {e}");
367 }
368 }
369 }
370 }
371
372 Ok(())
373 }
374
375 fn run_cpu_benchmark(
377 &self,
378 operation: BenchmarkOperation,
379 problemsize: ProblemSize,
380 datatype: DataType,
381 ) -> Result<BenchmarkResult, BenchmarkError> {
382 for _ in 0..self.config.warmup_iterations {
384 self.execute_cpu_operation(operation, problemsize, datatype)?;
385 }
386
387 let mut execution_times = Vec::new();
389 for _ in 0..self.config.benchmark_iterations {
390 let start = Instant::now();
391 self.execute_cpu_operation(operation, problemsize, datatype)?;
392 execution_times.push(start.elapsed());
393 }
394
395 let avg_time = execution_times.iter().sum::<Duration>() / execution_times.len() as u32;
396 let time_stddev = self.calculate_stddev(&execution_times, avg_time);
397
398 Ok(BenchmarkResult {
399 operation,
400 platform: ComputePlatform::Cpu,
401 problemsize,
402 datatype,
403 execution_time: avg_time,
404 time_stddev,
405 throughput: self.calculate_throughput(operation, problemsize, avg_time),
406 memorybandwidth: self.calculate_memorybandwidth(
407 operation,
408 problemsize,
409 datatype,
410 avg_time,
411 ),
412 energy_efficiency: None, peak_memory_usage: self.estimate_memory_usage(operation, problemsize, datatype),
414 correctness_verified: true, })
416 }
417
418 fn run_gpu_benchmark(
420 &self,
421 operation: BenchmarkOperation,
422 problemsize: ProblemSize,
423 datatype: DataType,
424 backend: GpuBackend,
425 ) -> Result<BenchmarkResult, BenchmarkError> {
426 let context =
428 GpuContext::new(backend).map_err(|e| BenchmarkError::SetupFailed(e.to_string()))?;
429
430 for _ in 0..self.config.warmup_iterations {
432 self.execute_gpu_operation(operation, problemsize, datatype, backend)?;
433 }
434
435 let mut execution_times = Vec::new();
437 for _ in 0..self.config.benchmark_iterations {
438 let start = Instant::now();
439 self.execute_gpu_operation(operation, problemsize, datatype, backend)?;
440 execution_times.push(start.elapsed());
441 }
442
443 let avg_time = execution_times.iter().sum::<Duration>() / execution_times.len() as u32;
444 let time_stddev = self.calculate_stddev(&execution_times, avg_time);
445
446 Ok(BenchmarkResult {
447 operation,
448 platform: ComputePlatform::Gpu(backend),
449 problemsize,
450 datatype,
451 execution_time: avg_time,
452 time_stddev,
453 throughput: self.calculate_throughput(operation, problemsize, avg_time),
454 memorybandwidth: self.calculate_memorybandwidth(
455 operation,
456 problemsize,
457 datatype,
458 avg_time,
459 ),
460 energy_efficiency: None,
461 peak_memory_usage: self.estimate_memory_usage(operation, problemsize, datatype),
462 correctness_verified: self.config.verify_correctness,
463 })
464 }
465
466 fn execute_cpu_operation(
468 &self,
469 operation: BenchmarkOperation,
470 problemsize: ProblemSize,
471 datatype: DataType,
472 ) -> Result<(), BenchmarkError> {
473 match operation {
474 BenchmarkOperation::MatrixMultiply => {
475 let n = problemsize.matrix_size();
476 let _result = (0..n * n).map(|i| i as f64).sum::<f64>();
478 Ok(())
479 }
480 BenchmarkOperation::VectorOperations => {
481 let n = problemsize.vector_size();
482 let _result = (0..n).map(|i| (i as f64).sin()).sum::<f64>();
484 Ok(())
485 }
486 _ => {
487 std::thread::sleep(Duration::from_millis(1));
489 Ok(())
490 }
491 }
492 }
493
494 fn execute_gpu_operation(
496 &self,
497 operation: BenchmarkOperation,
498 problemsize: ProblemSize,
499 datatype: DataType,
500 _backend: GpuBackend,
501 ) -> Result<(), BenchmarkError> {
502 match operation {
503 BenchmarkOperation::MatrixMultiply => {
504 let _n = problemsize.matrix_size();
505 std::thread::sleep(Duration::from_micros(100));
507 Ok(())
508 }
509 BenchmarkOperation::VectorOperations => {
510 let _n = problemsize.vector_size();
511 std::thread::sleep(Duration::from_micros(50));
513 Ok(())
514 }
515 _ => {
516 std::thread::sleep(Duration::from_micros(100));
518 Ok(())
519 }
520 }
521 }
522
523 fn generate_comparisons(&mut self) -> Result<(), BenchmarkError> {
525 let mut grouped_results: HashMap<
526 (BenchmarkOperation, ProblemSize, DataType),
527 Vec<&BenchmarkResult>,
528 > = HashMap::new();
529
530 for result in &self.results {
532 let key = (result.operation, result.problemsize, result.datatype);
533 grouped_results.entry(key).or_default().push(result);
534 }
535
536 for ((operation, problemsize, datatype), results) in grouped_results {
538 if results.len() > 1 {
539 let comparison =
540 self.create_comparison(operation, problemsize, datatype, &results)?;
541 self.comparisons.push(comparison);
542 }
543 }
544
545 Ok(())
546 }
547
548 fn create_comparison(
550 &self,
551 operation: BenchmarkOperation,
552 problemsize: ProblemSize,
553 datatype: DataType,
554 results: &[&BenchmarkResult],
555 ) -> Result<BenchmarkComparison, BenchmarkError> {
556 let mut platform_results = HashMap::new();
557 let mut cpu_time = None;
558
559 for result in results {
560 platform_results.insert(result.platform, (*result).clone());
561 if matches!(result.platform, ComputePlatform::Cpu) {
562 cpu_time = Some(result.execution_time);
563 }
564 }
565
566 let mut speedups = HashMap::new();
567 let mut energy_comparison = HashMap::new();
568
569 if let Some(cpu_time) = cpu_time {
570 for result in results {
571 if let ComputePlatform::Gpu(backend) = result.platform {
572 let speedup = cpu_time.as_secs_f64() / result.execution_time.as_secs_f64();
573 speedups.insert(backend, speedup);
574 }
575
576 energy_comparison.insert(result.platform, 1.0);
578 }
579 }
580
581 let recommendation = self.generate_recommendation(operation, &platform_results, &speedups);
582
583 Ok(BenchmarkComparison {
584 operation,
585 problemsize,
586 datatype,
587 platform_results,
588 speedups,
589 energy_comparison,
590 recommendation,
591 })
592 }
593
594 fn generate_recommendation(
596 &self,
597 operation: BenchmarkOperation,
598 platform_results: &HashMap<ComputePlatform, BenchmarkResult>,
599 speedups: &HashMap<GpuBackend, f64>,
600 ) -> PlatformRecommendation {
601 let best_speedup = speedups.values().fold(0.0f64, |a, &b| a.max(b));
603 let best_backend = speedups
604 .iter()
605 .max_by(|a, b| a.1.partial_cmp(b.1).expect("Operation failed"))
606 .map(|(&backend, _)| backend);
607
608 if best_speedup > 2.0 {
609 if let Some(backend) = best_backend {
610 PlatformRecommendation::Gpu {
611 backend,
612 reason: format!("GPU shows {best_speedup:.1}x speedup over CPU"),
613 }
614 } else {
615 PlatformRecommendation::Cpu {
616 reason: "No significant GPU advantage found".to_string(),
617 }
618 }
619 } else if best_speedup > 1.2 {
620 PlatformRecommendation::Depends {
621 factors: vec![
622 format!("GPU shows modest {:.1}x speedup", best_speedup),
623 "Consider data transfer overhead".to_string(),
624 format!(
625 "{} may benefit from GPU for larger problems",
626 operation.name()
627 ),
628 ],
629 }
630 } else {
631 PlatformRecommendation::Cpu {
632 reason: "CPU performance is competitive or better".to_string(),
633 }
634 }
635 }
636
637 fn calculate_stddev(&self, times: &[Duration], avg: Duration) -> Duration {
639 if times.len() <= 1 {
640 return Duration::ZERO;
641 }
642
643 let variance = times
644 .iter()
645 .map(|&time| {
646 let diff = time.as_secs_f64() - avg.as_secs_f64();
647 diff * diff
648 })
649 .sum::<f64>()
650 / (times.len() - 1) as f64;
651
652 Duration::from_secs_f64(variance.sqrt())
653 }
654
655 fn calculate_throughput(
657 &self,
658 operation: BenchmarkOperation,
659 problemsize: ProblemSize,
660 time: Duration,
661 ) -> f64 {
662 let ops = match operation {
663 BenchmarkOperation::MatrixMultiply => {
664 let n = problemsize.matrix_size();
665 2 * n * n * n }
667 BenchmarkOperation::VectorOperations => {
668 problemsize.vector_size() }
670 _ => problemsize.vector_size(), };
672
673 ops as f64 / time.as_secs_f64()
674 }
675
676 fn calculate_memorybandwidth(
678 &self,
679 operation: BenchmarkOperation,
680 problemsize: ProblemSize,
681 datatype: DataType,
682 time: Duration,
683 ) -> f64 {
684 let bytes = match operation {
685 BenchmarkOperation::MatrixMultiply => {
686 let n = problemsize.matrix_size();
687 (3 * n * n) * datatype.size_bytes() }
689 BenchmarkOperation::VectorOperations => {
690 problemsize.vector_size() * datatype.size_bytes() * 2 }
692 _ => problemsize.vector_size() * datatype.size_bytes() * 2,
693 };
694
695 (bytes as f64) / (time.as_secs_f64() * 1e9) }
697
698 fn estimate_memory_usage(
700 &self,
701 operation: BenchmarkOperation,
702 problemsize: ProblemSize,
703 datatype: DataType,
704 ) -> usize {
705 match operation {
706 BenchmarkOperation::MatrixMultiply => {
707 let n = problemsize.matrix_size();
708 3 * n * n * datatype.size_bytes() }
710 BenchmarkOperation::VectorOperations => {
711 problemsize.vector_size() * datatype.size_bytes() * 2 }
713 _ => problemsize.vector_size() * datatype.size_bytes() * 2,
714 }
715 }
716
717 pub fn results(&self) -> &[BenchmarkResult] {
719 &self.results
720 }
721
722 pub fn comparisons(&self) -> &[BenchmarkComparison] {
724 &self.comparisons
725 }
726
727 pub fn generate_report(&self) -> BenchmarkReport {
729 BenchmarkReport::new(&self.results, &self.comparisons)
730 }
731}
732
733#[derive(Debug, Clone)]
735pub struct BenchmarkReport {
736 pub summary: BenchmarkSummary,
738 pub detailed_results: Vec<BenchmarkResult>,
740 pub comparisons: Vec<BenchmarkComparison>,
742 pub category_recommendations: HashMap<BenchmarkCategory, String>,
744}
745
746impl BenchmarkReport {
747 fn new(results: &[BenchmarkResult], comparisons: &[BenchmarkComparison]) -> Self {
748 let summary = BenchmarkSummary::from_results(results);
749 let category_recommendations = Self::generate_category_recommendations(comparisons);
750
751 Self {
752 summary,
753 detailed_results: results.to_vec(),
754 comparisons: comparisons.to_vec(),
755 category_recommendations,
756 }
757 }
758
759 fn generate_category_recommendations(
760 comparisons: &[BenchmarkComparison],
761 ) -> HashMap<BenchmarkCategory, String> {
762 let mut recommendations = HashMap::new();
763
764 for category in [
766 BenchmarkCategory::LinearAlgebra,
767 BenchmarkCategory::ElementWise,
768 BenchmarkCategory::SignalProcessing,
769 BenchmarkCategory::ImageProcessing,
770 BenchmarkCategory::GeneralCompute,
771 ] {
772 let category_comps: Vec<_> = comparisons
773 .iter()
774 .filter(|c| c.operation.category() == category)
775 .collect();
776
777 if !category_comps.is_empty() {
778 let gpu_wins = category_comps
779 .iter()
780 .filter(|c| matches!(c.recommendation, PlatformRecommendation::Gpu { .. }))
781 .count();
782
783 let recommendation = if gpu_wins > category_comps.len() / 2 {
784 format!(
785 "GPU recommended for most {name} operations",
786 name = category.name()
787 )
788 } else {
789 format!(
790 "CPU competitive for {name} operations",
791 name = category.name()
792 )
793 };
794
795 recommendations.insert(category, recommendation);
796 }
797 }
798
799 recommendations
800 }
801}
802
803impl BenchmarkCategory {
804 fn name(&self) -> &'static str {
805 match self {
806 BenchmarkCategory::LinearAlgebra => "linear algebra",
807 BenchmarkCategory::ElementWise => "element-wise",
808 BenchmarkCategory::SignalProcessing => "signal processing",
809 BenchmarkCategory::ImageProcessing => "image processing",
810 BenchmarkCategory::GeneralCompute => "general compute",
811 }
812 }
813}
814
815#[derive(Debug, Clone)]
817pub struct BenchmarkSummary {
818 pub total_benchmarks: usize,
820 pub avg_cpu_time: Duration,
822 pub avg_gpu_time: Duration,
824 pub overall_speedup: f64,
826 pub best_platforms: HashMap<BenchmarkOperation, ComputePlatform>,
828}
829
830impl BenchmarkSummary {
831 fn from_results(results: &[BenchmarkResult]) -> Self {
832 let total_benchmarks = results.len();
833
834 let cpu_times: Vec<_> = results
835 .iter()
836 .filter(|r| matches!(r.platform, ComputePlatform::Cpu))
837 .map(|r| r.execution_time)
838 .collect();
839
840 let gpu_times: Vec<_> = results
841 .iter()
842 .filter(|r| matches!(r.platform, ComputePlatform::Gpu(_)))
843 .map(|r| r.execution_time)
844 .collect();
845
846 let avg_cpu_time = if !cpu_times.is_empty() {
847 cpu_times.iter().sum::<Duration>() / cpu_times.len() as u32
848 } else {
849 Duration::ZERO
850 };
851
852 let avg_gpu_time = if !gpu_times.is_empty() {
853 gpu_times.iter().sum::<Duration>() / gpu_times.len() as u32
854 } else {
855 Duration::ZERO
856 };
857
858 let overall_speedup = if avg_gpu_time > Duration::ZERO {
859 avg_cpu_time.as_secs_f64() / avg_gpu_time.as_secs_f64()
860 } else {
861 1.0
862 };
863
864 let mut best_platforms = HashMap::new();
866 let mut operation_results: HashMap<BenchmarkOperation, Vec<&BenchmarkResult>> =
867 HashMap::new();
868
869 for result in results {
870 operation_results
871 .entry(result.operation)
872 .or_default()
873 .push(result);
874 }
875
876 for (operation, op_results) in operation_results {
877 if let Some(best) = op_results.iter().min_by_key(|r| r.execution_time) {
878 best_platforms.insert(operation, best.platform);
879 }
880 }
881
882 Self {
883 total_benchmarks,
884 avg_cpu_time,
885 avg_gpu_time,
886 overall_speedup,
887 best_platforms,
888 }
889 }
890}
891
#[cfg(test)]
mod tests {
    use super::*;

    /// Display names of the operations match the expected strings.
    #[test]
    fn test_benchmark_operation_name() {
        let expected = [
            (BenchmarkOperation::MatrixMultiply, "Matrix Multiplication"),
            (BenchmarkOperation::VectorOperations, "Vector Operations"),
        ];
        for (operation, name) in expected {
            assert_eq!(operation.name(), name);
        }
    }

    /// Preset and custom problem sizes map to the expected matrix sizes.
    #[test]
    fn test_problemsizematrix() {
        let expected = [
            (ProblemSize::Small, 64),
            (ProblemSize::Large, 2048),
            (ProblemSize::Custom(1000), 1000),
        ];
        for (size, dimension) in expected {
            assert_eq!(size.matrix_size(), dimension);
        }
    }

    /// Element sizes are reported in bytes.
    #[test]
    fn test_datatype_size() {
        let expected = [
            (DataType::Float32, 4),
            (DataType::Float64, 8),
            (DataType::Float16, 2),
        ];
        for (datatype, bytes) in expected {
            assert_eq!(datatype.size_bytes(), bytes);
        }
    }

    /// Platform names include the backend for GPU platforms.
    #[test]
    fn test_compute_platformname() {
        let cpu = ComputePlatform::Cpu;
        let cuda = ComputePlatform::Gpu(GpuBackend::Cuda);
        assert_eq!(cpu.name(), "CPU");
        assert_eq!(cuda.name(), "GPU (CUDA)");
    }

    /// The default configuration is non-empty and verifies correctness.
    #[test]
    fn test_benchmark_config_default() {
        let BenchmarkConfig {
            operations,
            problemsizes,
            verify_correctness,
            ..
        } = BenchmarkConfig::default();
        assert!(!operations.is_empty());
        assert!(!problemsizes.is_empty());
        assert!(verify_correctness);
    }

    /// A freshly created suite holds no results or comparisons.
    #[test]
    fn test_benchmark_suite_creation() {
        let suite = BenchmarkSuite::new(BenchmarkConfig::default());
        assert!(suite.results().is_empty());
        assert!(suite.comparisons().is_empty());
    }
}