1#![cfg_attr(
22 not(any(feature = "tensorflow", feature = "pytorch", feature = "jax")),
23 allow(unexpected_cfgs)
24)]
25
26pub mod core;
28
29pub mod ndarray_comparisons;
31pub mod reporting;
32
33#[cfg(feature = "pytorch")]
35pub mod pytorch_comparisons;
36
37#[cfg(feature = "tensorflow")]
38pub mod tensorflow_comparisons;
39
40#[cfg(feature = "jax")]
41pub mod jax_comparisons;
42
43#[cfg(feature = "numpy_baseline")]
44pub mod numpy_comparisons;
45
/// Unified namespace re-exporting every comparison backend compiled into
/// this build. Framework-specific modules are gated behind Cargo features,
/// so the set of names available here depends on the enabled features.
pub mod comparisons {
    pub use crate::core::*;

    pub use crate::ndarray_comparisons::*;

    // The following re-exports are only present when the corresponding
    // framework feature is enabled at compile time.
    #[cfg(feature = "pytorch")]
    pub use crate::pytorch_comparisons::*;

    #[cfg(feature = "tensorflow")]
    pub use crate::tensorflow_comparisons::*;

    #[cfg(feature = "jax")]
    pub use crate::jax_comparisons::*;

    #[cfg(feature = "numpy_baseline")]
    pub use crate::numpy_comparisons::*;

    pub use crate::reporting::*;
}
75
76pub mod advanced_analysis;
78pub mod benchmark_analysis;
79pub mod benchmark_cache;
80pub mod benchmark_comparison;
81pub mod benchmark_validation;
82pub mod benchmarks;
83pub mod cached_runner;
84pub mod ci_integration;
85pub mod custom_ops_benchmarks;
86pub mod distributed_training;
87pub mod edge_deployment;
88pub mod hardware_benchmarks;
89pub mod html_reporting;
90pub mod metrics;
91pub mod mobile_benchmarks;
92pub mod model_benchmarks;
93pub mod performance_dashboards;
94pub mod precision_benchmarks;
95pub mod regression_detection;
96pub mod scalability;
97pub mod scirs2_benchmarks;
98pub mod system_info;
99pub mod utils;
100pub mod visualization;
101pub mod wasm_benchmarks;
102
103pub use benchmarks::{
105 AdvancedSystemsBenchmarkSuite, AutoTuningBench, BackwardPassBench, DataLoaderThroughputBench,
106 ErrorDiagnosticsBench, GradientComputeBench, MatmulBench, SIMDGNNBench, TensorArithmeticBench,
107 TensorCreationBench, VectorizedMetricsBench,
108};
109
110pub use core::{ComparisonResult, ComparisonRunner, PerformanceAnalyzer};
112
113pub use reporting::{
115 benchmark_and_analyze, benchmark_and_compare, generate_master_comparison_report,
116 run_all_comparison_suites, run_comparison_benchmarks, run_extended_benchmarks,
117};
118
119pub use metrics::{CpuStats, MemoryStats, MetricsCollector, PerformanceReport, SystemMetrics};
121pub use utils::{
122 DataGenerator,
123 Distribution,
124 EnhancedBenchResult,
125 EnhancedBenchSuite,
126 Environment,
127 EnvironmentInfo,
128 Formatter,
129 MemoryMonitor,
130 ParallelBenchRunner,
131 Timer,
132 TimingStats,
134 ValidationResult,
135 Validator,
136};
137
138pub use model_benchmarks::{ModelBenchmarkSuite, ResNetBlockBench, TransformerBlockBench};
140
141pub use hardware_benchmarks::{
143 CPUGPUComparisonBench, MemoryBandwidthBench, MultiGPUBench, ThermalThrottlingBench,
144};
145pub use scalability::ScalabilityTestSuite;
146
147pub use precision_benchmarks::{MixedPrecisionTrainingBench, PruningBench, QuantizationBench};
149
150pub use custom_ops_benchmarks::{ConvolutionOperation, CustomOpBench, FFTOperation};
152pub use edge_deployment::{BatteryLifeBench, EdgeInferenceBench, EdgeMemoryBench};
153pub use mobile_benchmarks::{ARMOptimizationBench, MobileGPUBench, MobilePlatformBench};
154pub use wasm_benchmarks::{BrowserSpecificBench, WASMPerformanceBench, WebDeploymentBench};
155
156pub use scirs2_benchmarks::{
158 AdvancedNeuralNetworkBench, AdvancedOptimizerBench, GraphNeuralNetworkBench,
159 SciRS2BenchmarkSuite, SciRS2MathBench, SciRS2RandomBench, SpatialVisionBench,
160 TimeSeriesAnalysisBench,
161};
162
163pub use advanced_analysis::{AdaptiveBenchmarking, AdvancedAnalyzer};
165pub use benchmark_analysis::{
166 BenchmarkAnalyzer, BottleneckAnalysis, PerformanceAnalysis, PerformanceRating,
167};
168pub use benchmark_validation::{BenchmarkValidator, NumericalAccuracy, ValidationConfig};
169pub use ci_integration::{CIBenchmarkRunner, CIConfig, NotificationConfig};
170pub use html_reporting::{HtmlReportGenerator, Theme};
171pub use performance_dashboards::{DashboardConfig, PerformanceDashboard};
172pub use regression_detection::{AdvancedRegressionDetector, RegressionAnalysis};
173pub use system_info::{BenchmarkEnvironment, SystemInfo, SystemInfoCollector};
174pub use visualization::{ChartType, VisualizationGenerator};
175
176use criterion::{BenchmarkId, Criterion, Throughput};
177use std::time::Duration;
178
/// Common interface for benchmarks driven by [`BenchRunner`].
///
/// Implementors separate one-time input preparation ([`setup`](Self::setup))
/// from the measured operation ([`run`](Self::run)) so that setup cost is
/// excluded from the timed region.
pub trait Benchmarkable {
    /// Input value produced by `setup` and consumed by `run`.
    type Input;
    /// Value produced by a single benchmark iteration.
    type Output;

    /// Prepares the input for a benchmark at the given problem `size`.
    /// Runs outside the measured region (see `BenchRunner::run_benchmark`).
    fn setup(&mut self, size: usize) -> Self::Input;

    /// Executes one timed iteration of the benchmark.
    fn run(&mut self, input: &Self::Input) -> Self::Output;

    /// Floating-point operations performed per iteration at `size`.
    /// Defaults to `size` itself; override for accurate GFLOPS numbers.
    fn flops(&self, size: usize) -> usize {
        size
    }

    /// Bytes of memory traffic per iteration at `size`. Defaults to
    /// `size` f32 elements; override for other dtypes or access patterns.
    fn bytes_accessed(&self, size: usize) -> usize {
        size * std::mem::size_of::<f32>()
    }
}
202
/// Declarative description of a benchmark run: its name, the problem sizes
/// and dtypes to sweep, timing windows, and which extra metrics to collect.
#[derive(Debug, Clone)]
pub struct BenchConfig {
    /// Benchmark (group) name, used as the Criterion group id.
    pub name: String,

    /// Problem sizes to sweep.
    pub sizes: Vec<usize>,

    /// Data types to benchmark at each size.
    pub dtypes: Vec<torsh_core::dtype::DType>,

    /// Warmup duration before measurement begins.
    pub warmup_time: Duration,

    /// Duration of the measurement window.
    pub measurement_time: Duration,

    /// Whether to collect memory-usage statistics.
    pub measure_memory: bool,

    /// Whether to report throughput (bytes/sec) to Criterion.
    pub measure_throughput: bool,

    /// Free-form key/value annotations attached to this configuration.
    pub metadata: std::collections::HashMap<String, String>,
}
230
231impl Default for BenchConfig {
232 fn default() -> Self {
233 Self {
234 name: "default".to_string(),
235 sizes: vec![64, 256, 1024, 4096],
236 dtypes: vec![torsh_core::dtype::DType::F32],
237 warmup_time: Duration::from_millis(100),
238 measurement_time: Duration::from_secs(1),
239 measure_memory: false,
240 measure_throughput: true,
241 metadata: std::collections::HashMap::new(),
242 }
243 }
244}
245
246impl BenchConfig {
247 pub fn new(name: &str) -> Self {
249 Self {
250 name: name.to_string(),
251 ..Default::default()
252 }
253 }
254
255 pub fn with_sizes(mut self, sizes: Vec<usize>) -> Self {
257 self.sizes = sizes;
258 self
259 }
260
261 pub fn with_dtypes(mut self, dtypes: Vec<torsh_core::dtype::DType>) -> Self {
263 self.dtypes = dtypes;
264 self
265 }
266
267 pub fn with_memory_measurement(mut self) -> Self {
269 self.measure_memory = true;
270 self
271 }
272
273 pub fn with_timing(mut self, warmup: Duration, measurement: Duration) -> Self {
275 self.warmup_time = warmup;
276 self.measurement_time = measurement;
277 self
278 }
279
280 pub fn with_metadata(mut self, key: &str, value: &str) -> Self {
282 self.metadata.insert(key.to_string(), value.to_string());
283 self
284 }
285}
286
/// A single benchmark measurement, serializable for caching and reporting.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct BenchResult {
    /// Benchmark name.
    pub name: String,

    /// Problem size this measurement was taken at.
    pub size: usize,

    /// Data type that was benchmarked.
    pub dtype: torsh_core::dtype::DType,

    /// Mean iteration time in nanoseconds.
    pub mean_time_ns: f64,

    /// Standard deviation of the iteration time in nanoseconds.
    pub std_dev_ns: f64,

    // Presumably operations per second — `gflops()` multiplies it by
    // flops-per-op; TODO(review): confirm the unit where results are filled in.
    pub throughput: Option<f64>,

    /// Memory usage in bytes, if measured.
    pub memory_usage: Option<usize>,

    /// Peak memory in bytes, if measured.
    pub peak_memory: Option<usize>,

    /// Additional named metrics.
    pub metrics: std::collections::HashMap<String, f64>,
}
317
318impl BenchResult {
319 pub fn gflops(&self, flops_per_op: usize) -> Option<f64> {
321 self.throughput.map(|tps| tps * flops_per_op as f64 / 1e9)
322 }
323
324 pub fn memory_bandwidth_gbps(&self, bytes_per_op: usize) -> Option<f64> {
326 self.throughput.map(|tps| tps * bytes_per_op as f64 / 1e9)
327 }
328}
329
/// Builds an ad-hoc [`Benchmarkable`] value from a pair of closures.
///
/// `$setup` must be `FnMut(usize) -> I` and runs outside the timed region;
/// `$run` must be `FnMut(&I) -> O` and is the measured operation.
///
/// NOTE(review): the `$name` argument is accepted but never used in the
/// expansion — confirm whether it was meant to label the benchmark.
#[macro_export]
macro_rules! benchmark {
    ($name:expr, $setup:expr, $run:expr) => {{
        // Local adapter type that carries the two closures and implements
        // `Benchmarkable` by delegating to them.
        struct SimpleBench<S, R> {
            setup_fn: S,
            run_fn: R,
        }

        impl<S, R, I, O> $crate::Benchmarkable for SimpleBench<S, R>
        where
            S: FnMut(usize) -> I,
            R: FnMut(&I) -> O,
        {
            type Input = I;
            type Output = O;

            fn setup(&mut self, size: usize) -> <Self as $crate::Benchmarkable>::Input {
                (self.setup_fn)(size)
            }

            fn run(
                &mut self,
                input: &<Self as $crate::Benchmarkable>::Input,
            ) -> <Self as $crate::Benchmarkable>::Output {
                (self.run_fn)(input)
            }
        }

        // The macro expands to this value, moving both closures in.
        SimpleBench {
            setup_fn: $setup,
            run_fn: $run,
        }
    }};
}
365
/// Drives [`Benchmarkable`] implementations through Criterion and holds any
/// collected [`BenchResult`]s for CSV/HTML export.
pub struct BenchRunner {
    // Underlying Criterion instance used for timing.
    criterion: Criterion,
    // Registered benchmark configurations.
    configs: Vec<BenchConfig>,
    // Collected results, consumed by `export_csv`/`generate_report`.
    results: Vec<BenchResult>,
}
372
373impl BenchRunner {
374 pub fn new() -> Self {
376 Self {
377 criterion: Criterion::default()
378 .warm_up_time(Duration::from_millis(100))
379 .measurement_time(Duration::from_secs(1)),
380 configs: Vec::new(),
381 results: Vec::new(),
382 }
383 }
384
385 pub fn add_config(mut self, config: BenchConfig) -> Self {
387 self.configs.push(config);
388 self
389 }
390
391 pub fn run_benchmark<B: Benchmarkable>(&mut self, mut bench: B, config: &BenchConfig) {
393 let mut group = self.criterion.benchmark_group(&config.name);
394 group.warm_up_time(config.warmup_time);
395 group.measurement_time(config.measurement_time);
396
397 for &size in &config.sizes {
398 for &dtype in &config.dtypes {
399 let bench_id = BenchmarkId::new(format!("{}_{:?}", config.name, dtype), size);
400
401 if config.measure_throughput {
402 let bytes_per_op = bench.bytes_accessed(size);
403 group.throughput(Throughput::Bytes(bytes_per_op as u64));
404 }
405
406 group.bench_with_input(bench_id, &size, |b, &size| {
407 let input = bench.setup(size);
408
409 b.iter(|| bench.run(&input));
410 });
411 }
412 }
413
414 group.finish();
415 }
416
417 pub fn results(&self) -> &[BenchResult] {
419 &self.results
420 }
421
422 pub fn export_csv(&self, path: &str) -> std::io::Result<()> {
424 use std::io::Write;
425 let mut file = std::fs::File::create(path)?;
426
427 writeln!(
428 file,
429 "name,size,dtype,mean_time_ns,std_dev_ns,throughput,memory_usage"
430 )?;
431
432 for result in &self.results {
433 writeln!(
434 file,
435 "{},{},{:?},{},{},{:?},{:?}",
436 result.name,
437 result.size,
438 result.dtype,
439 result.mean_time_ns,
440 result.std_dev_ns,
441 result.throughput,
442 result.memory_usage
443 )?;
444 }
445
446 Ok(())
447 }
448
449 pub fn generate_report(&self, output_dir: &str) -> std::io::Result<()> {
451 std::fs::create_dir_all(output_dir)?;
452
453 let report_path = format!("{}/benchmark_report.html", output_dir);
454 let mut file = std::fs::File::create(report_path)?;
455
456 use std::io::Write;
457 writeln!(file, "<!DOCTYPE html>")?;
458 writeln!(
459 file,
460 "<html><head><title>ToRSh Benchmark Report</title></head><body>"
461 )?;
462 writeln!(file, "<h1>ToRSh Benchmark Report</h1>")?;
463
464 writeln!(file, "<table border='1'>")?;
466 writeln!(file, "<tr><th>Benchmark</th><th>Size</th><th>Type</th><th>Time (μs)</th><th>Throughput</th></tr>")?;
467
468 for result in &self.results {
469 writeln!(
470 file,
471 "<tr><td>{}</td><td>{}</td><td>{:?}</td><td>{:.2}</td><td>{:.2}</td></tr>",
472 result.name,
473 result.size,
474 result.dtype,
475 result.mean_time_ns / 1000.0,
476 result.throughput.unwrap_or(0.0)
477 )?;
478 }
479
480 writeln!(file, "</table>")?;
481 writeln!(file, "</body></html>")?;
482
483 Ok(())
484 }
485}
486
487impl Default for BenchRunner {
488 fn default() -> Self {
489 Self::new()
490 }
491}
492
/// Convenience re-exports for benchmark authors.
///
/// `use <crate>::prelude::*;` brings in the runner and config types, the
/// analysis/system-info helpers, the Criterion essentials, `black_box`,
/// and the utility toolkit in one import.
pub mod prelude {
    pub use super::{benchmark, BenchConfig, BenchResult, BenchRunner};
    pub use super::{BenchmarkAnalyzer, SystemInfoCollector};
    pub use crate::benchmark_analysis::{BottleneckAnalysis, PerformanceAnalysis};
    pub use crate::core::{ComparisonResult, ComparisonRunner, PerformanceAnalyzer};
    pub use crate::system_info::{BenchmarkEnvironment, SystemInfo};
    pub use crate::Benchmarkable;
    pub use criterion::{BenchmarkId, Criterion, Throughput};
    pub use std::hint::black_box;

    pub use crate::utils::{
        Distribution, EnhancedBenchResult, EnhancedBenchSuite, EnvironmentInfo, Formatter,
        MemoryMonitor, ParallelBenchRunner, Timer, TimingStats, ValidationResult, Validator,
    };
}