1#![cfg_attr(
22 not(any(feature = "tensorflow", feature = "pytorch", feature = "jax")),
23 allow(unexpected_cfgs)
24)]
25
26pub mod core;
28
29pub mod ndarray_comparisons;
31pub mod reporting;
32
33#[cfg(feature = "pytorch")]
35pub mod pytorch_comparisons;
36
37#[cfg(feature = "tensorflow")]
38pub mod tensorflow_comparisons;
39
40#[cfg(feature = "jax")]
41pub mod jax_comparisons;
42
43#[cfg(feature = "numpy_baseline")]
44pub mod numpy_comparisons;
45
/// Unified namespace re-exporting every comparison backend compiled into
/// this build. Framework-specific modules are gated behind Cargo features,
/// so the set of names available here depends on the enabled features.
pub mod comparisons {
    pub use crate::core::*;

    pub use crate::ndarray_comparisons::*;

    // The following re-exports are only present when the corresponding
    // framework feature is enabled at compile time.
    #[cfg(feature = "pytorch")]
    pub use crate::pytorch_comparisons::*;

    #[cfg(feature = "tensorflow")]
    pub use crate::tensorflow_comparisons::*;

    #[cfg(feature = "jax")]
    pub use crate::jax_comparisons::*;

    #[cfg(feature = "numpy_baseline")]
    pub use crate::numpy_comparisons::*;

    pub use crate::reporting::*;
}
75
76pub mod advanced_analysis;
78pub mod benchmark_analysis;
79pub mod benchmark_cache;
80pub mod benchmark_comparison;
81pub mod benchmark_validation;
82pub mod benchmarks;
83pub mod cached_runner;
84pub mod ci_integration;
85pub mod custom_ops_benchmarks;
86pub mod distributed_training;
87pub mod edge_deployment;
88pub mod hardware_benchmarks;
89pub mod html_reporting;
90pub mod metrics;
91pub mod mobile_benchmarks;
92pub mod model_benchmarks;
93pub mod performance_dashboards;
94pub mod precision_benchmarks;
95pub mod regression_detection;
96pub mod scalability;
97pub mod scirs2_benchmarks;
98pub mod system_info;
99pub mod utils;
100pub mod visualization;
101pub mod wasm_benchmarks;
102
103pub use benchmarks::{
105 AdvancedSystemsBenchmarkSuite, AutoTuningBench, BackwardPassBench, DataLoaderThroughputBench,
106 ErrorDiagnosticsBench, GradientComputeBench, MatmulBench, SIMDGNNBench, TensorArithmeticBench,
107 TensorCreationBench, VectorizedMetricsBench,
108};
109
110pub use core::{ComparisonResult, ComparisonRunner, PerformanceAnalyzer};
112
113pub use reporting::{
115 benchmark_and_analyze, benchmark_and_compare, generate_master_comparison_report,
116 run_all_comparison_suites, run_comparison_benchmarks, run_extended_benchmarks,
117};
118
119pub use metrics::{CpuStats, MemoryStats, MetricsCollector, PerformanceReport, SystemMetrics};
121pub use utils::{
122 DataGenerator,
123 Distribution,
124 EnhancedBenchResult,
125 EnhancedBenchSuite,
126 Environment,
127 EnvironmentInfo,
128 Formatter,
129 MemoryMonitor,
130 ParallelBenchRunner,
131 Timer,
132 TimingStats,
134 ValidationResult,
135 Validator,
136};
137
138pub use model_benchmarks::{ModelBenchmarkSuite, ResNetBlockBench, TransformerBlockBench};
140
141pub use hardware_benchmarks::{
143 CPUGPUComparisonBench, MemoryBandwidthBench, MultiGPUBench, ThermalThrottlingBench,
144};
145pub use scalability::ScalabilityTestSuite;
146
147pub use precision_benchmarks::{MixedPrecisionTrainingBench, PruningBench, QuantizationBench};
149
150pub use custom_ops_benchmarks::{ConvolutionOperation, CustomOpBench, FFTOperation};
152pub use edge_deployment::{BatteryLifeBench, EdgeInferenceBench, EdgeMemoryBench};
153pub use mobile_benchmarks::{ARMOptimizationBench, MobileGPUBench, MobilePlatformBench};
154pub use wasm_benchmarks::{BrowserSpecificBench, WASMPerformanceBench, WebDeploymentBench};
155
156pub use scirs2_benchmarks::{
158 AdvancedNeuralNetworkBench, AdvancedOptimizerBench, GraphNeuralNetworkBench,
159 SciRS2BenchmarkSuite, SciRS2MathBench, SciRS2RandomBench, SpatialVisionBench,
160 TimeSeriesAnalysisBench,
161};
162
163pub use advanced_analysis::{AdaptiveBenchmarking, AdvancedAnalyzer};
165pub use benchmark_analysis::{
166 BenchmarkAnalyzer, BottleneckAnalysis, PerformanceAnalysis, PerformanceRating,
167};
168pub use benchmark_validation::{BenchmarkValidator, NumericalAccuracy, ValidationConfig};
169pub use ci_integration::{CIBenchmarkRunner, CIConfig, NotificationConfig};
170pub use html_reporting::{HtmlReportGenerator, Theme};
171pub use performance_dashboards::{DashboardConfig, PerformanceDashboard};
172pub use regression_detection::{AdvancedRegressionDetector, RegressionAnalysis};
173pub use system_info::{BenchmarkEnvironment, SystemInfo, SystemInfoCollector};
174pub use visualization::{ChartType, VisualizationGenerator};
175
176use criterion::{BenchmarkId, Criterion, Throughput};
177use std::time::Duration;
178
/// Common interface for benchmarks driven by [`BenchRunner`].
///
/// Implementors separate one-time input preparation ([`setup`](Self::setup))
/// from the measured operation ([`run`](Self::run)) so that setup cost is
/// excluded from the timed region.
pub trait Benchmarkable {
    /// Input value produced by `setup` and consumed by `run`.
    type Input;
    /// Value produced by a single benchmark iteration.
    type Output;

    /// Prepares the input for a benchmark at the given problem `size`.
    /// Runs outside the measured region (see `BenchRunner::run_benchmark`).
    fn setup(&mut self, size: usize) -> Self::Input;

    /// Executes one timed iteration of the benchmark.
    fn run(&mut self, input: &Self::Input) -> Self::Output;

    /// Floating-point operations performed per iteration at `size`.
    /// Defaults to `size` itself; override for accurate GFLOPS numbers.
    fn flops(&self, size: usize) -> usize {
        size
    }

    /// Bytes of memory traffic per iteration at `size`. Defaults to
    /// `size` f32 elements; override for other dtypes or access patterns.
    fn bytes_accessed(&self, size: usize) -> usize {
        size * std::mem::size_of::<f32>()
    }
}
202
/// Declarative description of a benchmark run: its name, the problem sizes
/// and dtypes to sweep, timing windows, and which extra metrics to collect.
#[derive(Debug, Clone)]
pub struct BenchConfig {
    /// Benchmark (group) name, used as the Criterion group id.
    pub name: String,

    /// Problem sizes to sweep.
    pub sizes: Vec<usize>,

    /// Data types to benchmark at each size.
    pub dtypes: Vec<torsh_core::dtype::DType>,

    /// Warmup duration before measurement begins.
    pub warmup_time: Duration,

    /// Duration of the measurement window.
    pub measurement_time: Duration,

    /// Whether to collect memory-usage statistics.
    pub measure_memory: bool,

    /// Whether to report throughput (bytes/sec) to Criterion.
    pub measure_throughput: bool,

    /// Free-form key/value annotations attached to this configuration.
    pub metadata: std::collections::HashMap<String, String>,
}
230
231impl Default for BenchConfig {
232 fn default() -> Self {
233 Self {
234 name: "default".to_string(),
235 sizes: vec![64, 256, 1024, 4096],
236 dtypes: vec![torsh_core::dtype::DType::F32],
237 warmup_time: Duration::from_millis(100),
238 measurement_time: Duration::from_secs(1),
239 measure_memory: false,
240 measure_throughput: true,
241 metadata: std::collections::HashMap::new(),
242 }
243 }
244}
245
246impl BenchConfig {
247 pub fn new(name: &str) -> Self {
249 Self {
250 name: name.to_string(),
251 ..Default::default()
252 }
253 }
254
255 pub fn with_sizes(mut self, sizes: Vec<usize>) -> Self {
257 self.sizes = sizes;
258 self
259 }
260
261 pub fn with_dtypes(mut self, dtypes: Vec<torsh_core::dtype::DType>) -> Self {
263 self.dtypes = dtypes;
264 self
265 }
266
267 pub fn with_memory_measurement(mut self) -> Self {
269 self.measure_memory = true;
270 self
271 }
272
273 pub fn with_timing(mut self, warmup: Duration, measurement: Duration) -> Self {
275 self.warmup_time = warmup;
276 self.measurement_time = measurement;
277 self
278 }
279
280 pub fn with_metadata(mut self, key: &str, value: &str) -> Self {
282 self.metadata.insert(key.to_string(), value.to_string());
283 self
284 }
285}
286
/// A single benchmark measurement, serializable for caching and reporting.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct BenchResult {
    /// Benchmark name.
    pub name: String,

    /// Problem size this measurement was taken at.
    pub size: usize,

    /// Data type that was benchmarked.
    pub dtype: torsh_core::dtype::DType,

    /// Mean iteration time in nanoseconds.
    pub mean_time_ns: f64,

    /// Standard deviation of the iteration time in nanoseconds.
    pub std_dev_ns: f64,

    // Presumably operations per second — `gflops()` multiplies it by
    // flops-per-op; TODO(review): confirm the unit where results are filled in.
    pub throughput: Option<f64>,

    /// Memory usage in bytes, if measured.
    pub memory_usage: Option<usize>,

    /// Peak memory in bytes, if measured.
    pub peak_memory: Option<usize>,

    /// Additional named metrics.
    pub metrics: std::collections::HashMap<String, f64>,
}
317
318impl BenchResult {
319 pub fn gflops(&self, flops_per_op: usize) -> Option<f64> {
321 self.throughput.map(|tps| tps * flops_per_op as f64 / 1e9)
322 }
323
324 pub fn memory_bandwidth_gbps(&self, bytes_per_op: usize) -> Option<f64> {
326 self.throughput.map(|tps| tps * bytes_per_op as f64 / 1e9)
327 }
328}
329
/// Builds an ad-hoc [`Benchmarkable`] value from a pair of closures.
///
/// `$setup` must be `FnMut(usize) -> I` and runs outside the timed region;
/// `$run` must be `FnMut(&I) -> O` and is the measured operation.
///
/// NOTE(review): the `$name` argument is accepted but never used in the
/// expansion — confirm whether it was meant to label the benchmark.
#[macro_export]
macro_rules! benchmark {
    ($name:expr, $setup:expr, $run:expr) => {{
        // Local adapter type that carries the two closures and implements
        // `Benchmarkable` by delegating to them.
        struct SimpleBench<S, R> {
            setup_fn: S,
            run_fn: R,
        }

        impl<S, R, I, O> $crate::Benchmarkable for SimpleBench<S, R>
        where
            S: FnMut(usize) -> I,
            R: FnMut(&I) -> O,
        {
            type Input = I;
            type Output = O;

            fn setup(&mut self, size: usize) -> <Self as $crate::Benchmarkable>::Input {
                (self.setup_fn)(size)
            }

            fn run(
                &mut self,
                input: &<Self as $crate::Benchmarkable>::Input,
            ) -> <Self as $crate::Benchmarkable>::Output {
                (self.run_fn)(input)
            }
        }

        // The macro expands to this value, moving both closures in.
        SimpleBench {
            setup_fn: $setup,
            run_fn: $run,
        }
    }};
}
365
/// Drives [`Benchmarkable`] implementations through Criterion and holds any
/// collected [`BenchResult`]s for CSV/HTML export.
pub struct BenchRunner {
    // Underlying Criterion instance used for timing.
    criterion: Criterion,
    // Registered benchmark configurations.
    configs: Vec<BenchConfig>,
    // Collected results, consumed by `export_csv`/`generate_report`.
    results: Vec<BenchResult>,
}
372
373impl BenchRunner {
374 pub fn new() -> Self {
376 Self {
377 criterion: Criterion::default()
378 .warm_up_time(Duration::from_millis(100))
379 .measurement_time(Duration::from_secs(1)),
380 configs: Vec::new(),
381 results: Vec::new(),
382 }
383 }
384
385 pub fn add_config(mut self, config: BenchConfig) -> Self {
387 self.configs.push(config);
388 self
389 }
390
391 pub fn run_benchmark<B: Benchmarkable>(&mut self, mut bench: B, config: &BenchConfig) {
393 let mut group = self.criterion.benchmark_group(&config.name);
394 group.warm_up_time(config.warmup_time);
395 group.measurement_time(config.measurement_time);
396
397 for &size in &config.sizes {
398 for &dtype in &config.dtypes {
399 let bench_id = BenchmarkId::new(format!("{}_{:?}", config.name, dtype), size);
400
401 if config.measure_throughput {
402 let bytes_per_op = bench.bytes_accessed(size);
403 group.throughput(Throughput::Bytes(bytes_per_op as u64));
404 }
405
406 group.bench_with_input(bench_id, &size, |b, &size| {
407 let input = bench.setup(size);
408
409 b.iter(|| bench.run(&input));
410 });
411 }
412 }
413
414 group.finish();
415 }
416
417 pub fn results(&self) -> &[BenchResult] {
419 &self.results
420 }
421
422 pub fn export_csv(&self, path: &str) -> std::io::Result<()> {
424 use std::io::Write;
425 let mut file = std::fs::File::create(path)?;
426
427 writeln!(
428 file,
429 "name,size,dtype,mean_time_ns,std_dev_ns,throughput,memory_usage"
430 )?;
431
432 for result in &self.results {
433 writeln!(
434 file,
435 "{},{},{:?},{},{},{:?},{:?}",
436 result.name,
437 result.size,
438 result.dtype,
439 result.mean_time_ns,
440 result.std_dev_ns,
441 result.throughput,
442 result.memory_usage
443 )?;
444 }
445
446 Ok(())
447 }
448
449 pub fn generate_report(&self, output_dir: &str) -> std::io::Result<()> {
451 std::fs::create_dir_all(output_dir)?;
452
453 let report_path = format!("{}/benchmark_report.html", output_dir);
454 let mut file = std::fs::File::create(report_path)?;
455
456 use std::io::Write;
457 writeln!(file, "<!DOCTYPE html>")?;
458 writeln!(
459 file,
460 "<html><head><title>ToRSh Benchmark Report</title></head><body>"
461 )?;
462 writeln!(file, "<h1>ToRSh Benchmark Report</h1>")?;
463
464 writeln!(file, "<table border='1'>")?;
466 writeln!(file, "<tr><th>Benchmark</th><th>Size</th><th>Type</th><th>Time (μs)</th><th>Throughput</th></tr>")?;
467
468 for result in &self.results {
469 writeln!(
470 file,
471 "<tr><td>{}</td><td>{}</td><td>{:?}</td><td>{:.2}</td><td>{:.2}</td></tr>",
472 result.name,
473 result.size,
474 result.dtype,
475 result.mean_time_ns / 1000.0,
476 result.throughput.unwrap_or(0.0)
477 )?;
478 }
479
480 writeln!(file, "</table>")?;
481 writeln!(file, "</body></html>")?;
482
483 Ok(())
484 }
485}
486
487impl Default for BenchRunner {
488 fn default() -> Self {
489 Self::new()
490 }
491}
492
/// Convenience re-exports for benchmark authors.
///
/// `use <crate>::prelude::*;` brings in the runner and config types, the
/// analysis/system-info helpers, the Criterion essentials, `black_box`,
/// and the utility toolkit in one import.
pub mod prelude {
    pub use super::{benchmark, BenchConfig, BenchResult, BenchRunner};
    pub use super::{BenchmarkAnalyzer, SystemInfoCollector};
    pub use crate::benchmark_analysis::{BottleneckAnalysis, PerformanceAnalysis};
    pub use crate::core::{ComparisonResult, ComparisonRunner, PerformanceAnalyzer};
    pub use crate::system_info::{BenchmarkEnvironment, SystemInfo};
    pub use crate::Benchmarkable;
    pub use criterion::{BenchmarkId, Criterion, Throughput};
    pub use std::hint::black_box;

    pub use crate::utils::{
        Distribution, EnhancedBenchResult, EnhancedBenchSuite, EnvironmentInfo, Formatter,
        MemoryMonitor, ParallelBenchRunner, Timer, TimingStats, ValidationResult, Validator,
    };
}