//! Benchmarks TenfloweRS element-wise tensor operations against external
//! frameworks (NumPy, PyTorch, TensorFlow) by shelling out to a Python
//! interpreter and timing equivalent scripts.

use super::performance_benchmark::BenchmarkConfig;
use crate::{Result, Tensor, TensorError};
use std::collections::HashMap;
use std::process::Command;
use std::time::{Duration, Instant};
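
/// Timing comparison for a single operation at a single tensor size: the
/// TenfloweRS measurement alongside the measurements collected from each
/// external framework.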
#[derive(Debug, Clone)]
pub struct FrameworkComparisonResult {
    pub operation: String,
    pub size: usize,
    pub tenflowers_time: Duration,
    pub framework_times: HashMap<String, Duration>,
    /// Elements processed per second by TenfloweRS.
    pub tenflowers_throughput: f64,
    /// Elements processed per second by each external framework.
    pub framework_throughputs: HashMap<String, f64>,
    /// Ratio of TenfloweRS time to each framework's time; values below 1.0
    /// mean TenfloweRS was faster.
    pub relative_performance: HashMap<String, f64>,
}
impl FrameworkComparisonResult {
    pub fn new(
        operation: String,
        size: usize,
        tenflowers_time: Duration,
        framework_times: HashMap<String, Duration>,
    ) -> Self {
        let tenflowers_throughput = size as f64 / tenflowers_time.as_secs_f64();
        let mut framework_throughputs = HashMap::new();
        let mut relative_performance = HashMap::new();
        for (framework, time) in &framework_times {
            let throughput = size as f64 / time.as_secs_f64();
            framework_throughputs.insert(framework.clone(), throughput);
            let relative = tenflowers_time.as_nanos() as f64 / time.as_nanos() as f64;
            relative_performance.insert(framework.clone(), relative);
        }
        Self {
            operation,
            size,
            tenflowers_time,
            framework_times,
            tenflowers_throughput,
            framework_throughputs,
            relative_performance,
        }
    }
}
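
/// Configuration for a framework comparison run: the shared benchmark
/// settings plus which external frameworks to test and how to reach the
/// Python interpreter that runs them.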
#[derive(Debug, Clone)]
pub struct FrameworkBenchmarkConfig {
    pub base_config: BenchmarkConfig,
    /// Names of the external frameworks to benchmark against
    /// (e.g. "numpy", "pytorch", "tensorflow").
    pub frameworks_to_test: Vec<String>,
    /// Python executable used to run the generated benchmark scripts.
    pub python_executable: String,
    /// If `true`, frameworks that cannot be imported are skipped with a
    /// warning instead of failing the whole run.
    pub skip_missing_frameworks: bool,
}
impl Default for FrameworkBenchmarkConfig {
    fn default() -> Self {
        Self {
            base_config: BenchmarkConfig::default(),
            frameworks_to_test: vec![
                "numpy".to_string(),
                "pytorch".to_string(),
                "tensorflow".to_string(),
            ],
            python_executable: "python3".to_string(),
            skip_missing_frameworks: true,
        }
    }
}
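
/// Returns `true` if the given framework can be imported by the configured
/// Python interpreter (i.e. `python3 -c "import <module>"` succeeds).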
fn check_framework_availability(framework: &str, python_executable: &str) -> bool {
    let import_name = match framework {
        "numpy" => "numpy",
        "pytorch" => "torch",
        "tensorflow" => "tensorflow",
        _ => framework,
    };
    Command::new(python_executable)
        .arg("-c")
        .arg(format!("import {import_name}"))
        .output()
        .map(|o| o.status.success())
        .unwrap_or(false)
}
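
/// Builds a small Python script that allocates two random vectors of `size`
/// elements, warms up, then times `iterations` runs of the requested
/// element-wise operation and prints the total elapsed nanoseconds to stdout.
/// Returns an empty string for unsupported framework/operation pairs.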
fn generate_python_benchmark_script(
    framework: &str,
    operation: &str,
    size: usize,
    iterations: usize,
) -> String {
    let setup_code = match framework {
        "numpy" => format!(
            r#"
import numpy as np
import time
a = np.random.randn({size}).astype(np.float32)
b = np.random.randn({size}).astype(np.float32)
"#
        ),
        "pytorch" => format!(
            r#"
import torch
import time
a = torch.randn({size}, dtype=torch.float32)
b = torch.randn({size}, dtype=torch.float32)
"#
        ),
        "tensorflow" => format!(
            r#"
import tensorflow as tf
import time
a = tf.random.normal([{size}], dtype=tf.float32)
b = tf.random.normal([{size}], dtype=tf.float32)
"#
        ),
        _ => return String::new(),
    };
    let operation_code = match (framework, operation) {
        ("numpy", "add") => "result = np.add(a, b)",
        ("numpy", "mul") => "result = np.multiply(a, b)",
        ("numpy", "sub") => "result = np.subtract(a, b)",
        ("numpy", "div") => "result = np.divide(a, b)",
        ("pytorch", "add") => "result = torch.add(a, b)",
        ("pytorch", "mul") => "result = torch.mul(a, b)",
        ("pytorch", "sub") => "result = torch.sub(a, b)",
        ("pytorch", "div") => "result = torch.div(a, b)",
        ("tensorflow", "add") => "result = tf.add(a, b)",
        ("tensorflow", "mul") => "result = tf.multiply(a, b)",
        ("tensorflow", "sub") => "result = tf.subtract(a, b)",
        ("tensorflow", "div") => "result = tf.divide(a, b)",
        _ => return String::new(),
    };
    // The loop bodies must be indented so the generated script is valid Python.
    format!(
        r#"
{setup_code}
# Warmup
for _ in range(5):
    {operation_code}
# Benchmark
start_time = time.perf_counter()
for _ in range({iterations}):
    {operation_code}
end_time = time.perf_counter()
elapsed_ns = (end_time - start_time) * 1e9
print(f"{{elapsed_ns:.0f}}")
"#
    )
}
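
/// Times one operation at one size in TenfloweRS and in every available
/// external framework, skipping (or erroring on) frameworks that cannot be
/// imported, depending on `skip_missing_frameworks`.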
fn benchmark_operation_against_frameworks(
    operation: &str,
    size: usize,
    config: &FrameworkBenchmarkConfig,
) -> Result<FrameworkComparisonResult> {
    let tenflowers_time = benchmark_tenflowers_operation(operation, size, &config.base_config)?;
    let mut framework_times = HashMap::new();
    for framework in &config.frameworks_to_test {
        if !check_framework_availability(framework, &config.python_executable) {
            if config.skip_missing_frameworks {
                println!("Warning: {framework} not available, skipping");
                continue;
            } else {
                return Err(TensorError::other(format!(
                    "Framework {framework} not available"
                )));
            }
        }
        match benchmark_external_framework(
            framework,
            operation,
            size,
            &config.base_config,
            &config.python_executable,
        ) {
            Ok(time) => {
                framework_times.insert(framework.clone(), time);
            }
            Err(e) => {
                println!("Warning: Failed to benchmark {framework} for {operation}: {e}");
            }
        }
    }
    Ok(FrameworkComparisonResult::new(
        operation.to_string(),
        size,
        tenflowers_time,
        framework_times,
    ))
}
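
/// Times a single TenfloweRS binary operation on two `size`-element tensors,
/// returning the average duration per call over the measurement iterations.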
fn benchmark_tenflowers_operation(
    operation: &str,
    size: usize,
    config: &BenchmarkConfig,
) -> Result<Duration> {
    let a_data: Vec<f32> = (0..size).map(|i| i as f32).collect();
    let b_data: Vec<f32> = (0..size).map(|i| (i as f32) + 1.0).collect();
    let a = Tensor::from_vec(a_data, &[size])?;
    let b = Tensor::from_vec(b_data, &[size])?;
    // Untimed warmup runs.
    for _ in 0..config.warmup_iterations {
        match operation {
            "add" => {
                let _ = super::binary::add(&a, &b)?;
            }
            "mul" => {
                let _ = super::binary::mul(&a, &b)?;
            }
            "sub" => {
                let _ = super::binary::sub(&a, &b)?;
            }
            "div" => {
                let _ = super::binary::div(&a, &b)?;
            }
            _ => {
                return Err(TensorError::other(format!(
                    "Unknown operation: {operation}"
                )))
            }
        }
    }
    let start = Instant::now();
    for _ in 0..config.measurement_iterations {
        match operation {
            "add" => {
                let _ = super::binary::add(&a, &b)?;
            }
            "mul" => {
                let _ = super::binary::mul(&a, &b)?;
            }
            "sub" => {
                let _ = super::binary::sub(&a, &b)?;
            }
            "div" => {
                let _ = super::binary::div(&a, &b)?;
            }
            _ => {
                return Err(TensorError::other(format!(
                    "Unknown operation: {operation}"
                )))
            }
        }
    }
    // Average over the measurement iterations so the result is a per-call time.
    let elapsed = start.elapsed() / config.measurement_iterations as u32;
    Ok(elapsed)
}
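
/// Runs the generated Python benchmark script for one framework/operation
/// pair and parses the printed timing, returning the average duration per
/// iteration so it is directly comparable with the TenfloweRS measurement.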
fn benchmark_external_framework(
    framework: &str,
    operation: &str,
    size: usize,
    config: &BenchmarkConfig,
    python_executable: &str,
) -> Result<Duration> {
    let script =
        generate_python_benchmark_script(framework, operation, size, config.measurement_iterations);
    if script.is_empty() {
        return Err(TensorError::other(format!(
            "Unsupported framework/operation: {framework}/{operation}"
        )));
    }
    let output = Command::new(python_executable)
        .arg("-c")
        .arg(&script)
        .output()
        .map_err(|e| TensorError::other(format!("Failed to execute Python script: {e}")))?;
    if !output.status.success() {
        return Err(TensorError::other(format!(
            "Python script failed: {}",
            String::from_utf8_lossy(&output.stderr)
        )));
    }
    let elapsed_ns_str = String::from_utf8_lossy(&output.stdout);
    let total_elapsed_ns: f64 = elapsed_ns_str
        .trim()
        .parse()
        .map_err(|e| TensorError::other(format!("Failed to parse timing result: {e}")))?;
    // The script reports the total time over all measurement iterations;
    // convert it to a per-iteration average to match benchmark_tenflowers_operation.
    let per_iteration_ns = total_elapsed_ns / config.measurement_iterations.max(1) as f64;
    Ok(Duration::from_nanos(per_iteration_ns as u64))
}
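
/// Runs the full comparison across all configured sizes for the element-wise
/// operations `add`, `mul`, `sub`, and `div`, prints a summary table, and
/// returns the raw per-operation results.
///
/// A minimal usage sketch (marked `ignore` because it requires a working
/// Python environment and depends on your crate's module layout):
///
/// ```ignore
/// let config = FrameworkBenchmarkConfig::default();
/// let results = run_framework_comparison_benchmark(config)?;
/// for r in &results {
///     println!("{} @ {}: {:?}", r.operation, r.size, r.tenflowers_time);
/// }
/// ```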
pub fn run_framework_comparison_benchmark(
    config: FrameworkBenchmarkConfig,
) -> Result<Vec<FrameworkComparisonResult>> {
    println!("Running TenfloweRS Framework Comparison Benchmark");
    println!("Testing against external frameworks...\n");
    let operations = vec!["add", "mul", "sub", "div"];
    let mut results = Vec::new();
    for &size in &config.base_config.sizes {
        println!("Benchmarking size: {size}");
        for operation in &operations {
            match benchmark_operation_against_frameworks(operation, size, &config) {
                Ok(result) => {
                    results.push(result);
                }
                Err(e) => {
                    println!("Warning: Failed to benchmark {operation} at size {size}: {e}");
                }
            }
        }
    }
    print_framework_comparison_results(&results);
    Ok(results)
}
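
/// Prints the collected results as a table of per-call times in microseconds,
/// followed by a summary of the TenfloweRS-to-framework time ratios (values
/// below 1.0 mean TenfloweRS was faster).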
pub fn print_framework_comparison_results(results: &[FrameworkComparisonResult]) {
    if results.is_empty() {
        println!("No benchmark results to display");
        return;
    }
    println!("\n{:-<120}", "");
    println!(
        "| {:^12} | {:^8} | {:^15} | {:^15} | {:^15} | {:^15} | {:^15} |",
        "Operation",
        "Size",
        "TenfloweRS (μs)",
        "NumPy (μs)",
        "PyTorch (μs)",
        "TensorFlow (μs)",
        "Relative Perf."
    );
    println!("{:-<120}", "");
    for result in results {
        let tf_us = result.tenflowers_time.as_micros();
        let numpy_us = result
            .framework_times
            .get("numpy")
            .map(|t| t.as_micros())
            .unwrap_or(0);
        let pytorch_us = result
            .framework_times
            .get("pytorch")
            .map(|t| t.as_micros())
            .unwrap_or(0);
        let tensorflow_us = result
            .framework_times
            .get("tensorflow")
            .map(|t| t.as_micros())
            .unwrap_or(0);
        let avg_relative = if !result.relative_performance.is_empty() {
            result.relative_performance.values().sum::<f64>()
                / result.relative_performance.len() as f64
        } else {
            0.0
        };
        println!(
            "| {:^12} | {:^8} | {:^15} | {:^15} | {:^15} | {:^15} | {:^15.2} |",
            result.operation,
            result.size,
            if tf_us > 0 {
                tf_us.to_string()
            } else {
                "-".to_string()
            },
            if numpy_us > 0 {
                numpy_us.to_string()
            } else {
                "-".to_string()
            },
            if pytorch_us > 0 {
                pytorch_us.to_string()
            } else {
                "-".to_string()
            },
            if tensorflow_us > 0 {
                tensorflow_us.to_string()
            } else {
                "-".to_string()
            },
            avg_relative
        );
    }
    println!("{:-<120}", "");
    let all_relative_perfs: Vec<f64> = results
        .iter()
        .flat_map(|r| r.relative_performance.values())
        .cloned()
        .collect();
    if !all_relative_perfs.is_empty() {
        // Relative performance is TenfloweRS time divided by framework time,
        // so lower values are better for TenfloweRS.
        let avg_relative = all_relative_perfs.iter().sum::<f64>() / all_relative_perfs.len() as f64;
        let best_relative = all_relative_perfs
            .iter()
            .fold(f64::INFINITY, |a, &b| a.min(b));
        let worst_relative = all_relative_perfs.iter().fold(0.0f64, |a, &b| a.max(b));
        println!("Summary:");
        println!(" Average relative performance: {avg_relative:.2}x");
        println!(" Best relative performance: {best_relative:.2}x");
        println!(" Worst relative performance: {worst_relative:.2}x");
        if avg_relative < 1.0 {
            println!(
                " 🚀 TenfloweRS is on average {:.2}x faster than other frameworks",
                1.0 / avg_relative
            );
        } else {
            println!(
                " ⚠️ TenfloweRS is on average {avg_relative:.2}x slower than other frameworks"
            );
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_framework_availability_check() {
        let has_python = check_framework_availability("sys", "python3")
            || check_framework_availability("sys", "python");
        println!("Python available: {has_python}");
    }

    #[test]
    fn test_benchmark_script_generation() {
        let script = generate_python_benchmark_script("numpy", "add", 1000, 10);
        assert!(script.contains("import numpy"));
        assert!(script.contains("np.add"));
        let script = generate_python_benchmark_script("pytorch", "mul", 1000, 10);
        assert!(script.contains("import torch"));
        assert!(script.contains("torch.mul"));
    }

    #[test]
    fn test_framework_comparison_result() {
        let mut framework_times = HashMap::new();
        framework_times.insert("numpy".to_string(), Duration::from_millis(2));
        framework_times.insert("pytorch".to_string(), Duration::from_millis(3));
        let result = FrameworkComparisonResult::new(
            "add".to_string(),
            1000,
            Duration::from_millis(1),
            framework_times,
        );
        assert_eq!(result.operation, "add");
        assert_eq!(result.size, 1000);
        assert!(result.relative_performance.contains_key("numpy"));
        assert!(result.relative_performance.contains_key("pytorch"));
        // TenfloweRS (1 ms) is faster than both frameworks (2 ms and 3 ms),
        // so both ratios are below 1.0.
        assert!(result.relative_performance["numpy"] < 1.0);
        assert!(result.relative_performance["pytorch"] < 1.0);
    }
}