use scirs2_core::array_protocol::{self, add, matmul, NdarrayWrapper};
#[cfg(feature = "gpu")]
use scirs2_core::array_protocol::{GPUBackend, GPUConfig, GPUNdarray};
use scirs2_core::ndarray_ext::{Array2, Ix2};
/// Example entry point: runs `matmul` and `add` from the array protocol on
/// CPU-wrapped arrays and, when the `gpu` feature is enabled, on `GPUNdarray`
/// values configured for the CUDA backend, followed by a simple timing
/// comparison of a 500x500 matrix multiplication.
///
/// Every failure is reported on stdout instead of being dropped silently:
/// operation errors, results whose concrete type is not the expected one, and
/// failed benchmark runs. The final "completed successfully" line is printed
/// only if every GPU-section step succeeded.
// NOTE(review): `dead_code` is normally not reported for an entry point; the
// attribute is kept as-is in case this file is also compiled in another way.
#[allow(dead_code)]
fn main() {
    // Called before any protocol operation is dispatched.
    array_protocol::init();

    println!("Array Protocol GPU Example");
    println!("==========================");

    // Small 3x3 inputs: identity, all ones, and a constant 2.0 matrix.
    let a = Array2::<f64>::eye(3);
    let b = Array2::<f64>::ones((3, 3));
    let c = Array2::<f64>::from_elem((3, 3), 2.0);

    println!("\nOriginal arrays:");
    println!("A =\n{}", a);
    println!("B =\n{}", b);
    println!("C =\n{}", c);

    // Clones are required here: with the `gpu` feature the originals are
    // consumed later when the GPU arrays are built.
    let wrapped_a = NdarrayWrapper::new(a.clone());
    let wrapped_b = NdarrayWrapper::new(b.clone());
    let wrapped_c = NdarrayWrapper::new(c.clone());

    println!("\n1. Operations on CPU:");

    match matmul(&wrapped_a, &wrapped_b) {
        Ok(result) => match result.as_any().downcast_ref::<NdarrayWrapper<f64, Ix2>>() {
            Some(result_array) => println!("CPU: A * B =\n{}", result_array.as_array()),
            // Previously this case printed nothing at all.
            None => println!("CPU: A * B returned an unexpected array type"),
        },
        Err(e) => println!("Error in CPU matrix multiplication: {}", e),
    }

    match add(&wrapped_a, &wrapped_c) {
        Ok(result) => match result.as_any().downcast_ref::<NdarrayWrapper<f64, Ix2>>() {
            Some(result_array) => println!("CPU: A + C =\n{}", result_array.as_array()),
            None => println!("CPU: A + C returned an unexpected array type"),
        },
        Err(e) => println!("Error in CPU addition: {}", e),
    }

    #[cfg(feature = "gpu")]
    {
        println!("\n2. Operations with CUDA GPU backend:");

        // Tracks whether every step of the GPU section succeeded, so the
        // closing message does not claim success after a reported error.
        let mut gpu_section_ok = true;

        let cuda_config = GPUConfig {
            backend: GPUBackend::CUDA,
            device_id: 0,
            async_ops: false,
            mixed_precision: false,
            memory_fraction: 0.8,
        };

        // Last use of `a`, `b` and `c`: move them instead of cloning again.
        let gpu_a = GPUNdarray::new(a, cuda_config.clone());
        let gpu_b = GPUNdarray::new(b, cuda_config.clone());
        let gpu_c = GPUNdarray::new(c, cuda_config.clone());

        println!(
            "Created GPU arrays with shapes {:?}, {:?}, and {:?}",
            gpu_a.shape(),
            gpu_b.shape(),
            gpu_c.shape()
        );

        match matmul(&gpu_a, &gpu_b) {
            Ok(result) => match result.as_any().downcast_ref::<GPUNdarray<f64, Ix2>>() {
                Some(gpu_result) => {
                    println!("CUDA GPU: A * B shape: {:?}", gpu_result.shape());
                    println!("CUDA GPU: A * B completed successfully");
                }
                None => {
                    gpu_section_ok = false;
                    println!("CUDA GPU: A * B returned an unexpected array type");
                }
            },
            Err(e) => {
                gpu_section_ok = false;
                println!("Error in CUDA GPU matrix multiplication: {}", e);
            }
        }

        match add(&gpu_a, &gpu_c) {
            Ok(result) => match result.as_any().downcast_ref::<GPUNdarray<f64, Ix2>>() {
                Some(gpu_result) => {
                    println!("CUDA GPU: A + C shape: {:?}", gpu_result.shape());
                    println!("CUDA GPU: A + C completed successfully");
                }
                None => {
                    gpu_section_ok = false;
                    println!("CUDA GPU: A + C returned an unexpected array type");
                }
            },
            Err(e) => {
                gpu_section_ok = false;
                println!("Error in CUDA GPU addition: {}", e);
            }
        }

        println!("\n3. Using AutoDevice for automatic device selection:");
        println!("Note: AutoDevice API requires trait bounds not satisfied in this example");
        println!("Skipping AutoDevice demonstration");

        println!("\n4. Performance comparison:");

        let size = 500;
        let large_a = Array2::<f64>::eye(size);
        let large_b = Array2::<f64>::ones((size, size));

        // One copy for the CPU wrappers; the originals are then moved into the
        // GPU arrays, avoiding a second copy of each large matrix.
        let cpu_large_a = NdarrayWrapper::new(large_a.clone());
        let cpu_large_b = NdarrayWrapper::new(large_b.clone());
        let gpu_large_a = GPUNdarray::new(large_a, cuda_config.clone());
        let gpu_large_b = GPUNdarray::new(large_b, cuda_config);

        println!(
            "Starting CPU matrix multiplication for {}x{} matrices...",
            size, size
        );
        let cpu_start = std::time::Instant::now();
        let cpu_outcome = matmul(&cpu_large_a, &cpu_large_b);
        let cpu_duration = cpu_start.elapsed();
        // A timing is only meaningful if the multiplication actually succeeded.
        let cpu_bench_ok = match cpu_outcome {
            Ok(_) => {
                println!("CPU computation time: {:?}", cpu_duration);
                true
            }
            Err(e) => {
                println!("Error in CPU benchmark matrix multiplication: {}", e);
                false
            }
        };

        println!(
            "Starting GPU matrix multiplication for {}x{} matrices...",
            size, size
        );
        let gpu_start = std::time::Instant::now();
        let gpu_outcome = matmul(&gpu_large_a, &gpu_large_b);
        let gpu_duration = gpu_start.elapsed();
        let gpu_bench_ok = match gpu_outcome {
            Ok(_) => {
                println!("GPU computation time: {:?}", gpu_duration);
                true
            }
            Err(e) => {
                println!("Error in GPU benchmark matrix multiplication: {}", e);
                false
            }
        };

        if cpu_bench_ok && gpu_bench_ok {
            let gpu_secs = gpu_duration.as_secs_f64();
            if gpu_secs > 0.0 {
                println!(
                    "\nGPU speedup: {:.2}x",
                    cpu_duration.as_secs_f64() / gpu_secs
                );
            } else {
                // Avoid printing `inf`/`NaN` from a division by zero.
                println!("\nGPU speedup: not measurable (GPU time was zero)");
            }
        } else {
            gpu_section_ok = false;
            println!("\nGPU speedup: not available (a benchmark multiplication failed)");
        }

        if gpu_section_ok {
            println!("\nArray Protocol GPU examples completed successfully!");
        } else {
            println!("\nArray Protocol GPU examples completed with errors (see messages above).");
        }
    }

    #[cfg(not(feature = "gpu"))]
    {
        println!("\n2. GPU support is not enabled.");
        println!("To run GPU examples, compile with: cargo run --example array_protocol_gpu --features gpu");
    }
}