#[cfg(all(feature = "cuda", feature = "optimized"))]
use ndarray::{Array1, Array2};
#[cfg(all(feature = "cuda", feature = "optimized"))]
use pandrs::error::Result;
#[cfg(all(feature = "cuda", feature = "optimized"))]
use pandrs::gpu::operations::{GpuAccelerated, GpuMatrix, GpuVector};
#[cfg(all(feature = "cuda", feature = "optimized"))]
use pandrs::gpu::{get_gpu_manager, init_gpu, GpuConfig, GpuError, GpuManager};
#[cfg(all(feature = "cuda", feature = "optimized"))]
use pandrs::ml::clustering::kmeans;
#[cfg(all(feature = "cuda", feature = "optimized"))]
use pandrs::ml::dimension_reduction::pca;
#[cfg(all(feature = "cuda", feature = "optimized"))]
use pandrs::ml::metrics::regression::{mean_squared_error, r2_score};
#[cfg(all(feature = "cuda", feature = "optimized"))]
use pandrs::optimized::dataframe::OptimizedDataFrame;
#[cfg(all(feature = "cuda", feature = "optimized"))]
use pandrs::stats::regression::linear_regression;
#[cfg(all(feature = "cuda", feature = "optimized"))]
use pandrs::DataFrame;
#[cfg(all(feature = "cuda", feature = "optimized"))]
use pandrs::Series;
#[cfg(all(feature = "cuda", feature = "optimized"))]
use std::time::Instant;
/// Entry point used when both the `cuda` and `optimized` features are enabled.
///
/// Initializes the GPU, prints the status it reports, generates a synthetic
/// regression dataset and then runs the linear regression, PCA and k-means
/// benchmarks in that order.
///
/// # Errors
///
/// Propagates the first error returned by GPU initialization, dataset
/// generation, or any of the benchmarks.
#[cfg(all(feature = "cuda", feature = "optimized"))]
fn main() -> Result<()> {
    println!("PandRS GPU-accelerated Machine Learning Example");
    println!("----------------------------------------------");

    let gpu = init_gpu()?;
    println!("\nGPU Device Status:");
    println!(" Available: {}", gpu.available);

    if !gpu.available {
        println!(" No CUDA-compatible GPU available. Using CPU fallback.");
    } else {
        // Missing optional fields are shown as "Unknown" / 0 MB.
        let name = gpu.device_name.unwrap_or_else(|| String::from("Unknown"));
        println!(" Device Name: {}", name);

        let cuda = gpu.cuda_version.unwrap_or_else(|| String::from("Unknown"));
        println!(" CUDA Version: {}", cuda);

        // Divide by 1024 * 1024 to report the values in the "MB" unit printed below.
        let total_mb = gpu.total_memory.unwrap_or(0) / (1024 * 1024);
        println!(" Total Memory: {} MB", total_mb);

        let free_mb = gpu.free_memory.unwrap_or(0) / (1024 * 1024);
        println!(" Free Memory: {} MB", free_mb);
    }

    println!("\nGenerating synthetic dataset...");
    let (train_x, train_y, test_x, test_y) = generate_synthetic_dataset(100_000, 20)?;
    println!("Dataset generated with 100,000 samples and 20 features");

    benchmark_linear_regression(&train_x, &train_y, &test_x, &test_y)?;
    benchmark_pca(&train_x)?;
    benchmark_kmeans(&train_x)?;

    Ok(())
}
/// Builds a random linear-regression dataset and splits it into train/test parts.
///
/// Feature `j` gets the true coefficient `j / n_features`. Every feature value
/// is `rand::random::<f64>() * 2.0 - 1.0` and every target is the dot product
/// of its row with the true coefficients plus `rand::random::<f64>() * 0.1 - 0.05`
/// of noise (assuming `rand::random::<f64>()` yields values in `[0, 1)`, that
/// is features in `[-1, 1)` and noise in `[-0.05, 0.05)`).
///
/// The last `n_samples / 5` rows form the test split; the rest is the training split.
///
/// # Returns
///
/// `(x_train, y_train, x_test, y_test)`.
///
/// # Errors
///
/// Propagates the shape error from `Array2::from_shape_vec`.
#[cfg(all(feature = "cuda", feature = "optimized"))]
fn generate_synthetic_dataset(
    n_samples: usize,
    n_features: usize,
) -> Result<(Array2<f64>, Array1<f64>, Array2<f64>, Array1<f64>)> {
    let true_coefs: Vec<f64> = (0..n_features)
        .map(|i| (i as f64) / n_features as f64)
        .collect();

    // Row-major feature buffer: sample `i` occupies
    // `x_data[i * n_features..(i + 1) * n_features]`.
    let x_data: Vec<f64> = (0..n_samples * n_features)
        .map(|_| (rand::random::<f64>() * 2.0) - 1.0)
        .collect();

    let y_data: Vec<f64> = (0..n_samples)
        .map(|i| {
            let row = &x_data[i * n_features..(i + 1) * n_features];
            // Explicit fold from 0.0 keeps the left-to-right accumulation order.
            let signal = row
                .iter()
                .zip(true_coefs.iter())
                .fold(0.0, |acc, (x, coef)| acc + x * coef);
            signal + ((rand::random::<f64>() * 0.1) - 0.05)
        })
        .collect();

    let x_array = Array2::from_shape_vec((n_samples, n_features), x_data)?;
    let y_array = Array1::from_vec(y_data);

    // Hold out the trailing 20% (integer division) as the test split.
    let test_size = n_samples / 5;
    let train_size = n_samples - test_size;

    let x_train = x_array.slice(ndarray::s![0..train_size, ..]).to_owned();
    let y_train = y_array.slice(ndarray::s![0..train_size]).to_owned();
    let x_test = x_array.slice(ndarray::s![train_size.., ..]).to_owned();
    let y_test = y_array.slice(ndarray::s![train_size..]).to_owned();

    Ok((x_train, y_train, x_test, y_test))
}
/// Benchmarks linear regression on the CPU and, when a GPU is available, the
/// GPU-side matrix products, printing timings and quality metrics.
///
/// The CPU part copies `x_train`/`y_train` into a `DataFrame` (columns
/// `X0..Xn` and `y`), fits `linear_regression`, predicts on `x_test` and
/// reports MSE and R² against `y_test`. The timed CPU region includes the
/// `DataFrame` construction.
///
/// The GPU part computes `XᵀX` and `Xᵀy` and predicts on `x_test` using the
/// coefficients fitted by the CPU model.
///
/// # Errors
///
/// Propagates errors from the GPU manager lookup, `DataFrame`/`Series`
/// construction, model fitting, the matrix products and the metric functions.
#[cfg(all(feature = "cuda", feature = "optimized"))]
fn benchmark_linear_regression(
    x_train: &Array2<f64>,
    y_train: &Array1<f64>,
    x_test: &Array2<f64>,
    y_test: &Array1<f64>,
) -> Result<()> {
    println!("\nLinear Regression Benchmark");
    println!("---------------------------");

    let gpu_manager = get_gpu_manager()?;
    let is_gpu_available = gpu_manager.is_available();

    let (_, n_features) = x_train.dim();
    let (n_test_rows, n_test_cols) = x_test.dim();
    // Materialize the ground truth once; it is reused by every metric call.
    let y_true = y_test.to_vec();

    println!("\nTraining linear regression model on CPU...");
    let cpu_start = Instant::now();

    let feature_cols: Vec<String> = (0..n_features).map(|j| format!("X{}", j)).collect();
    let mut df = DataFrame::new();
    for (j, col_name) in feature_cols.iter().enumerate() {
        let x_col: Vec<f64> = x_train.column(j).iter().copied().collect();
        df.add_column(
            col_name.clone(),
            Series::new(x_col, Some(col_name.clone()))?,
        )?;
    }
    df.add_column(
        "y".to_string(),
        Series::new(y_train.to_vec(), Some("y".to_string()))?,
    )?;

    let feature_refs: Vec<&str> = feature_cols.iter().map(String::as_str).collect();
    let model = linear_regression(&df, "y", &feature_refs)?;

    let coeffs = model.coefficients();
    let intercept = model.intercept();
    // prediction_i = intercept + sum_j coeffs[j] * x_test[i, j], accumulated left to right.
    let y_pred: Vec<f64> = (0..n_test_rows)
        .map(|i| (0..n_test_cols).fold(intercept, |acc, j| acc + coeffs[j] * x_test[[i, j]]))
        .collect();

    let cpu_mse = mean_squared_error(&y_true, &y_pred)?;
    let cpu_r2 = r2_score(&y_true, &y_pred)?;
    let cpu_elapsed = cpu_start.elapsed();

    println!(" CPU Training time: {} ms", cpu_elapsed.as_millis());
    println!(" MSE: {:.6}", cpu_mse);
    println!(" R²: {:.6}", cpu_r2);

    if is_gpu_available {
        println!("\nTraining linear regression model on GPU...");

        // Only wrap (and clone) the matrices when the GPU path actually runs;
        // this stays outside the timed region.
        let gpu_x_train = GpuMatrix::new(x_train.clone());
        let gpu_x_test = GpuMatrix::new(x_test.clone());

        let gpu_start = Instant::now();

        // NOTE(review): XᵀX and Xᵀy are computed but the normal equations are
        // never solved; the predictions below reuse the CPU-fitted
        // coefficients, so this timing covers the matrix products only.
        // NOTE(review): assumes `GpuMatrix::data` exposes `t()`/`dot()`
        // returning this crate's `Result` — confirm against pandrs::gpu::operations.
        let _xtx = GpuMatrix::new(gpu_x_train.data.t().dot(&gpu_x_train.data)?);
        let _xty = GpuVector::new(gpu_x_train.data.t().dot(&y_train)?);

        let coef_column =
            Array2::from_shape_vec((n_features, 1), coeffs.iter().copied().collect())?;
        let gpu_predictions = gpu_x_test.data.dot(&coef_column)?.column(0).to_vec();
        let gpu_y_pred: Vec<f64> = gpu_predictions.iter().map(|val| val + intercept).collect();

        let gpu_mse = mean_squared_error(&y_true, &gpu_y_pred)?;
        let gpu_r2 = r2_score(&y_true, &gpu_y_pred)?;
        let gpu_elapsed = gpu_start.elapsed();

        println!(" GPU Training time: {} ms", gpu_elapsed.as_millis());
        println!(" MSE: {:.6}", gpu_mse);
        println!(" R²: {:.6}", gpu_r2);

        // Use fractional seconds: whole-millisecond values truncate a fast GPU
        // run to 0 and would report a 0.00x speedup.
        let gpu_secs = gpu_elapsed.as_secs_f64();
        let speedup = if gpu_secs > 0.0 {
            cpu_elapsed.as_secs_f64() / gpu_secs
        } else {
            // Only reachable if the clock did not advance at all.
            0.0
        };
        println!("\nSpeedup: {:.2}x", speedup);
    }

    Ok(())
}
/// Benchmarks PCA on the CPU and, when a GPU is available, the GPU-side
/// covariance computation, printing timings and explained variance ratios.
///
/// The CPU part copies `data` into a `DataFrame` (columns `X0..Xn`) and runs
/// `pca` with two components; the timed region includes the `DataFrame`
/// construction. The GPU part centers each column and computes
/// `centeredᵀ · centered / (n_samples - 1)`.
///
/// # Errors
///
/// Propagates errors from the GPU manager lookup, `DataFrame`/`Series`
/// construction, `pca` and the matrix product.
///
/// # Panics
///
/// Panics in the GPU branch if `data` has no rows (no column mean exists).
#[cfg(all(feature = "cuda", feature = "optimized"))]
fn benchmark_pca(data: &Array2<f64>) -> Result<()> {
    println!("\nPrincipal Component Analysis (PCA) Benchmark");
    println!("--------------------------------------------");

    let gpu_manager = get_gpu_manager()?;
    let is_gpu_available = gpu_manager.is_available();

    let (n_samples, n_features) = data.dim();

    println!("\nRunning PCA on CPU...");
    let cpu_start = Instant::now();

    let feature_cols: Vec<String> = (0..n_features).map(|j| format!("X{}", j)).collect();
    let mut df = DataFrame::new();
    for (j, col_name) in feature_cols.iter().enumerate() {
        let x_col: Vec<f64> = data.column(j).iter().copied().collect();
        df.add_column(
            col_name.clone(),
            Series::new(x_col, Some(col_name.clone()))?,
        )?;
    }

    let feature_refs: Vec<&str> = feature_cols.iter().map(String::as_str).collect();
    let n_components = 2;
    let pca_result = pca(&df, &feature_refs, n_components)?;
    let cpu_elapsed = cpu_start.elapsed();

    println!(" CPU PCA time: {} ms", cpu_elapsed.as_millis());
    println!(
        " Explained variance ratio: {:.4}, {:.4}",
        pca_result.explained_variance_ratio[0], pca_result.explained_variance_ratio[1]
    );

    if is_gpu_available {
        println!("\nRunning PCA on GPU...");
        let gpu_start = Instant::now();

        let means: Vec<f64> = (0..n_features)
            .map(|j| {
                data.column(j)
                    .mean()
                    .expect("PCA input must contain at least one row")
            })
            .collect();

        // `dim()` yields a `(rows, cols)` tuple, which is what a 2-D
        // `from_shape_fn` needs; `shape()` is a dynamic-dimension slice and
        // does not type-check here.
        let centered_data = Array2::from_shape_fn(data.dim(), |(i, j)| data[[i, j]] - means[j]);
        let gpu_centered = GpuMatrix::new(centered_data);

        // NOTE(review): the covariance matrix is computed but never
        // decomposed; the ratios printed below are the CPU result, so this
        // timing covers centering and the covariance product only.
        // NOTE(review): assumes `GpuMatrix::data` exposes `t()`/`dot()`
        // returning this crate's `Result` — confirm against pandrs::gpu::operations.
        let _cov_matrix =
            gpu_centered.data.t().dot(&gpu_centered.data)? / (n_samples - 1) as f64;

        let gpu_elapsed = gpu_start.elapsed();
        println!(" GPU PCA time: {} ms", gpu_elapsed.as_millis());
        println!(
            " Explained variance ratio: {:.4}, {:.4}",
            pca_result.explained_variance_ratio[0], pca_result.explained_variance_ratio[1]
        );

        // Use fractional seconds: whole-millisecond values truncate a fast GPU
        // run to 0 and would report a 0.00x speedup.
        let gpu_secs = gpu_elapsed.as_secs_f64();
        let speedup = if gpu_secs > 0.0 {
            cpu_elapsed.as_secs_f64() / gpu_secs
        } else {
            // Only reachable if the clock did not advance at all.
            0.0
        };
        println!("\nSpeedup: {:.2}x", speedup);
    }

    Ok(())
}
/// Benchmarks k-means on the CPU and, when a GPU is available, the GPU-side
/// centroid initialization, printing timings, iteration count and inertia.
///
/// The CPU part copies `data` into a `DataFrame` (columns `X0..Xn`) and runs
/// `kmeans` with `k = 5` and at most 100 iterations; the timed region includes
/// the `DataFrame` construction. The GPU part seeds `k` centroids from evenly
/// spaced rows of `data`.
///
/// # Errors
///
/// Propagates errors from the GPU manager lookup, `DataFrame`/`Series`
/// construction and `kmeans`.
///
/// # Panics
///
/// Panics in the GPU branch if `data` has no rows (row 0 is always read).
#[cfg(all(feature = "cuda", feature = "optimized"))]
fn benchmark_kmeans(data: &Array2<f64>) -> Result<()> {
    println!("\nK-means Clustering Benchmark");
    println!("----------------------------");

    let gpu_manager = get_gpu_manager()?;
    let is_gpu_available = gpu_manager.is_available();

    let (n_samples, n_features) = data.dim();

    println!("\nRunning K-means on CPU...");
    let cpu_start = Instant::now();

    let feature_cols: Vec<String> = (0..n_features).map(|j| format!("X{}", j)).collect();
    let mut df = DataFrame::new();
    for (j, col_name) in feature_cols.iter().enumerate() {
        let x_col: Vec<f64> = data.column(j).iter().copied().collect();
        df.add_column(
            col_name.clone(),
            Series::new(x_col, Some(col_name.clone()))?,
        )?;
    }

    let feature_refs: Vec<&str> = feature_cols.iter().map(String::as_str).collect();
    let k = 5;
    let max_iter = 100;
    let kmeans_result = kmeans(&df, &feature_refs, k, max_iter, None)?;
    let cpu_elapsed = cpu_start.elapsed();

    println!(" CPU K-means time: {} ms", cpu_elapsed.as_millis());
    println!(" Number of iterations: {}", kmeans_result.n_iter);
    println!(" Inertia: {:.4}", kmeans_result.inertia);

    if is_gpu_available {
        println!("\nRunning K-means on GPU...");
        let gpu_start = Instant::now();

        // Seed centroid `i` from row `i * (n_samples / k)` so the seeds are
        // spread evenly over the dataset.
        let stride = n_samples / k;
        let mut centroids = Array2::<f64>::zeros((k, n_features));
        for i in 0..k {
            centroids.row_mut(i).assign(&data.row(i * stride));
        }

        // NOTE(review): the centroids are initialized but no clustering
        // iterations run here; the figures printed below are the CPU result,
        // so this timing covers the initialization only.
        let _gpu_centroids = GpuMatrix::new(centroids);

        let gpu_elapsed = gpu_start.elapsed();
        println!(" GPU K-means time: {} ms", gpu_elapsed.as_millis());
        println!(" Number of iterations: {}", kmeans_result.n_iter);
        println!(" Inertia: {:.4}", kmeans_result.inertia);

        // Use fractional seconds: this section routinely finishes in under a
        // millisecond, which whole-millisecond values truncate to 0 and
        // report as a 0.00x speedup.
        let gpu_secs = gpu_elapsed.as_secs_f64();
        let speedup = if gpu_secs > 0.0 {
            cpu_elapsed.as_secs_f64() / gpu_secs
        } else {
            // Only reachable if the clock did not advance at all.
            0.0
        };
        println!("\nSpeedup: {:.2}x", speedup);
    }

    Ok(())
}
/// Fallback entry point compiled when the `cuda` and `optimized` features are
/// not both enabled; it only prints how to rebuild the example with them.
#[cfg(not(all(feature = "cuda", feature = "optimized")))]
fn main() {
    // One entry per output line, printed in order.
    let notice = [
        "This example requires both \"cuda\" and \"optimized\" feature flags to be enabled.",
        "Please recompile with:",
        " cargo run --example gpu_ml_example --features \"cuda optimized\"",
    ];
    for line in notice.iter() {
        println!("{}", line);
    }
}