/// Minimum element count a workload must reach before the `try_*_gpu`
/// dispatchers in this module attempt the GPU path; anything smaller makes
/// them return [`GpuDispatch::FallbackToCpu`] immediately.
pub(crate) const GPU_DATASET_THRESHOLD: usize = 4096;
/// Outcome of an attempted GPU dispatch.
///
/// The dispatchers in this module never surface GPU errors to the caller:
/// they either hand back a finished result or ask the caller to run its own
/// CPU implementation.
#[derive(Debug, Clone, PartialEq)]
pub(crate) enum GpuDispatch<T> {
    /// The GPU path ran to completion and produced this value.
    Done(T),
    /// The GPU path was skipped or failed; the caller should compute on the CPU.
    FallbackToCpu,
}
/// Tries to compute regression targets on the GPU.
///
/// The workload size is `n_samples * n_informative` (saturating on overflow).
/// When it is below [`GPU_DATASET_THRESHOLD`] the function returns
/// [`GpuDispatch::FallbackToCpu`] without touching the GPU; otherwise all
/// arguments are forwarded unchanged to `regression_targets_gpu_inner`, whose
/// result is returned as-is.
pub(crate) fn try_regression_targets_gpu(
    data: &[f64],
    coef: &[f64],
    n_samples: usize,
    n_features: usize,
    n_informative: usize,
) -> GpuDispatch<Vec<f64>> {
    let work_items = n_samples.saturating_mul(n_informative);
    if work_items >= GPU_DATASET_THRESHOLD {
        regression_targets_gpu_inner(data, coef, n_samples, n_features, n_informative)
    } else {
        GpuDispatch::FallbackToCpu
    }
}
/// Tries to combine class centroids with noise on the GPU.
///
/// The element count is `n_samples * n_informative` (saturating on overflow).
/// Below [`GPU_DATASET_THRESHOLD`] this returns
/// [`GpuDispatch::FallbackToCpu`]; otherwise `centroids` and `noise` are
/// handed to `affine_offset_gpu_inner` with a scale of `0.3` and that element
/// count.
pub(crate) fn try_classification_informative_gpu(
    centroids: &[f64],
    noise: &[f64],
    n_samples: usize,
    n_informative: usize,
) -> GpuDispatch<Vec<f64>> {
    match n_samples.saturating_mul(n_informative) {
        total if total >= GPU_DATASET_THRESHOLD => {
            affine_offset_gpu_inner(centroids, noise, 0.3, total)
        }
        _ => GpuDispatch::FallbackToCpu,
    }
}
/// Tries to offset a broadcast blob center by noise on the GPU.
///
/// The element count is `n_samples_center * n_features` (saturating on
/// overflow). Below [`GPU_DATASET_THRESHOLD`] this returns
/// [`GpuDispatch::FallbackToCpu`]; otherwise `center_broadcast` and `noise`
/// are handed to `affine_offset_gpu_inner` with a scale of `1.0` and that
/// element count.
pub(crate) fn try_blobs_center_gpu(
    center_broadcast: &[f64],
    noise: &[f64],
    n_samples_center: usize,
    n_features: usize,
) -> GpuDispatch<Vec<f64>> {
    let element_count = n_samples_center.saturating_mul(n_features);
    if element_count >= GPU_DATASET_THRESHOLD {
        affine_offset_gpu_inner(center_broadcast, noise, 1.0, element_count)
    } else {
        GpuDispatch::FallbackToCpu
    }
}
/// GPU-backed combination of `base` with `delta` scaled by `scale`.
///
/// Both slices must hold exactly `n` values. The inputs are narrowed to
/// `f32` before upload and the result is widened back to `f64`, so the output
/// carries single precision only.
///
/// Returns [`GpuDispatch::FallbackToCpu`] when the slice lengths do not match
/// `n`, when no GPU or global context is available, or when any GPU step
/// reports an error (including a result whose length is not `n`).
#[cfg(feature = "gpu_wgpu")]
fn affine_offset_gpu_inner(
    base: &[f64],
    delta: &[f64],
    scale: f32,
    n: usize,
) -> GpuDispatch<Vec<f64>> {
    use scirs2_core::array_protocol::gpu_ndarray::{global_context, is_gpu_available, GpuNdarray};
    use std::sync::Arc;

    // Shape validation comes first so availability is only probed for usable input.
    let lengths_match = base.len() == n && delta.len() == n;
    if !lengths_match || !is_gpu_available() {
        return GpuDispatch::FallbackToCpu;
    }
    let ctx = if let Some(shared) = global_context() {
        shared
    } else {
        return GpuDispatch::FallbackToCpu;
    };

    // Every fallible GPU step lives in one closure so that `?` can be used and
    // all failures collapse into a single fallback decision below.
    let compute = || -> Result<Vec<f64>, scirs2_core::gpu::GpuError> {
        let base_f32: Vec<f32> = base.iter().map(|&value| value as f32).collect();
        let delta_f32: Vec<f32> = delta.iter().map(|&value| value as f32).collect();

        let base_gpu = GpuNdarray::<f32>::from_ndarray_data(&base_f32, vec![n], Arc::clone(&ctx))?;
        let delta_gpu =
            GpuNdarray::<f32>::from_ndarray_data(&delta_f32, vec![n], Arc::clone(&ctx))?;

        let host = base_gpu
            .add(&delta_gpu.multiply_by_scalar_f32(scale)?)?
            .to_vec()?;

        if host.len() == n {
            Ok(host.into_iter().map(f64::from).collect())
        } else {
            Err(scirs2_core::gpu::GpuError::Other(format!(
                "GPU result length mismatch: got {}, expected {n}",
                host.len()
            )))
        }
    };

    compute().map_or(GpuDispatch::FallbackToCpu, GpuDispatch::Done)
}
/// Stub used when the `gpu_wgpu` feature is disabled.
///
/// Ignores every argument and always returns
/// [`GpuDispatch::FallbackToCpu`], so callers take their CPU path.
#[cfg(not(feature = "gpu_wgpu"))]
fn affine_offset_gpu_inner(
    base: &[f64],
    delta: &[f64],
    scale: f32,
    n: usize,
) -> GpuDispatch<Vec<f64>> {
    // Touch each argument so the stub keeps the gated signature without
    // triggering unused-parameter warnings.
    let _ = base;
    let _ = delta;
    let _ = scale;
    let _ = n;
    GpuDispatch::FallbackToCpu
}
/// GPU-backed product of the informative columns of `data` with `coef`.
///
/// `data` is read as a row-major `n_samples x n_features` matrix. The first
/// `n_informative` entries of every row are packed into an
/// `n_samples x n_informative` matrix and multiplied by the first
/// `n_informative` entries of `coef`, treated as an `n_informative x 1`
/// column. Values are narrowed to `f32` for the GPU and widened back to
/// `f64`, so the output carries single precision only.
///
/// Returns [`GpuDispatch::FallbackToCpu`] when:
/// * `n_informative` is zero or exceeds `n_features`,
/// * `coef` holds fewer than `n_informative` values,
/// * `data.len()` differs from `n_samples * n_features` (or that product
///   overflows `usize`),
/// * no GPU or global context is available, or
/// * any GPU step reports an error (including a result whose length is not
///   `n_samples`).
#[cfg(feature = "gpu_wgpu")]
fn regression_targets_gpu_inner(
    data: &[f64],
    coef: &[f64],
    n_samples: usize,
    n_features: usize,
    n_informative: usize,
) -> GpuDispatch<Vec<f64>> {
    use scirs2_core::array_protocol::gpu_ndarray::{global_context, is_gpu_available, GpuNdarray};
    use std::sync::Arc;

    // Without the `n_informative <= n_features` bound the row slicing below
    // would read into the following row and run past the end of `data` on the
    // last one. The non-zero bound also guarantees `n_features > 0`, which
    // `chunks_exact` requires.
    if n_informative == 0 || n_informative > n_features || coef.len() < n_informative {
        return GpuDispatch::FallbackToCpu;
    }
    // Checked multiply: an overflowing shape can never match a real slice length.
    if n_samples.checked_mul(n_features) != Some(data.len()) {
        return GpuDispatch::FallbackToCpu;
    }
    if !is_gpu_available() {
        return GpuDispatch::FallbackToCpu;
    }
    let ctx = match global_context() {
        Some(c) => c,
        None => return GpuDispatch::FallbackToCpu,
    };

    // Every fallible GPU step lives in one closure so that `?` can be used and
    // all failures collapse into a single fallback decision below.
    let run = || -> Result<Vec<f64>, scirs2_core::gpu::GpuError> {
        // Cannot overflow: n_informative <= n_features and
        // n_samples * n_features was verified above.
        let mut x_inf: Vec<f32> = Vec::with_capacity(n_samples * n_informative);
        for row in data.chunks_exact(n_features) {
            x_inf.extend(row[..n_informative].iter().map(|&v| v as f32));
        }
        let coef_f32: Vec<f32> = coef[..n_informative].iter().map(|&v| v as f32).collect();

        let x_gpu = GpuNdarray::<f32>::from_ndarray_data(
            &x_inf,
            vec![n_samples, n_informative],
            Arc::clone(&ctx),
        )?;
        let coef_gpu = GpuNdarray::<f32>::from_ndarray_data(
            &coef_f32,
            vec![n_informative, 1],
            Arc::clone(&ctx),
        )?;

        let prod = x_gpu.matmul(&coef_gpu)?;
        let host = prod.to_vec()?;
        if host.len() != n_samples {
            return Err(scirs2_core::gpu::GpuError::Other(format!(
                "GPU matmul result length mismatch: got {}, expected {n_samples}",
                host.len()
            )));
        }
        Ok(host.into_iter().map(f64::from).collect())
    };

    match run() {
        Ok(v) => GpuDispatch::Done(v),
        Err(_) => GpuDispatch::FallbackToCpu,
    }
}
/// Stub used when the `gpu_wgpu` feature is disabled.
///
/// Ignores every argument and always returns
/// [`GpuDispatch::FallbackToCpu`], so callers take their CPU path.
#[cfg(not(feature = "gpu_wgpu"))]
fn regression_targets_gpu_inner(
    data: &[f64],
    coef: &[f64],
    n_samples: usize,
    n_features: usize,
    n_informative: usize,
) -> GpuDispatch<Vec<f64>> {
    // Touch each argument so the stub keeps the gated signature without
    // triggering unused-parameter warnings.
    let _ = data;
    let _ = coef;
    let _ = n_samples;
    let _ = n_features;
    let _ = n_informative;
    GpuDispatch::FallbackToCpu
}