pub use crate::gpu::*;
#[cfg(feature = "gpu")]
pub use scirs2_core::gpu::{GpuBackend, GpuBuffer, GpuContext, GpuDataType, GpuKernelHandle};
#[cfg(feature = "gpu")]
pub use scirs2_core::GpuError;
// ---- CPU fallbacks used when the `gpu` feature is disabled ----
// Marker trait mirroring `scirs2_core::gpu::GpuDataType` so generic code
// compiles identically whether or not the feature is enabled.
#[cfg(not(feature = "gpu"))]
pub trait GpuDataType: Copy + Send + Sync + 'static {}
// The stub covers the same primitive scalar types exercised by the tests
// below (floats plus 32/64-bit signed and unsigned integers).
#[cfg(not(feature = "gpu"))]
impl GpuDataType for f32 {}
#[cfg(not(feature = "gpu"))]
impl GpuDataType for f64 {}
#[cfg(not(feature = "gpu"))]
impl GpuDataType for i32 {}
#[cfg(not(feature = "gpu"))]
impl GpuDataType for i64 {}
#[cfg(not(feature = "gpu"))]
impl GpuDataType for u32 {}
#[cfg(not(feature = "gpu"))]
impl GpuDataType for u64 {}
// Stand-in for `scirs2_core::gpu::GpuBackend` when the `gpu` feature is off.
// Defaults to `Cpu`, the only backend the stub device actually runs on.
// NOTE(review): this stub lists a `Vulkan` variant that the feature-enabled
// enum appears not to have (see the cfg-gated variant lists in the tests) —
// confirm the intended variant set against scirs2_core.
#[cfg(not(feature = "gpu"))]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum GpuBackend {
    #[default]
    Cpu,
    Cuda,
    OpenCL,
    Metal,
    Rocm,
    Wgpu,
    Vulkan,
}
// CPU-only fallback error type used when the `gpu` feature is disabled.
// Wraps a plain message string and mirrors the constructor surface of
// `scirs2_core::GpuError` that this crate relies on.
#[cfg(not(feature = "gpu"))]
#[derive(Debug, Clone)]
pub struct GpuError(String);
#[cfg(not(feature = "gpu"))]
impl GpuError {
    /// Build an error from a borrowed message.
    pub fn new(msg: &str) -> Self {
        GpuError(String::from(msg))
    }
    /// Error for an invalid or mismatched buffer.
    pub fn invalid_buffer(msg: String) -> Self {
        GpuError(msg)
    }
    /// Error for an out-of-range or otherwise bad parameter.
    pub fn invalid_parameter(msg: String) -> Self {
        GpuError(msg)
    }
    /// Error raised when a kernel fails to compile.
    pub fn kernel_compilation_error(msg: String) -> Self {
        GpuError(msg)
    }
    /// Catch-all constructor for errors with no dedicated category.
    pub fn other(msg: String) -> Self {
        GpuError(msg)
    }
}
#[cfg(not(feature = "gpu"))]
impl std::fmt::Display for GpuError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // The display form is exactly the wrapped message.
        f.write_str(&self.0)
    }
}
#[cfg(not(feature = "gpu"))]
impl std::error::Error for GpuError {}
// Host-memory stand-in for `scirs2_core::gpu::GpuBuffer` when the `gpu`
// feature is disabled: the "device" buffer is simply an owned `Vec<T>`.
#[cfg(not(feature = "gpu"))]
pub struct GpuBuffer<T> {
    data: Vec<T>,
}
#[cfg(not(feature = "gpu"))]
impl<T: Clone + Copy> GpuBuffer<T> {
    /// Wrap an owned vector as a "device" buffer (no copy is performed).
    pub fn from_vec(data: Vec<T>) -> Self {
        GpuBuffer { data }
    }
    /// Borrow the contents as an immutable slice.
    pub fn as_slice(&self) -> &[T] {
        self.data.as_slice()
    }
    /// Borrow the contents as a mutable slice.
    pub fn as_mut_slice(&mut self) -> &mut [T] {
        self.data.as_mut_slice()
    }
    /// Copy the contents into a fresh host vector.
    pub fn to_vec(&self) -> Vec<T> {
        self.data.to_vec()
    }
    /// "Transfer" to host — trivially a copy, and always succeeds on CPU.
    pub fn to_host(&self) -> Result<Vec<T>, GpuError> {
        Ok(self.to_vec())
    }
    /// Number of elements stored.
    pub fn len(&self) -> usize {
        self.data.len()
    }
    /// True when the buffer holds no elements.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }
}
// Zero-sized placeholder for a compiled-kernel handle when `gpu` is disabled;
// keeps type names stable across feature configurations.
#[cfg(not(feature = "gpu"))]
#[derive(Debug, Clone)]
pub struct GpuKernelHandle;
// CPU-only stand-in for a GPU device when the `gpu` feature is disabled.
#[cfg(not(feature = "gpu"))]
pub struct GpuDevice {
    backend: GpuBackend,
}
#[cfg(not(feature = "gpu"))]
impl GpuDevice {
    /// Create a stub device. The requested backend is intentionally ignored:
    /// without the `gpu` feature every "device" is the CPU fallback, so
    /// `backend()` always reports `GpuBackend::Cpu`.
    pub fn new(_backend: GpuBackend) -> Result<Self, GpuError> {
        let device = GpuDevice {
            backend: GpuBackend::Cpu,
        };
        Ok(device)
    }
    /// Alias for [`GpuDevice::new`], kept for API parity with the GPU build.
    pub fn get_default(_backend: GpuBackend) -> Result<Self, GpuError> {
        GpuDevice::new(_backend)
    }
    /// Backend this device runs on (always `GpuBackend::Cpu` in the stub).
    pub fn backend(&self) -> GpuBackend {
        self.backend
    }
    /// Copy a host slice into a new "device" buffer.
    pub fn create_buffer<T>(&self, data: &[T]) -> Result<GpuBuffer<T>, GpuError>
    where
        T: Clone + Copy,
    {
        let data = Vec::from(data);
        Ok(GpuBuffer { data })
    }
    /// Allocate a "device" buffer holding `size` default-initialized elements.
    pub fn create_buffer_zeros<T>(&self, size: usize) -> Result<GpuBuffer<T>, GpuError>
    where
        T: Clone + Copy + Default,
    {
        let data: Vec<T> = (0..size).map(|_| T::default()).collect();
        Ok(GpuBuffer { data })
    }
}
pub use crate::gpu::{BackendInfo, GpuSpMatVec, OptimizationHint};
pub use crate::gpu::convenience::{available_backends, gpu_spmv, gpu_spmv_optimized};
// ---- Lightweight façade types kept for API compatibility ----
// No impl blocks are visible in this section of the file; presumably the
// behavior for these types lives elsewhere in the crate — verify before use.

// Wrapper bundling a sparse mat-vec handler for "advanced" operations.
pub struct AdvancedGpuOps {
    // Shared handler from `crate::gpu`.
    gpu_handler: GpuSpMatVec,
}
// Holds the backend a kernel scheduler targets.
#[derive(Debug, Clone)]
pub struct GpuKernelScheduler {
    backend: GpuBackend,
}
// Holds the backend a memory manager targets.
#[derive(Debug, Clone)]
pub struct GpuMemoryManager {
    backend: GpuBackend,
}
// User-facing configuration: which backend to use and which optimization
// hint to apply.
#[derive(Debug, Clone)]
pub struct GpuOptions {
    pub backend: GpuBackend,
    pub optimization: OptimizationHint,
}
// Toggleable profiler handle.
#[derive(Debug, Clone)]
pub struct GpuProfiler {
    enabled: bool,
}
// Wrapper bundling a sparse mat-vec handler for "optimized" operations.
pub struct OptimizedGpuOps {
    gpu_handler: GpuSpMatVec,
}
use crate::csr_array::CsrArray;
use crate::error::SparseResult;
use scirs2_core::ndarray::{Array1, ArrayView1};
use scirs2_core::numeric::{Float, SparseElement};
use std::fmt::Debug;
/// Compute a sparse matrix–vector product, optionally on a GPU backend.
///
/// When `backend` is `None` the default handler is used. On macOS a Metal
/// device is created when Metal is requested (or no backend is given); on
/// every other platform no explicit device is passed to the handler.
///
/// # Errors
/// Propagates any failure from constructing the handler or running `spmv`.
#[allow(dead_code)]
pub fn gpu_sparse_matvec<T>(
    matrix: &CsrArray<T>,
    vector: &ArrayView1<T>,
    backend: Option<GpuBackend>,
) -> SparseResult<Array1<T>>
where
    T: Float + SparseElement + Debug + Copy + GpuDataType + std::iter::Sum,
{
    let gpu_handler = if let Some(backend) = backend {
        GpuSpMatVec::with_backend(backend)?
    } else {
        GpuSpMatVec::new()?
    };
    // macOS + `gpu`: the real `GpuDevice::new(backend, index)` is called with
    // device index 0 and its result wrapped directly in `Some`.
    // NOTE(review): this assumes the real constructor is infallible (returns
    // the device, not a Result) — confirm against scirs2_core.
    #[cfg(all(target_os = "macos", feature = "gpu"))]
    let device = if matches!(backend, Some(GpuBackend::Metal) | None) {
        Some(GpuDevice::new(GpuBackend::Metal, 0))
    } else {
        None
    };
    // macOS without `gpu`: the stub constructor returns a Result; a failure
    // silently degrades to running with no explicit device.
    #[cfg(all(target_os = "macos", not(feature = "gpu")))]
    let device = if matches!(backend, Some(GpuBackend::Metal) | None) {
        GpuDevice::new(GpuBackend::Metal).ok()
    } else {
        None
    };
    #[cfg(not(target_os = "macos"))]
    let device = None;
    gpu_handler.spmv(matrix, vector, device.as_ref())
}
/// Sparse matrix–vector product for symmetric matrices, optionally on a GPU.
///
/// NOTE(review): despite the name, the body is identical to
/// `gpu_sparse_matvec` and delegates to the same `spmv` call — no
/// symmetry-specific kernel is invoked here. Confirm whether the handler is
/// expected to exploit symmetry internally.
///
/// # Errors
/// Propagates any failure from constructing the handler or running `spmv`.
#[allow(dead_code)]
pub fn gpu_sym_sparse_matvec<T>(
    matrix: &CsrArray<T>,
    vector: &ArrayView1<T>,
    backend: Option<GpuBackend>,
) -> SparseResult<Array1<T>>
where
    T: Float + SparseElement + Debug + Copy + GpuDataType + std::iter::Sum,
{
    let gpu_handler = if let Some(backend) = backend {
        GpuSpMatVec::with_backend(backend)?
    } else {
        GpuSpMatVec::new()?
    };
    // Platform/feature-dependent device selection; see `gpu_sparse_matvec`
    // for the same pattern. Metal is tried on macOS when requested or when no
    // backend was specified.
    #[cfg(all(target_os = "macos", feature = "gpu"))]
    let device = if matches!(backend, Some(GpuBackend::Metal) | None) {
        Some(GpuDevice::new(GpuBackend::Metal, 0))
    } else {
        None
    };
    // Stub path: constructor failure degrades silently to no device.
    #[cfg(all(target_os = "macos", not(feature = "gpu")))]
    let device = if matches!(backend, Some(GpuBackend::Metal) | None) {
        GpuDevice::new(GpuBackend::Metal).ok()
    } else {
        None
    };
    #[cfg(not(target_os = "macos"))]
    let device = None;
    gpu_handler.spmv(matrix, vector, device.as_ref())
}
/// Sparse matrix–vector product with an explicit optimization hint.
///
/// Same device-selection logic as `gpu_sparse_matvec`, but dispatches through
/// `spmv_optimized` so the handler can act on `optimization`.
///
/// # Errors
/// Propagates any failure from constructing the handler or running the
/// optimized SpMV.
#[allow(dead_code)]
pub fn gpu_advanced_spmv<T>(
    matrix: &CsrArray<T>,
    vector: &ArrayView1<T>,
    backend: Option<GpuBackend>,
    optimization: OptimizationHint,
) -> SparseResult<Array1<T>>
where
    T: Float + SparseElement + Debug + Copy + GpuDataType + std::iter::Sum,
{
    let gpu_handler = if let Some(backend) = backend {
        GpuSpMatVec::with_backend(backend)?
    } else {
        GpuSpMatVec::new()?
    };
    // Platform/feature-dependent device selection; mirrors `gpu_sparse_matvec`.
    #[cfg(all(target_os = "macos", feature = "gpu"))]
    let device = if matches!(backend, Some(GpuBackend::Metal) | None) {
        Some(GpuDevice::new(GpuBackend::Metal, 0))
    } else {
        None
    };
    // Stub path: constructor failure degrades silently to no device.
    #[cfg(all(target_os = "macos", not(feature = "gpu")))]
    let device = if matches!(backend, Some(GpuBackend::Metal) | None) {
        GpuDevice::new(GpuBackend::Metal).ok()
    } else {
        None
    };
    #[cfg(not(target_os = "macos"))]
    let device = None;
    gpu_handler.spmv_optimized(matrix, vector, device.as_ref(), optimization)
}
// Thin kernel wrapper kept for API compatibility: the device and workgroup
// size are accepted but unused at construction, and execution delegates to
// the shared `GpuSpMatVec` handler.
#[allow(dead_code)]
pub struct SpMVKernel {
    gpu_handler: GpuSpMatVec,
}
impl SpMVKernel {
    /// Create the kernel. `_device` and `_workgroupsize` are currently
    /// ignored; handler-construction failures are re-wrapped as `GpuError`.
    pub fn new(_device: &GpuDevice, _workgroupsize: [u32; 3]) -> Result<Self, GpuError> {
        let gpu_handler = GpuSpMatVec::new().map_err(|e| {
            // Exactly one of these returns survives cfg expansion: the real
            // `GpuError` exposes an `Other` variant, the stub an `other`
            // constructor.
            #[cfg(feature = "gpu")]
            return GpuError::Other(format!("{:?}", e));
            #[cfg(not(feature = "gpu"))]
            return GpuError::other(format!("{:?}", e));
        })?;
        Ok(Self { gpu_handler })
    }
    /// Run sparse mat-vec on `device`, mapping handler errors to `GpuError`.
    pub fn execute<T>(
        &self,
        matrix: &CsrArray<T>,
        vector: &ArrayView1<T>,
        device: &GpuDevice,
    ) -> Result<Array1<T>, GpuError>
    where
        T: Float + SparseElement + Debug + Copy + GpuDataType + std::iter::Sum,
    {
        self.gpu_handler
            .spmv(matrix, vector, Some(device))
            .map_err(|e| {
                // Same feature-dependent error-wrapping as in `new`.
                #[cfg(feature = "gpu")]
                return GpuError::Other(format!("{:?}", e));
                #[cfg(not(feature = "gpu"))]
                return GpuError::other(format!("{:?}", e));
            })
    }
}
/// Extension methods for copying `GpuBuffer` contents back to host memory.
pub trait GpuBufferExt<T: GpuDataType> {
    /// Copy the entire buffer into a host-side `Vec`.
    fn to_host(&self) -> Result<Vec<T>, GpuError>;
    /// Copy only the elements in `range` into a host-side `Vec`.
    fn to_host_range(&self, range: std::ops::Range<usize>) -> Result<Vec<T>, GpuError>;
}
impl<T: GpuDataType> GpuBufferExt<T> for GpuBuffer<T> {
    /// Copy the whole buffer to host memory; infallible for the CPU stub.
    fn to_host(&self) -> Result<Vec<T>, GpuError> {
        Ok(self.to_vec())
    }
    /// Copy the elements in `range` to host memory.
    ///
    /// # Errors
    /// Returns an "Range out of bounds" `GpuError` when the range does not
    /// describe a valid window of the buffer.
    fn to_host_range(&self, range: std::ops::Range<usize>) -> Result<Vec<T>, GpuError> {
        let full_data = self.to_vec();
        // Use `slice::get`, which rejects both `range.end > len` and inverted
        // ranges (`start > end`). The previous code only checked
        // `range.end <= len` and then indexed, so an inverted range panicked
        // instead of returning Err.
        match full_data.get(range) {
            Some(window) => Ok(window.to_vec()),
            None => {
                // Error spelling differs between the real and stub GpuError.
                #[cfg(feature = "gpu")]
                return Err(GpuError::InvalidParameter(
                    "Range out of bounds".to_string(),
                ));
                #[cfg(not(feature = "gpu"))]
                return Err(GpuError::invalid_parameter(
                    "Range out of bounds".to_string(),
                ));
            }
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::Array1;

    // Legacy free-function entry point: must either succeed or fail with a
    // recognizable "no GPU available" style error; the explicit CPU backend
    // must always succeed.
    #[test]
    fn test_backward_compatibility_gpu_sparse_matvec() {
        // 2x2 dense-equivalent matrix [[1, 2], [3, 4]] in CSR form.
        let data = vec![1.0, 2.0, 3.0, 4.0];
        let indices = vec![0, 1, 0, 1];
        let indptr = vec![0, 2, 4];
        let matrix = CsrArray::new(data.into(), indices.into(), indptr.into(), (2, 2))
            .expect("Operation failed");
        let vector = Array1::from_vec(vec![1.0, 2.0]);
        let result = gpu_sparse_matvec(&matrix, &vector.view(), None);
        if let Err(e) = &result {
            eprintln!("Error from gpu_sparse_matvec: {:?}", e);
            let error_msg = format!("{:?}", e);
            // Only device-unavailability errors are acceptable here.
            assert!(
                error_msg.contains("GPU device required")
                    || error_msg.contains("not initialized")
                    || error_msg.contains("not available"),
                "Unexpected error: {:?}",
                e
            );
        } else {
            assert!(result.is_ok());
        }
        let result = gpu_sparse_matvec(&matrix, &vector.view(), Some(GpuBackend::Cpu));
        if let Err(e) = &result {
            eprintln!("Error from gpu_sparse_matvec with CPU backend: {:?}", e);
        }
        assert!(result.is_ok(), "CPU backend should always work");
    }

    // Kernel construction should succeed against both the real device (gpu
    // feature) and the CPU stub device.
    #[test]
    fn test_gpu_spmv_kernel() {
        #[cfg(feature = "gpu")]
        let device = scirs2_core::gpu::GpuDevice::new(GpuBackend::Cpu, 0);
        #[cfg(not(feature = "gpu"))]
        let device = GpuDevice::new(GpuBackend::Cpu).expect("Operation failed");
        let kernel = SpMVKernel::new(&device, [1, 1, 1]);
        assert!(kernel.is_ok());
    }

    // Round-trips data through the stub buffer's host-copy extension methods
    // (only meaningful without the gpu feature, where the stub buffer can be
    // constructed directly).
    #[test]
    fn test_gpu_buffer_ext() {
        #[cfg(not(feature = "gpu"))]
        {
            let buffer = GpuBuffer {
                data: vec![1.0, 2.0, 3.0, 4.0],
            };
            let host_data = buffer.to_host().expect("Operation failed");
            assert_eq!(host_data, vec![1.0, 2.0, 3.0, 4.0]);
            let range_data = buffer.to_host_range(1..3).expect("Operation failed");
            assert_eq!(range_data, vec![2.0, 3.0]);
        }
    }

    // Compile-time check that all primitive scalars satisfy `GpuDataType`.
    #[test]
    fn test_gpu_data_types() {
        fn is_gpu_data_type<T: GpuDataType>() {}
        is_gpu_data_type::<f32>();
        is_gpu_data_type::<f64>();
        is_gpu_data_type::<u32>();
        is_gpu_data_type::<u64>();
        is_gpu_data_type::<i32>();
        is_gpu_data_type::<i64>();
    }

    // Exhaustively matches every backend variant. The stub enum carries an
    // extra `Vulkan` variant the feature-enabled enum does not, hence the two
    // cfg-gated variant lists and the cfg-gated match arm.
    #[test]
    fn test_gpu_backend_enum() {
        #[cfg(not(feature = "gpu"))]
        let backends = [
            GpuBackend::Cpu,
            GpuBackend::Cuda,
            GpuBackend::OpenCL,
            GpuBackend::Metal,
            GpuBackend::Rocm,
            GpuBackend::Wgpu,
            GpuBackend::Vulkan,
        ];
        #[cfg(feature = "gpu")]
        let backends = [
            GpuBackend::Cpu,
            GpuBackend::Cuda,
            GpuBackend::OpenCL,
            GpuBackend::Metal,
            GpuBackend::Rocm,
            GpuBackend::Wgpu,
        ];
        for backend in &backends {
            match backend {
                GpuBackend::Cpu => (),
                GpuBackend::Cuda => (),
                GpuBackend::OpenCL => (),
                GpuBackend::Metal => (),
                GpuBackend::Rocm => (),
                GpuBackend::Wgpu => (),
                #[cfg(not(feature = "gpu"))]
                GpuBackend::Vulkan => (),
            }
        }
    }

    // The convenience helper must always report at least the CPU backend.
    #[test]
    fn test_available_backends() {
        let backends = available_backends();
        assert!(!backends.is_empty());
        assert!(backends.contains(&GpuBackend::Cpu));
    }
}