#![cfg(feature = "cuda")]
use cudarc::cusparse::sys as csys;
use cudarc::driver::{DevicePtr, DevicePtrMut};
use crate::buffer::CudaBuffer;
use crate::device::GpuDevice;
use crate::error::{GpuError, GpuResult};
use crate::transfer::{alloc_zeros_f32, alloc_zeros_f64, cpu_to_gpu};
/// RAII owner of a raw cuSPARSE library handle (`cusparseHandle_t`).
///
/// Created via `cusparseCreate` and destroyed via `cusparseDestroy` in
/// `Drop`; all cuSPARSE entry points in this module take the handle through
/// [`CusparseHandle::raw`].
#[derive(Debug)]
pub struct CusparseHandle {
    // Raw library handle; owned exclusively by this struct.
    inner: csys::cusparseHandle_t,
}
// SAFETY: the raw pointer itself carries no thread affinity.
// NOTE(review): these impls assume the cuSPARSE library's documented
// host-thread-safety covers every call pattern used in this crate — confirm
// against the cuSPARSE thread-safety notes before sharing a handle across
// threads concurrently.
unsafe impl Send for CusparseHandle {}
unsafe impl Sync for CusparseHandle {}
impl CusparseHandle {
    /// Create a fresh cuSPARSE handle (`cusparseCreate`).
    ///
    /// # Errors
    /// Returns `GpuError::InvalidState` if the library rejects creation
    /// (e.g. no CUDA context / out of resources).
    pub fn new() -> GpuResult<Self> {
        let inner = cudarc::cusparse::result::create().map_err(|e| GpuError::InvalidState {
            message: format!("cusparseCreate failed: {e:?}"),
        })?;
        Ok(Self { inner })
    }
    /// Raw handle for FFI calls. The returned pointer is still owned by
    /// `self` and must not be used after `self` is dropped.
    #[inline]
    pub fn raw(&self) -> csys::cusparseHandle_t {
        self.inner
    }
}
impl Drop for CusparseHandle {
    fn drop(&mut self) {
        // Destruction errors cannot be propagated out of Drop; ignore them.
        unsafe {
            let _ = cudarc::cusparse::result::destroy(self.inner);
        }
    }
}
/// Translate a raw cuSPARSE status code into a `GpuResult`, tagging the
/// error with the name of the call (`op`) that produced it.
fn check(status: csys::cusparseStatus_t, op: &'static str) -> GpuResult<()> {
    if status != csys::cusparseStatus_t::CUSPARSE_STATUS_SUCCESS {
        return Err(GpuError::InvalidState {
            message: format!("{op} returned cuSPARSE status {status:?}"),
        });
    }
    Ok(())
}
/// Bind the device's CUDA stream to the cuSPARSE handle so that subsequent
/// cuSPARSE calls are enqueued on that stream (`cusparseSetStream`).
fn set_stream(handle: &CusparseHandle, device: &GpuDevice) -> GpuResult<()> {
    let stream = device.stream();
    // cudaStream_t and the driver-API CUstream are pointer-compatible here;
    // go through *mut CUstream_st to make the reinterpretation explicit.
    let raw = stream.cu_stream() as *mut csys::CUstream_st as csys::cudaStream_t;
    check(
        unsafe { csys::cusparseSetStream(handle.raw(), raw) },
        "cusparseSetStream",
    )
}
/// GPU SpMM: computes `C = A * B` where `A` is an `m x k` CSR matrix given
/// as host-side arrays (32-bit, zero-based indices), `B` is a `k x n`
/// row-major dense matrix already resident on the device, and the returned
/// `C` is an `m x n` row-major dense device buffer (f32).
///
/// # Errors
/// Returns `ShapeMismatch` for inconsistent input lengths or dimensions that
/// do not fit in `i64`, and `InvalidState` if any cuSPARSE call fails.
#[allow(clippy::too_many_arguments)]
pub fn gpu_spmm_csr_f32(
    handle: &CusparseHandle,
    crow_indices: &[u32],
    col_indices: &[u32],
    values: &[f32],
    dense: &CudaBuffer<f32>,
    m: usize,
    k: usize,
    n: usize,
    device: &GpuDevice,
) -> GpuResult<CudaBuffer<f32>> {
    // CSR row-offset array must have exactly m + 1 entries.
    if crow_indices.len() != m + 1 {
        return Err(GpuError::ShapeMismatch {
            op: "spmm_csr_f32",
            expected: vec![m + 1],
            got: vec![crow_indices.len()],
        });
    }
    // One column index per stored value.
    if col_indices.len() != values.len() {
        return Err(GpuError::ShapeMismatch {
            op: "spmm_csr_f32",
            expected: vec![values.len()],
            got: vec![col_indices.len()],
        });
    }
    if dense.len() != k * n {
        return Err(GpuError::ShapeMismatch {
            op: "spmm_csr_f32",
            expected: vec![k, n],
            got: vec![dense.len()],
        });
    }
    // Degenerate output shape: nothing to compute, return all zeros.
    if m == 0 || n == 0 {
        return alloc_zeros_f32(m * n, device);
    }
    let nnz = values.len();
    // No stored entries, or empty inner dimension => result is all zeros.
    if nnz == 0 || k == 0 {
        return alloc_zeros_f32(m * n, device);
    }
    // Enqueue all subsequent cuSPARSE work on this device's stream.
    set_stream(handle, device)?;
    // Upload the CSR arrays; the dense operand is already on the device.
    let mut d_crow = cpu_to_gpu(crow_indices, device)?;
    let mut d_col = cpu_to_gpu(col_indices, device)?;
    let mut d_vals = cpu_to_gpu(values, device)?;
    let mut out = alloc_zeros_f32(m * n, device)?;
    let stream = device.stream();
    // Descriptors start null so the cleanup block below can tell which of
    // them were actually created before an error occurred.
    let mut sp_mat: csys::cusparseSpMatDescr_t = std::ptr::null_mut();
    let mut dn_b: csys::cusparseDnMatDescr_t = std::ptr::null_mut();
    let mut dn_c: csys::cusparseDnMatDescr_t = std::ptr::null_mut();
    // The cuSPARSE generic API takes i64 dimensions; reject anything larger.
    let m_i64 = i64::try_from(m).map_err(|_| GpuError::ShapeMismatch {
        op: "spmm_csr_f32",
        expected: vec![i64::MAX as usize],
        got: vec![m],
    })?;
    let k_i64 = i64::try_from(k).map_err(|_| GpuError::ShapeMismatch {
        op: "spmm_csr_f32",
        expected: vec![i64::MAX as usize],
        got: vec![k],
    })?;
    let n_i64 = i64::try_from(n).map_err(|_| GpuError::ShapeMismatch {
        op: "spmm_csr_f32",
        expected: vec![i64::MAX as usize],
        got: vec![n],
    })?;
    let nnz_i64 = i64::try_from(nnz).map_err(|_| GpuError::ShapeMismatch {
        op: "spmm_csr_f32",
        expected: vec![i64::MAX as usize],
        got: vec![nnz],
    })?;
    // C = alpha * A * B + beta * C with alpha = 1, beta = 0: plain product.
    let alpha: f32 = 1.0;
    let beta: f32 = 0.0;
    // Run the FFI sequence inside a closure so any error short-circuits via
    // `?` while the descriptor cleanup below still runs unconditionally.
    let result = (|| -> GpuResult<()> {
        // Raw device pointers; the `_sync` guards keep the stream-ordering
        // bookkeeping alive while the pointers are in use.
        let (crow_ptr, _crow_sync) = d_crow.inner_mut().device_ptr_mut(&stream);
        let (col_ptr, _col_sync) = d_col.inner_mut().device_ptr_mut(&stream);
        let (vals_ptr, _vals_sync) = d_vals.inner_mut().device_ptr_mut(&stream);
        let (dense_ptr, _dense_sync) = dense.inner().device_ptr(&stream);
        let (out_ptr, _out_sync) = out.inner_mut().device_ptr_mut(&stream);
        // Describe A as an m x k CSR matrix, 32-bit zero-based indices.
        let status = unsafe {
            csys::cusparseCreateCsr(
                &mut sp_mat,
                m_i64,
                k_i64,
                nnz_i64,
                crow_ptr as *mut std::ffi::c_void,
                col_ptr as *mut std::ffi::c_void,
                vals_ptr as *mut std::ffi::c_void,
                csys::cusparseIndexType_t::CUSPARSE_INDEX_32I,
                csys::cusparseIndexType_t::CUSPARSE_INDEX_32I,
                csys::cusparseIndexBase_t::CUSPARSE_INDEX_BASE_ZERO,
                csys::cudaDataType_t::CUDA_R_32F,
            )
        };
        check(status, "cusparseCreateCsr")?;
        // B: k x n dense, row-major, leading dimension n.
        let status = unsafe {
            csys::cusparseCreateDnMat(
                &mut dn_b,
                k_i64,
                n_i64,
                n_i64,
                dense_ptr as *mut std::ffi::c_void,
                csys::cudaDataType_t::CUDA_R_32F,
                csys::cusparseOrder_t::CUSPARSE_ORDER_ROW,
            )
        };
        check(status, "cusparseCreateDnMat (B)")?;
        // C: m x n dense, row-major, leading dimension n.
        let status = unsafe {
            csys::cusparseCreateDnMat(
                &mut dn_c,
                m_i64,
                n_i64,
                n_i64,
                out_ptr as *mut std::ffi::c_void,
                csys::cudaDataType_t::CUDA_R_32F,
                csys::cusparseOrder_t::CUSPARSE_ORDER_ROW,
            )
        };
        check(status, "cusparseCreateDnMat (C)")?;
        // Query the scratch-workspace size SpMM requires for this problem.
        let mut buffer_size: usize = 0;
        let status = unsafe {
            csys::cusparseSpMM_bufferSize(
                handle.raw(),
                csys::cusparseOperation_t::CUSPARSE_OPERATION_NON_TRANSPOSE,
                csys::cusparseOperation_t::CUSPARSE_OPERATION_NON_TRANSPOSE,
                std::ptr::from_ref::<f32>(&alpha).cast::<std::ffi::c_void>(),
                sp_mat,
                dn_b,
                std::ptr::from_ref::<f32>(&beta).cast::<std::ffi::c_void>(),
                dn_c,
                csys::cudaDataType_t::CUDA_R_32F,
                csys::cusparseSpMMAlg_t::CUSPARSE_SPMM_ALG_DEFAULT,
                &mut buffer_size,
            )
        };
        check(status, "cusparseSpMM_bufferSize")?;
        let workspace_bytes = buffer_size;
        // Allocate workspace only if the library asked for one.
        let mut workspace_slice = if workspace_bytes > 0 {
            Some(stream.alloc_zeros::<u8>(workspace_bytes)?)
        } else {
            None
        };
        let workspace_ptr = match workspace_slice.as_mut() {
            Some(s) => {
                let (p, _sync) = s.device_ptr_mut(&stream);
                p as *mut std::ffi::c_void
            }
            None => std::ptr::null_mut(),
        };
        let status = unsafe {
            csys::cusparseSpMM(
                handle.raw(),
                csys::cusparseOperation_t::CUSPARSE_OPERATION_NON_TRANSPOSE,
                csys::cusparseOperation_t::CUSPARSE_OPERATION_NON_TRANSPOSE,
                std::ptr::from_ref::<f32>(&alpha).cast::<std::ffi::c_void>(),
                sp_mat,
                dn_b,
                std::ptr::from_ref::<f32>(&beta).cast::<std::ffi::c_void>(),
                dn_c,
                csys::cudaDataType_t::CUDA_R_32F,
                csys::cusparseSpMMAlg_t::CUSPARSE_SPMM_ALG_DEFAULT,
                workspace_ptr,
            )
        };
        check(status, "cusparseSpMM")?;
        // NOTE(review): the workspace is released right after SpMM is
        // enqueued; this presumes the free is stream-ordered with respect
        // to the kernel (or that drop synchronizes) — confirm against the
        // allocator's semantics.
        drop(workspace_slice);
        Ok(())
    })();
    // Destroy whichever descriptors were created, regardless of `result`.
    unsafe {
        if !dn_c.is_null() {
            let _ = csys::cusparseDestroyDnMat(dn_c);
        }
        if !dn_b.is_null() {
            let _ = csys::cusparseDestroyDnMat(dn_b);
        }
        if !sp_mat.is_null() {
            let _ = csys::cusparseDestroySpMat(sp_mat);
        }
    }
    result?;
    Ok(out)
}
/// GPU SpMM (f64 variant of [`gpu_spmm_csr_f32`]): computes `C = A * B`
/// where `A` is an `m x k` CSR matrix given as host-side arrays (32-bit,
/// zero-based indices), `B` is a `k x n` row-major dense device matrix, and
/// the returned `C` is an `m x n` row-major dense device buffer.
///
/// # Errors
/// Returns `ShapeMismatch` for inconsistent input lengths or dimensions that
/// do not fit in `i64`, and `InvalidState` if any cuSPARSE call fails.
#[allow(clippy::too_many_arguments)]
pub fn gpu_spmm_csr_f64(
    handle: &CusparseHandle,
    crow_indices: &[u32],
    col_indices: &[u32],
    values: &[f64],
    dense: &CudaBuffer<f64>,
    m: usize,
    k: usize,
    n: usize,
    device: &GpuDevice,
) -> GpuResult<CudaBuffer<f64>> {
    // CSR row-offset array must have exactly m + 1 entries.
    if crow_indices.len() != m + 1 {
        return Err(GpuError::ShapeMismatch {
            op: "spmm_csr_f64",
            expected: vec![m + 1],
            got: vec![crow_indices.len()],
        });
    }
    // One column index per stored value.
    if col_indices.len() != values.len() {
        return Err(GpuError::ShapeMismatch {
            op: "spmm_csr_f64",
            expected: vec![values.len()],
            got: vec![col_indices.len()],
        });
    }
    if dense.len() != k * n {
        return Err(GpuError::ShapeMismatch {
            op: "spmm_csr_f64",
            expected: vec![k, n],
            got: vec![dense.len()],
        });
    }
    // Degenerate output shape: nothing to compute, return all zeros.
    if m == 0 || n == 0 {
        return alloc_zeros_f64(m * n, device);
    }
    let nnz = values.len();
    // No stored entries, or empty inner dimension => result is all zeros.
    if nnz == 0 || k == 0 {
        return alloc_zeros_f64(m * n, device);
    }
    // Enqueue all subsequent cuSPARSE work on this device's stream.
    set_stream(handle, device)?;
    // Upload the CSR arrays; the dense operand is already on the device.
    let mut d_crow = cpu_to_gpu(crow_indices, device)?;
    let mut d_col = cpu_to_gpu(col_indices, device)?;
    let mut d_vals = cpu_to_gpu(values, device)?;
    let mut out = alloc_zeros_f64(m * n, device)?;
    let stream = device.stream();
    // Descriptors start null so the cleanup block below can tell which of
    // them were actually created before an error occurred.
    let mut sp_mat: csys::cusparseSpMatDescr_t = std::ptr::null_mut();
    let mut dn_b: csys::cusparseDnMatDescr_t = std::ptr::null_mut();
    let mut dn_c: csys::cusparseDnMatDescr_t = std::ptr::null_mut();
    // The cuSPARSE generic API takes i64 dimensions; reject anything larger.
    let m_i64 = i64::try_from(m).map_err(|_| GpuError::ShapeMismatch {
        op: "spmm_csr_f64",
        expected: vec![i64::MAX as usize],
        got: vec![m],
    })?;
    let k_i64 = i64::try_from(k).map_err(|_| GpuError::ShapeMismatch {
        op: "spmm_csr_f64",
        expected: vec![i64::MAX as usize],
        got: vec![k],
    })?;
    let n_i64 = i64::try_from(n).map_err(|_| GpuError::ShapeMismatch {
        op: "spmm_csr_f64",
        expected: vec![i64::MAX as usize],
        got: vec![n],
    })?;
    let nnz_i64 = i64::try_from(nnz).map_err(|_| GpuError::ShapeMismatch {
        op: "spmm_csr_f64",
        expected: vec![i64::MAX as usize],
        got: vec![nnz],
    })?;
    // C = alpha * A * B + beta * C with alpha = 1, beta = 0: plain product.
    let alpha: f64 = 1.0;
    let beta: f64 = 0.0;
    // Run the FFI sequence inside a closure so any error short-circuits via
    // `?` while the descriptor cleanup below still runs unconditionally.
    let result = (|| -> GpuResult<()> {
        // Raw device pointers; the `_sync` guards keep the stream-ordering
        // bookkeeping alive while the pointers are in use.
        let (crow_ptr, _crow_sync) = d_crow.inner_mut().device_ptr_mut(&stream);
        let (col_ptr, _col_sync) = d_col.inner_mut().device_ptr_mut(&stream);
        let (vals_ptr, _vals_sync) = d_vals.inner_mut().device_ptr_mut(&stream);
        let (dense_ptr, _dense_sync) = dense.inner().device_ptr(&stream);
        let (out_ptr, _out_sync) = out.inner_mut().device_ptr_mut(&stream);
        // Describe A as an m x k CSR matrix, 32-bit zero-based indices.
        let status = unsafe {
            csys::cusparseCreateCsr(
                &mut sp_mat,
                m_i64,
                k_i64,
                nnz_i64,
                crow_ptr as *mut std::ffi::c_void,
                col_ptr as *mut std::ffi::c_void,
                vals_ptr as *mut std::ffi::c_void,
                csys::cusparseIndexType_t::CUSPARSE_INDEX_32I,
                csys::cusparseIndexType_t::CUSPARSE_INDEX_32I,
                csys::cusparseIndexBase_t::CUSPARSE_INDEX_BASE_ZERO,
                csys::cudaDataType_t::CUDA_R_64F,
            )
        };
        check(status, "cusparseCreateCsr (f64)")?;
        // B: k x n dense, row-major, leading dimension n.
        let status = unsafe {
            csys::cusparseCreateDnMat(
                &mut dn_b,
                k_i64,
                n_i64,
                n_i64,
                dense_ptr as *mut std::ffi::c_void,
                csys::cudaDataType_t::CUDA_R_64F,
                csys::cusparseOrder_t::CUSPARSE_ORDER_ROW,
            )
        };
        check(status, "cusparseCreateDnMat B (f64)")?;
        // C: m x n dense, row-major, leading dimension n.
        let status = unsafe {
            csys::cusparseCreateDnMat(
                &mut dn_c,
                m_i64,
                n_i64,
                n_i64,
                out_ptr as *mut std::ffi::c_void,
                csys::cudaDataType_t::CUDA_R_64F,
                csys::cusparseOrder_t::CUSPARSE_ORDER_ROW,
            )
        };
        check(status, "cusparseCreateDnMat C (f64)")?;
        // Query the scratch-workspace size SpMM requires for this problem.
        let mut buffer_size: usize = 0;
        let status = unsafe {
            csys::cusparseSpMM_bufferSize(
                handle.raw(),
                csys::cusparseOperation_t::CUSPARSE_OPERATION_NON_TRANSPOSE,
                csys::cusparseOperation_t::CUSPARSE_OPERATION_NON_TRANSPOSE,
                std::ptr::from_ref::<f64>(&alpha).cast::<std::ffi::c_void>(),
                sp_mat,
                dn_b,
                std::ptr::from_ref::<f64>(&beta).cast::<std::ffi::c_void>(),
                dn_c,
                csys::cudaDataType_t::CUDA_R_64F,
                csys::cusparseSpMMAlg_t::CUSPARSE_SPMM_ALG_DEFAULT,
                &mut buffer_size,
            )
        };
        check(status, "cusparseSpMM_bufferSize (f64)")?;
        let workspace_bytes = buffer_size;
        // Allocate workspace only if the library asked for one.
        let mut workspace_slice = if workspace_bytes > 0 {
            Some(stream.alloc_zeros::<u8>(workspace_bytes)?)
        } else {
            None
        };
        let workspace_ptr = match workspace_slice.as_mut() {
            Some(s) => {
                let (p, _sync) = s.device_ptr_mut(&stream);
                p as *mut std::ffi::c_void
            }
            None => std::ptr::null_mut(),
        };
        let status = unsafe {
            csys::cusparseSpMM(
                handle.raw(),
                csys::cusparseOperation_t::CUSPARSE_OPERATION_NON_TRANSPOSE,
                csys::cusparseOperation_t::CUSPARSE_OPERATION_NON_TRANSPOSE,
                std::ptr::from_ref::<f64>(&alpha).cast::<std::ffi::c_void>(),
                sp_mat,
                dn_b,
                std::ptr::from_ref::<f64>(&beta).cast::<std::ffi::c_void>(),
                dn_c,
                csys::cudaDataType_t::CUDA_R_64F,
                csys::cusparseSpMMAlg_t::CUSPARSE_SPMM_ALG_DEFAULT,
                workspace_ptr,
            )
        };
        check(status, "cusparseSpMM (f64)")?;
        // NOTE(review): the workspace is released right after SpMM is
        // enqueued; this presumes the free is stream-ordered with respect
        // to the kernel (or that drop synchronizes) — confirm against the
        // allocator's semantics.
        drop(workspace_slice);
        Ok(())
    })();
    // Destroy whichever descriptors were created, regardless of `result`.
    unsafe {
        if !dn_c.is_null() {
            let _ = csys::cusparseDestroyDnMat(dn_c);
        }
        if !dn_b.is_null() {
            let _ = csys::cusparseDestroyDnMat(dn_b);
        }
        if !sp_mat.is_null() {
            let _ = csys::cusparseDestroySpMat(sp_mat);
        }
    }
    result?;
    Ok(out)
}
/// Expand an `m x n` CSR matrix (host-side arrays, 32-bit zero-based
/// indices) into a row-major dense device buffer of length `m * n` (f32)
/// using `cusparseSparseToDense`.
///
/// # Errors
/// Returns `ShapeMismatch` for inconsistent input lengths or dimensions that
/// do not fit in `i64`, and `InvalidState` if any cuSPARSE call fails.
#[allow(clippy::too_many_arguments)]
pub fn gpu_sparse_to_dense_csr_f32(
    handle: &CusparseHandle,
    crow_indices: &[u32],
    col_indices: &[u32],
    values: &[f32],
    m: usize,
    n: usize,
    device: &GpuDevice,
) -> GpuResult<CudaBuffer<f32>> {
    // CSR row-offset array must have exactly m + 1 entries.
    if crow_indices.len() != m + 1 {
        return Err(GpuError::ShapeMismatch {
            op: "sparse_to_dense_csr_f32",
            expected: vec![m + 1],
            got: vec![crow_indices.len()],
        });
    }
    // One column index per stored value.
    if col_indices.len() != values.len() {
        return Err(GpuError::ShapeMismatch {
            op: "sparse_to_dense_csr_f32",
            expected: vec![values.len()],
            got: vec![col_indices.len()],
        });
    }
    // Degenerate shape: nothing to expand.
    if m == 0 || n == 0 {
        return alloc_zeros_f32(m * n, device);
    }
    let nnz = values.len();
    // All-zero matrix: the freshly zeroed output is already correct.
    if nnz == 0 {
        return alloc_zeros_f32(m * n, device);
    }
    // Enqueue all subsequent cuSPARSE work on this device's stream.
    set_stream(handle, device)?;
    let mut d_crow = cpu_to_gpu(crow_indices, device)?;
    let mut d_col = cpu_to_gpu(col_indices, device)?;
    let mut d_vals = cpu_to_gpu(values, device)?;
    let mut out = alloc_zeros_f32(m * n, device)?;
    let stream = device.stream();
    // Descriptors start null so cleanup can tell what was created.
    let mut sp_mat: csys::cusparseConstSpMatDescr_t = std::ptr::null_mut();
    let mut dn_c: csys::cusparseDnMatDescr_t = std::ptr::null_mut();
    // The cuSPARSE generic API takes i64 dimensions; reject anything larger.
    let m_i64 = i64::try_from(m).map_err(|_| GpuError::ShapeMismatch {
        op: "sparse_to_dense_csr_f32",
        expected: vec![i64::MAX as usize],
        got: vec![m],
    })?;
    let n_i64 = i64::try_from(n).map_err(|_| GpuError::ShapeMismatch {
        op: "sparse_to_dense_csr_f32",
        expected: vec![i64::MAX as usize],
        got: vec![n],
    })?;
    let nnz_i64 = i64::try_from(nnz).map_err(|_| GpuError::ShapeMismatch {
        op: "sparse_to_dense_csr_f32",
        expected: vec![i64::MAX as usize],
        got: vec![nnz],
    })?;
    // Closure lets errors short-circuit while cleanup below always runs.
    let result = (|| -> GpuResult<()> {
        let (crow_ptr, _crow_sync) = d_crow.inner_mut().device_ptr_mut(&stream);
        let (col_ptr, _col_sync) = d_col.inner_mut().device_ptr_mut(&stream);
        let (vals_ptr, _vals_sync) = d_vals.inner_mut().device_ptr_mut(&stream);
        let (out_ptr, _out_sync) = out.inner_mut().device_ptr_mut(&stream);
        // Read-only CSR descriptor for the source matrix.
        let status = unsafe {
            csys::cusparseCreateConstCsr(
                &mut sp_mat,
                m_i64,
                n_i64,
                nnz_i64,
                crow_ptr as *const std::ffi::c_void,
                col_ptr as *const std::ffi::c_void,
                vals_ptr as *const std::ffi::c_void,
                csys::cusparseIndexType_t::CUSPARSE_INDEX_32I,
                csys::cusparseIndexType_t::CUSPARSE_INDEX_32I,
                csys::cusparseIndexBase_t::CUSPARSE_INDEX_BASE_ZERO,
                csys::cudaDataType_t::CUDA_R_32F,
            )
        };
        check(status, "cusparseCreateConstCsr (s2d f32)")?;
        // Destination: m x n dense, row-major, leading dimension n.
        let status = unsafe {
            csys::cusparseCreateDnMat(
                &mut dn_c,
                m_i64,
                n_i64,
                n_i64,
                out_ptr as *mut std::ffi::c_void,
                csys::cudaDataType_t::CUDA_R_32F,
                csys::cusparseOrder_t::CUSPARSE_ORDER_ROW,
            )
        };
        check(status, "cusparseCreateDnMat (s2d f32 C)")?;
        // Query and (if needed) allocate the scratch workspace.
        let mut buffer_size: usize = 0;
        let status = unsafe {
            csys::cusparseSparseToDense_bufferSize(
                handle.raw(),
                sp_mat,
                dn_c,
                csys::cusparseSparseToDenseAlg_t::CUSPARSE_SPARSETODENSE_ALG_DEFAULT,
                &mut buffer_size,
            )
        };
        check(status, "cusparseSparseToDense_bufferSize (f32)")?;
        let mut workspace_slice = if buffer_size > 0 {
            Some(stream.alloc_zeros::<u8>(buffer_size)?)
        } else {
            None
        };
        let workspace_ptr = match workspace_slice.as_mut() {
            Some(s) => {
                let (p, _sync) = s.device_ptr_mut(&stream);
                p as *mut std::ffi::c_void
            }
            None => std::ptr::null_mut(),
        };
        let status = unsafe {
            csys::cusparseSparseToDense(
                handle.raw(),
                sp_mat,
                dn_c,
                csys::cusparseSparseToDenseAlg_t::CUSPARSE_SPARSETODENSE_ALG_DEFAULT,
                workspace_ptr,
            )
        };
        check(status, "cusparseSparseToDense (f32)")?;
        // NOTE(review): workspace freed right after enqueue — presumes
        // stream-ordered free; confirm against the allocator's semantics.
        drop(workspace_slice);
        Ok(())
    })();
    // Destroy whichever descriptors were created, regardless of `result`.
    unsafe {
        if !dn_c.is_null() {
            let _ = csys::cusparseDestroyDnMat(dn_c);
        }
        if !sp_mat.is_null() {
            let _ = csys::cusparseDestroySpMat(sp_mat);
        }
    }
    result?;
    Ok(out)
}
/// f64 variant of [`gpu_sparse_to_dense_csr_f32`]: expand an `m x n` CSR
/// matrix (host-side arrays, 32-bit zero-based indices) into a row-major
/// dense device buffer of length `m * n` using `cusparseSparseToDense`.
///
/// # Errors
/// Returns `ShapeMismatch` for inconsistent input lengths or dimensions that
/// do not fit in `i64`, and `InvalidState` if any cuSPARSE call fails.
#[allow(clippy::too_many_arguments)]
pub fn gpu_sparse_to_dense_csr_f64(
    handle: &CusparseHandle,
    crow_indices: &[u32],
    col_indices: &[u32],
    values: &[f64],
    m: usize,
    n: usize,
    device: &GpuDevice,
) -> GpuResult<CudaBuffer<f64>> {
    // CSR row-offset array must have exactly m + 1 entries.
    if crow_indices.len() != m + 1 {
        return Err(GpuError::ShapeMismatch {
            op: "sparse_to_dense_csr_f64",
            expected: vec![m + 1],
            got: vec![crow_indices.len()],
        });
    }
    // One column index per stored value.
    if col_indices.len() != values.len() {
        return Err(GpuError::ShapeMismatch {
            op: "sparse_to_dense_csr_f64",
            expected: vec![values.len()],
            got: vec![col_indices.len()],
        });
    }
    // Degenerate shape: nothing to expand.
    if m == 0 || n == 0 {
        return alloc_zeros_f64(m * n, device);
    }
    let nnz = values.len();
    // All-zero matrix: the freshly zeroed output is already correct.
    if nnz == 0 {
        return alloc_zeros_f64(m * n, device);
    }
    // Enqueue all subsequent cuSPARSE work on this device's stream.
    set_stream(handle, device)?;
    let mut d_crow = cpu_to_gpu(crow_indices, device)?;
    let mut d_col = cpu_to_gpu(col_indices, device)?;
    let mut d_vals = cpu_to_gpu(values, device)?;
    let mut out = alloc_zeros_f64(m * n, device)?;
    let stream = device.stream();
    // Descriptors start null so cleanup can tell what was created.
    let mut sp_mat: csys::cusparseConstSpMatDescr_t = std::ptr::null_mut();
    let mut dn_c: csys::cusparseDnMatDescr_t = std::ptr::null_mut();
    // The cuSPARSE generic API takes i64 dimensions; reject anything larger.
    let m_i64 = i64::try_from(m).map_err(|_| GpuError::ShapeMismatch {
        op: "sparse_to_dense_csr_f64",
        expected: vec![i64::MAX as usize],
        got: vec![m],
    })?;
    let n_i64 = i64::try_from(n).map_err(|_| GpuError::ShapeMismatch {
        op: "sparse_to_dense_csr_f64",
        expected: vec![i64::MAX as usize],
        got: vec![n],
    })?;
    let nnz_i64 = i64::try_from(nnz).map_err(|_| GpuError::ShapeMismatch {
        op: "sparse_to_dense_csr_f64",
        expected: vec![i64::MAX as usize],
        got: vec![nnz],
    })?;
    // Closure lets errors short-circuit while cleanup below always runs.
    let result = (|| -> GpuResult<()> {
        let (crow_ptr, _crow_sync) = d_crow.inner_mut().device_ptr_mut(&stream);
        let (col_ptr, _col_sync) = d_col.inner_mut().device_ptr_mut(&stream);
        let (vals_ptr, _vals_sync) = d_vals.inner_mut().device_ptr_mut(&stream);
        let (out_ptr, _out_sync) = out.inner_mut().device_ptr_mut(&stream);
        // Read-only CSR descriptor for the source matrix.
        let status = unsafe {
            csys::cusparseCreateConstCsr(
                &mut sp_mat,
                m_i64,
                n_i64,
                nnz_i64,
                crow_ptr as *const std::ffi::c_void,
                col_ptr as *const std::ffi::c_void,
                vals_ptr as *const std::ffi::c_void,
                csys::cusparseIndexType_t::CUSPARSE_INDEX_32I,
                csys::cusparseIndexType_t::CUSPARSE_INDEX_32I,
                csys::cusparseIndexBase_t::CUSPARSE_INDEX_BASE_ZERO,
                csys::cudaDataType_t::CUDA_R_64F,
            )
        };
        check(status, "cusparseCreateConstCsr (s2d f64)")?;
        // Destination: m x n dense, row-major, leading dimension n.
        let status = unsafe {
            csys::cusparseCreateDnMat(
                &mut dn_c,
                m_i64,
                n_i64,
                n_i64,
                out_ptr as *mut std::ffi::c_void,
                csys::cudaDataType_t::CUDA_R_64F,
                csys::cusparseOrder_t::CUSPARSE_ORDER_ROW,
            )
        };
        check(status, "cusparseCreateDnMat (s2d f64 C)")?;
        // Query and (if needed) allocate the scratch workspace.
        let mut buffer_size: usize = 0;
        let status = unsafe {
            csys::cusparseSparseToDense_bufferSize(
                handle.raw(),
                sp_mat,
                dn_c,
                csys::cusparseSparseToDenseAlg_t::CUSPARSE_SPARSETODENSE_ALG_DEFAULT,
                &mut buffer_size,
            )
        };
        check(status, "cusparseSparseToDense_bufferSize (f64)")?;
        let mut workspace_slice = if buffer_size > 0 {
            Some(stream.alloc_zeros::<u8>(buffer_size)?)
        } else {
            None
        };
        let workspace_ptr = match workspace_slice.as_mut() {
            Some(s) => {
                let (p, _sync) = s.device_ptr_mut(&stream);
                p as *mut std::ffi::c_void
            }
            None => std::ptr::null_mut(),
        };
        let status = unsafe {
            csys::cusparseSparseToDense(
                handle.raw(),
                sp_mat,
                dn_c,
                csys::cusparseSparseToDenseAlg_t::CUSPARSE_SPARSETODENSE_ALG_DEFAULT,
                workspace_ptr,
            )
        };
        check(status, "cusparseSparseToDense (f64)")?;
        // NOTE(review): workspace freed right after enqueue — presumes
        // stream-ordered free; confirm against the allocator's semantics.
        drop(workspace_slice);
        Ok(())
    })();
    // Destroy whichever descriptors were created, regardless of `result`.
    unsafe {
        if !dn_c.is_null() {
            let _ = csys::cusparseDestroyDnMat(dn_c);
        }
        if !sp_mat.is_null() {
            let _ = csys::cusparseDestroySpMat(sp_mat);
        }
    }
    result?;
    Ok(out)
}
/// Convert an `m x n` row-major dense device matrix into host-side CSR
/// arrays `(crow_indices, col_indices, values)` with 32-bit zero-based
/// indices (f32), using the two-phase
/// `cusparseDenseToSparse_analysis` / `_convert` protocol: analysis fills
/// the row offsets and nnz count, then column/value buffers are allocated
/// and bound via `cusparseCsrSetPointers` before the convert pass.
///
/// # Errors
/// Returns `ShapeMismatch` if `dense.len() != m * n` or a dimension does
/// not fit in `i64`, and `InvalidState` if any cuSPARSE call fails.
pub fn gpu_dense_to_sparse_csr_f32(
    handle: &CusparseHandle,
    dense: &CudaBuffer<f32>,
    m: usize,
    n: usize,
    device: &GpuDevice,
) -> GpuResult<(Vec<u32>, Vec<u32>, Vec<f32>)> {
    if dense.len() != m * n {
        return Err(GpuError::ShapeMismatch {
            op: "dense_to_sparse_csr_f32",
            expected: vec![m, n],
            got: vec![dense.len()],
        });
    }
    // Degenerate shape: empty CSR with the right number of row offsets.
    if m == 0 || n == 0 {
        return Ok((vec![0; m + 1], Vec::new(), Vec::new()));
    }
    // Enqueue all subsequent cuSPARSE work on this device's stream.
    set_stream(handle, device)?;
    let stream = device.stream();
    // Row offsets live on the device; analysis writes them in place.
    let mut d_crow = stream.alloc_zeros::<u32>(m + 1)?;
    // Descriptors start null so cleanup can tell what was created.
    let mut sp_mat: csys::cusparseSpMatDescr_t = std::ptr::null_mut();
    let mut dn_a: csys::cusparseConstDnMatDescr_t = std::ptr::null_mut();
    // The cuSPARSE generic API takes i64 dimensions; reject anything larger.
    let m_i64 = i64::try_from(m).map_err(|_| GpuError::ShapeMismatch {
        op: "dense_to_sparse_csr_f32",
        expected: vec![i64::MAX as usize],
        got: vec![m],
    })?;
    let n_i64 = i64::try_from(n).map_err(|_| GpuError::ShapeMismatch {
        op: "dense_to_sparse_csr_f32",
        expected: vec![i64::MAX as usize],
        got: vec![n],
    })?;
    // Column/value buffers are created inside the closure (their size is
    // only known after analysis) and handed out through these slots.
    let mut d_col_storage: Option<cudarc::driver::CudaSlice<u32>> = None;
    let mut d_vals_storage: Option<cudarc::driver::CudaSlice<f32>> = None;
    let mut nnz_out: i64 = 0;
    // Closure lets errors short-circuit while cleanup below always runs.
    let result = (|| -> GpuResult<()> {
        let (dense_ptr, _dense_sync) = dense.inner().device_ptr(&stream);
        let (crow_ptr, _crow_sync) = d_crow.device_ptr_mut(&stream);
        // Source: m x n dense, row-major, leading dimension n, read-only.
        let status = unsafe {
            csys::cusparseCreateConstDnMat(
                &mut dn_a,
                m_i64,
                n_i64,
                n_i64,
                dense_ptr as *const std::ffi::c_void,
                csys::cudaDataType_t::CUDA_R_32F,
                csys::cusparseOrder_t::CUSPARSE_ORDER_ROW,
            )
        };
        check(status, "cusparseCreateConstDnMat (d2s f32)")?;
        // Destination CSR with nnz = 0 and null column/value pointers; the
        // real pointers are attached after analysis reports the nnz.
        let status = unsafe {
            csys::cusparseCreateCsr(
                &mut sp_mat,
                m_i64,
                n_i64,
                0,
                crow_ptr as *mut std::ffi::c_void,
                std::ptr::null_mut(),
                std::ptr::null_mut(),
                csys::cusparseIndexType_t::CUSPARSE_INDEX_32I,
                csys::cusparseIndexType_t::CUSPARSE_INDEX_32I,
                csys::cusparseIndexBase_t::CUSPARSE_INDEX_BASE_ZERO,
                csys::cudaDataType_t::CUDA_R_32F,
            )
        };
        check(status, "cusparseCreateCsr (d2s f32 init)")?;
        // Query and (if needed) allocate the scratch workspace; it is
        // reused by both the analysis and convert phases.
        let mut buffer_size: usize = 0;
        let status = unsafe {
            csys::cusparseDenseToSparse_bufferSize(
                handle.raw(),
                dn_a,
                sp_mat,
                csys::cusparseDenseToSparseAlg_t::CUSPARSE_DENSETOSPARSE_ALG_DEFAULT,
                &mut buffer_size,
            )
        };
        check(status, "cusparseDenseToSparse_bufferSize (f32)")?;
        let mut workspace_slice = if buffer_size > 0 {
            Some(stream.alloc_zeros::<u8>(buffer_size)?)
        } else {
            None
        };
        let workspace_ptr = match workspace_slice.as_mut() {
            Some(s) => {
                let (p, _sync) = s.device_ptr_mut(&stream);
                p as *mut std::ffi::c_void
            }
            None => std::ptr::null_mut(),
        };
        // Phase 1: compute row offsets and the nnz count.
        let status = unsafe {
            csys::cusparseDenseToSparse_analysis(
                handle.raw(),
                dn_a,
                sp_mat,
                csys::cusparseDenseToSparseAlg_t::CUSPARSE_DENSETOSPARSE_ALG_DEFAULT,
                workspace_ptr,
            )
        };
        check(status, "cusparseDenseToSparse_analysis (f32)")?;
        // Read back the nnz discovered by analysis.
        let mut rows_out: i64 = 0;
        let mut cols_out: i64 = 0;
        let status = unsafe {
            csys::cusparseSpMatGetSize(sp_mat, &mut rows_out, &mut cols_out, &mut nnz_out)
        };
        check(status, "cusparseSpMatGetSize (d2s f32)")?;
        let nnz_usize = usize::try_from(nnz_out).map_err(|_| GpuError::ShapeMismatch {
            op: "dense_to_sparse_csr_f32",
            expected: vec![0],
            got: vec![usize::MAX],
        })?;
        // `.max(1)` avoids zero-length device allocations when nnz == 0.
        let mut d_col_local = stream.alloc_zeros::<u32>(nnz_usize.max(1))?;
        let mut d_vals_local = stream.alloc_zeros::<f32>(nnz_usize.max(1))?;
        {
            let (col_ptr, _col_sync) = d_col_local.device_ptr_mut(&stream);
            let (vals_ptr, _vals_sync) = d_vals_local.device_ptr_mut(&stream);
            // Attach the now-correctly-sized column/value buffers.
            let status = unsafe {
                csys::cusparseCsrSetPointers(
                    sp_mat,
                    crow_ptr as *mut std::ffi::c_void,
                    col_ptr as *mut std::ffi::c_void,
                    vals_ptr as *mut std::ffi::c_void,
                )
            };
            check(status, "cusparseCsrSetPointers (d2s f32)")?;
            // Phase 2: fill the column indices and values.
            let status = unsafe {
                csys::cusparseDenseToSparse_convert(
                    handle.raw(),
                    dn_a,
                    sp_mat,
                    csys::cusparseDenseToSparseAlg_t::CUSPARSE_DENSETOSPARSE_ALG_DEFAULT,
                    workspace_ptr,
                )
            };
            check(status, "cusparseDenseToSparse_convert (f32)")?;
        }
        // NOTE(review): workspace freed right after enqueue — presumes
        // stream-ordered free; confirm against the allocator's semantics.
        drop(workspace_slice);
        // Keep the device buffers alive for the copy-back below.
        d_col_storage = Some(d_col_local);
        d_vals_storage = Some(d_vals_local);
        Ok(())
    })();
    // Destroy whichever descriptors were created, regardless of `result`.
    unsafe {
        if !sp_mat.is_null() {
            let _ = csys::cusparseDestroySpMat(sp_mat);
        }
        if !dn_a.is_null() {
            let _ = csys::cusparseDestroyDnMat(dn_a);
        }
    }
    result?;
    // Copy results back to the host; truncate to the logical lengths.
    let mut crow = stream.clone_dtoh(&d_crow)?;
    crow.truncate(m + 1);
    let nnz_usize = usize::try_from(nnz_out).map_err(|_| GpuError::ShapeMismatch {
        op: "dense_to_sparse_csr_f32",
        expected: vec![0],
        got: vec![usize::MAX],
    })?;
    let (col, vals) = if nnz_usize == 0 {
        (Vec::new(), Vec::new())
    } else {
        let d_col = d_col_storage.expect("col buffer set on success");
        let d_vals = d_vals_storage.expect("values buffer set on success");
        let mut col_h = stream.clone_dtoh(&d_col)?;
        // The device buffers were over-allocated to at least 1 element.
        col_h.truncate(nnz_usize);
        let mut vals_h = stream.clone_dtoh(&d_vals)?;
        vals_h.truncate(nnz_usize);
        (col_h, vals_h)
    };
    Ok((crow, col, vals))
}
/// f64 variant of [`gpu_dense_to_sparse_csr_f32`]: convert an `m x n`
/// row-major dense device matrix into host-side CSR arrays
/// `(crow_indices, col_indices, values)` with 32-bit zero-based indices,
/// via the two-phase `cusparseDenseToSparse_analysis` / `_convert` protocol.
///
/// # Errors
/// Returns `ShapeMismatch` if `dense.len() != m * n` or a dimension does
/// not fit in `i64`, and `InvalidState` if any cuSPARSE call fails.
pub fn gpu_dense_to_sparse_csr_f64(
    handle: &CusparseHandle,
    dense: &CudaBuffer<f64>,
    m: usize,
    n: usize,
    device: &GpuDevice,
) -> GpuResult<(Vec<u32>, Vec<u32>, Vec<f64>)> {
    if dense.len() != m * n {
        return Err(GpuError::ShapeMismatch {
            op: "dense_to_sparse_csr_f64",
            expected: vec![m, n],
            got: vec![dense.len()],
        });
    }
    // Degenerate shape: empty CSR with the right number of row offsets.
    if m == 0 || n == 0 {
        return Ok((vec![0; m + 1], Vec::new(), Vec::new()));
    }
    // Enqueue all subsequent cuSPARSE work on this device's stream.
    set_stream(handle, device)?;
    let stream = device.stream();
    // Row offsets live on the device; analysis writes them in place.
    let mut d_crow = stream.alloc_zeros::<u32>(m + 1)?;
    // Descriptors start null so cleanup can tell what was created.
    let mut sp_mat: csys::cusparseSpMatDescr_t = std::ptr::null_mut();
    let mut dn_a: csys::cusparseConstDnMatDescr_t = std::ptr::null_mut();
    // The cuSPARSE generic API takes i64 dimensions; reject anything larger.
    let m_i64 = i64::try_from(m).map_err(|_| GpuError::ShapeMismatch {
        op: "dense_to_sparse_csr_f64",
        expected: vec![i64::MAX as usize],
        got: vec![m],
    })?;
    let n_i64 = i64::try_from(n).map_err(|_| GpuError::ShapeMismatch {
        op: "dense_to_sparse_csr_f64",
        expected: vec![i64::MAX as usize],
        got: vec![n],
    })?;
    // Column/value buffers are created inside the closure (their size is
    // only known after analysis) and handed out through these slots.
    let mut d_col_storage: Option<cudarc::driver::CudaSlice<u32>> = None;
    let mut d_vals_storage: Option<cudarc::driver::CudaSlice<f64>> = None;
    let mut nnz_out: i64 = 0;
    // Closure lets errors short-circuit while cleanup below always runs.
    let result = (|| -> GpuResult<()> {
        let (dense_ptr, _dense_sync) = dense.inner().device_ptr(&stream);
        let (crow_ptr, _crow_sync) = d_crow.device_ptr_mut(&stream);
        // Source: m x n dense, row-major, leading dimension n, read-only.
        let status = unsafe {
            csys::cusparseCreateConstDnMat(
                &mut dn_a,
                m_i64,
                n_i64,
                n_i64,
                dense_ptr as *const std::ffi::c_void,
                csys::cudaDataType_t::CUDA_R_64F,
                csys::cusparseOrder_t::CUSPARSE_ORDER_ROW,
            )
        };
        check(status, "cusparseCreateConstDnMat (d2s f64)")?;
        // Destination CSR with nnz = 0 and null column/value pointers; the
        // real pointers are attached after analysis reports the nnz.
        let status = unsafe {
            csys::cusparseCreateCsr(
                &mut sp_mat,
                m_i64,
                n_i64,
                0,
                crow_ptr as *mut std::ffi::c_void,
                std::ptr::null_mut(),
                std::ptr::null_mut(),
                csys::cusparseIndexType_t::CUSPARSE_INDEX_32I,
                csys::cusparseIndexType_t::CUSPARSE_INDEX_32I,
                csys::cusparseIndexBase_t::CUSPARSE_INDEX_BASE_ZERO,
                csys::cudaDataType_t::CUDA_R_64F,
            )
        };
        check(status, "cusparseCreateCsr (d2s f64 init)")?;
        // Query and (if needed) allocate the scratch workspace; it is
        // reused by both the analysis and convert phases.
        let mut buffer_size: usize = 0;
        let status = unsafe {
            csys::cusparseDenseToSparse_bufferSize(
                handle.raw(),
                dn_a,
                sp_mat,
                csys::cusparseDenseToSparseAlg_t::CUSPARSE_DENSETOSPARSE_ALG_DEFAULT,
                &mut buffer_size,
            )
        };
        check(status, "cusparseDenseToSparse_bufferSize (f64)")?;
        let mut workspace_slice = if buffer_size > 0 {
            Some(stream.alloc_zeros::<u8>(buffer_size)?)
        } else {
            None
        };
        let workspace_ptr = match workspace_slice.as_mut() {
            Some(s) => {
                let (p, _sync) = s.device_ptr_mut(&stream);
                p as *mut std::ffi::c_void
            }
            None => std::ptr::null_mut(),
        };
        // Phase 1: compute row offsets and the nnz count.
        let status = unsafe {
            csys::cusparseDenseToSparse_analysis(
                handle.raw(),
                dn_a,
                sp_mat,
                csys::cusparseDenseToSparseAlg_t::CUSPARSE_DENSETOSPARSE_ALG_DEFAULT,
                workspace_ptr,
            )
        };
        check(status, "cusparseDenseToSparse_analysis (f64)")?;
        // Read back the nnz discovered by analysis.
        let mut rows_out: i64 = 0;
        let mut cols_out: i64 = 0;
        let status = unsafe {
            csys::cusparseSpMatGetSize(sp_mat, &mut rows_out, &mut cols_out, &mut nnz_out)
        };
        check(status, "cusparseSpMatGetSize (d2s f64)")?;
        let nnz_usize = usize::try_from(nnz_out).map_err(|_| GpuError::ShapeMismatch {
            op: "dense_to_sparse_csr_f64",
            expected: vec![0],
            got: vec![usize::MAX],
        })?;
        // `.max(1)` avoids zero-length device allocations when nnz == 0.
        let mut d_col_local = stream.alloc_zeros::<u32>(nnz_usize.max(1))?;
        let mut d_vals_local = stream.alloc_zeros::<f64>(nnz_usize.max(1))?;
        {
            let (col_ptr, _col_sync) = d_col_local.device_ptr_mut(&stream);
            let (vals_ptr, _vals_sync) = d_vals_local.device_ptr_mut(&stream);
            // Attach the now-correctly-sized column/value buffers.
            let status = unsafe {
                csys::cusparseCsrSetPointers(
                    sp_mat,
                    crow_ptr as *mut std::ffi::c_void,
                    col_ptr as *mut std::ffi::c_void,
                    vals_ptr as *mut std::ffi::c_void,
                )
            };
            check(status, "cusparseCsrSetPointers (d2s f64)")?;
            // Phase 2: fill the column indices and values.
            let status = unsafe {
                csys::cusparseDenseToSparse_convert(
                    handle.raw(),
                    dn_a,
                    sp_mat,
                    csys::cusparseDenseToSparseAlg_t::CUSPARSE_DENSETOSPARSE_ALG_DEFAULT,
                    workspace_ptr,
                )
            };
            check(status, "cusparseDenseToSparse_convert (f64)")?;
        }
        // NOTE(review): workspace freed right after enqueue — presumes
        // stream-ordered free; confirm against the allocator's semantics.
        drop(workspace_slice);
        // Keep the device buffers alive for the copy-back below.
        d_col_storage = Some(d_col_local);
        d_vals_storage = Some(d_vals_local);
        Ok(())
    })();
    // Destroy whichever descriptors were created, regardless of `result`.
    unsafe {
        if !sp_mat.is_null() {
            let _ = csys::cusparseDestroySpMat(sp_mat);
        }
        if !dn_a.is_null() {
            let _ = csys::cusparseDestroyDnMat(dn_a);
        }
    }
    result?;
    // Copy results back to the host; truncate to the logical lengths.
    let mut crow = stream.clone_dtoh(&d_crow)?;
    crow.truncate(m + 1);
    let nnz_usize = usize::try_from(nnz_out).map_err(|_| GpuError::ShapeMismatch {
        op: "dense_to_sparse_csr_f64",
        expected: vec![0],
        got: vec![usize::MAX],
    })?;
    let (col, vals) = if nnz_usize == 0 {
        (Vec::new(), Vec::new())
    } else {
        let d_col = d_col_storage.expect("col buffer set on success");
        let d_vals = d_vals_storage.expect("values buffer set on success");
        let mut col_h = stream.clone_dtoh(&d_col)?;
        // The device buffers were over-allocated to at least 1 element.
        col_h.truncate(nnz_usize);
        let mut vals_h = stream.clone_dtoh(&d_vals)?;
        vals_h.truncate(nnz_usize);
        (col_h, vals_h)
    };
    Ok((crow, col, vals))
}
/// Expand an `m x n` CSC matrix (host-side arrays: `col_ptrs` of length
/// `n + 1`, row indices and values, 32-bit zero-based indices) into a
/// row-major dense device buffer of length `m * n` (f32) using
/// `cusparseSparseToDense`.
///
/// # Errors
/// Returns `ShapeMismatch` for inconsistent input lengths or dimensions that
/// do not fit in `i64`, and `InvalidState` if any cuSPARSE call fails.
#[allow(clippy::too_many_arguments)]
pub fn gpu_csc_to_dense_f32(
    handle: &CusparseHandle,
    col_ptrs: &[u32],
    row_indices: &[u32],
    values: &[f32],
    m: usize,
    n: usize,
    device: &GpuDevice,
) -> GpuResult<CudaBuffer<f32>> {
    // CSC column-offset array must have exactly n + 1 entries.
    if col_ptrs.len() != n + 1 {
        return Err(GpuError::ShapeMismatch {
            op: "csc_to_dense_f32",
            expected: vec![n + 1],
            got: vec![col_ptrs.len()],
        });
    }
    // One row index per stored value.
    if row_indices.len() != values.len() {
        return Err(GpuError::ShapeMismatch {
            op: "csc_to_dense_f32",
            expected: vec![values.len()],
            got: vec![row_indices.len()],
        });
    }
    // Degenerate shape: nothing to expand.
    if m == 0 || n == 0 {
        return alloc_zeros_f32(m * n, device);
    }
    let nnz = values.len();
    // All-zero matrix: the freshly zeroed output is already correct.
    if nnz == 0 {
        return alloc_zeros_f32(m * n, device);
    }
    // Enqueue all subsequent cuSPARSE work on this device's stream.
    set_stream(handle, device)?;
    let mut d_col = cpu_to_gpu(col_ptrs, device)?;
    let mut d_row = cpu_to_gpu(row_indices, device)?;
    let mut d_vals = cpu_to_gpu(values, device)?;
    let mut out = alloc_zeros_f32(m * n, device)?;
    let stream = device.stream();
    // Descriptors start null so cleanup can tell what was created.
    let mut sp_mat: csys::cusparseConstSpMatDescr_t = std::ptr::null_mut();
    let mut dn_c: csys::cusparseDnMatDescr_t = std::ptr::null_mut();
    // The cuSPARSE generic API takes i64 dimensions; reject anything larger.
    let m_i64 = i64::try_from(m).map_err(|_| GpuError::ShapeMismatch {
        op: "csc_to_dense_f32",
        expected: vec![i64::MAX as usize],
        got: vec![m],
    })?;
    let n_i64 = i64::try_from(n).map_err(|_| GpuError::ShapeMismatch {
        op: "csc_to_dense_f32",
        expected: vec![i64::MAX as usize],
        got: vec![n],
    })?;
    let nnz_i64 = i64::try_from(nnz).map_err(|_| GpuError::ShapeMismatch {
        op: "csc_to_dense_f32",
        expected: vec![i64::MAX as usize],
        got: vec![nnz],
    })?;
    // Closure lets errors short-circuit while cleanup below always runs.
    let result = (|| -> GpuResult<()> {
        let (col_ptr, _col_sync) = d_col.inner_mut().device_ptr_mut(&stream);
        let (row_ptr, _row_sync) = d_row.inner_mut().device_ptr_mut(&stream);
        let (vals_ptr, _vals_sync) = d_vals.inner_mut().device_ptr_mut(&stream);
        let (out_ptr, _out_sync) = out.inner_mut().device_ptr_mut(&stream);
        // Read-only CSC descriptor: column offsets, then row indices.
        let status = unsafe {
            csys::cusparseCreateConstCsc(
                &mut sp_mat,
                m_i64,
                n_i64,
                nnz_i64,
                col_ptr as *const std::ffi::c_void,
                row_ptr as *const std::ffi::c_void,
                vals_ptr as *const std::ffi::c_void,
                csys::cusparseIndexType_t::CUSPARSE_INDEX_32I,
                csys::cusparseIndexType_t::CUSPARSE_INDEX_32I,
                csys::cusparseIndexBase_t::CUSPARSE_INDEX_BASE_ZERO,
                csys::cudaDataType_t::CUDA_R_32F,
            )
        };
        check(status, "cusparseCreateConstCsc (s2d f32)")?;
        // Destination: m x n dense, row-major, leading dimension n.
        let status = unsafe {
            csys::cusparseCreateDnMat(
                &mut dn_c,
                m_i64,
                n_i64,
                n_i64,
                out_ptr as *mut std::ffi::c_void,
                csys::cudaDataType_t::CUDA_R_32F,
                csys::cusparseOrder_t::CUSPARSE_ORDER_ROW,
            )
        };
        check(status, "cusparseCreateDnMat (csc s2d f32 C)")?;
        // Query and (if needed) allocate the scratch workspace.
        let mut buffer_size: usize = 0;
        let status = unsafe {
            csys::cusparseSparseToDense_bufferSize(
                handle.raw(),
                sp_mat,
                dn_c,
                csys::cusparseSparseToDenseAlg_t::CUSPARSE_SPARSETODENSE_ALG_DEFAULT,
                &mut buffer_size,
            )
        };
        check(status, "cusparseSparseToDense_bufferSize (csc f32)")?;
        let mut workspace_slice = if buffer_size > 0 {
            Some(stream.alloc_zeros::<u8>(buffer_size)?)
        } else {
            None
        };
        let workspace_ptr = match workspace_slice.as_mut() {
            Some(s) => {
                let (p, _sync) = s.device_ptr_mut(&stream);
                p as *mut std::ffi::c_void
            }
            None => std::ptr::null_mut(),
        };
        let status = unsafe {
            csys::cusparseSparseToDense(
                handle.raw(),
                sp_mat,
                dn_c,
                csys::cusparseSparseToDenseAlg_t::CUSPARSE_SPARSETODENSE_ALG_DEFAULT,
                workspace_ptr,
            )
        };
        check(status, "cusparseSparseToDense (csc f32)")?;
        // NOTE(review): workspace freed right after enqueue — presumes
        // stream-ordered free; confirm against the allocator's semantics.
        drop(workspace_slice);
        Ok(())
    })();
    // Destroy whichever descriptors were created, regardless of `result`.
    unsafe {
        if !dn_c.is_null() {
            let _ = csys::cusparseDestroyDnMat(dn_c);
        }
        if !sp_mat.is_null() {
            let _ = csys::cusparseDestroySpMat(sp_mat);
        }
    }
    result?;
    Ok(out)
}
/// Expands an f64 CSC (compressed sparse column) matrix into a dense,
/// row-major `m x n` device buffer via `cusparseSparseToDense`.
///
/// * `col_ptrs` - CSC column-pointer array; must have length `n + 1`.
/// * `row_indices` - row index of each stored value (parallel to `values`).
/// * `values` - non-zero values in CSC order.
/// * `m`, `n` - matrix dimensions (rows, columns).
/// * `device` - GPU device whose stream orders all uploads and kernels.
///
/// Returns a zero-initialized `m * n` buffer with the stored entries written
/// into their dense positions.
///
/// # Errors
/// `GpuError::ShapeMismatch` for inconsistent slice lengths or dimensions
/// that do not fit in `i64`; `GpuError::InvalidState` when a cuSPARSE call
/// fails.
#[allow(clippy::too_many_arguments)]
pub fn gpu_csc_to_dense_f64(
    handle: &CusparseHandle,
    col_ptrs: &[u32],
    row_indices: &[u32],
    values: &[f64],
    m: usize,
    n: usize,
    device: &GpuDevice,
) -> GpuResult<CudaBuffer<f64>> {
    // CSC needs one column pointer per column plus a terminating entry.
    if col_ptrs.len() != n + 1 {
        return Err(GpuError::ShapeMismatch {
            op: "csc_to_dense_f64",
            expected: vec![n + 1],
            got: vec![col_ptrs.len()],
        });
    }
    // Every stored value must have a matching row index.
    if row_indices.len() != values.len() {
        return Err(GpuError::ShapeMismatch {
            op: "csc_to_dense_f64",
            expected: vec![values.len()],
            got: vec![row_indices.len()],
        });
    }
    // Degenerate shape: the dense result is empty/all-zero, skip cuSPARSE.
    if m == 0 || n == 0 {
        return alloc_zeros_f64(m * n, device);
    }
    let nnz = values.len();
    // No stored entries: the zeroed allocation is already the answer.
    if nnz == 0 {
        return alloc_zeros_f64(m * n, device);
    }
    // Bind cuSPARSE to this device's stream so the calls below are ordered
    // with the uploads/allocations.
    set_stream(handle, device)?;
    // Upload the three CSC arrays; `mut` because device_ptr_mut is needed
    // to obtain raw pointers later.
    let mut d_col = cpu_to_gpu(col_ptrs, device)?;
    let mut d_row = cpu_to_gpu(row_indices, device)?;
    let mut d_vals = cpu_to_gpu(values, device)?;
    // Output starts zeroed; SparseToDense only writes the stored positions.
    let mut out = alloc_zeros_f64(m * n, device)?;
    let stream = device.stream();
    // Raw cuSPARSE descriptors: null until created, unconditionally destroyed
    // in the cleanup block after the closure, on success or failure.
    let mut sp_mat: csys::cusparseConstSpMatDescr_t = std::ptr::null_mut();
    let mut dn_c: csys::cusparseDnMatDescr_t = std::ptr::null_mut();
    // The generic cuSPARSE descriptor API takes 64-bit dimensions.
    let m_i64 = i64::try_from(m).map_err(|_| GpuError::ShapeMismatch {
        op: "csc_to_dense_f64",
        expected: vec![i64::MAX as usize],
        got: vec![m],
    })?;
    let n_i64 = i64::try_from(n).map_err(|_| GpuError::ShapeMismatch {
        op: "csc_to_dense_f64",
        expected: vec![i64::MAX as usize],
        got: vec![n],
    })?;
    let nnz_i64 = i64::try_from(nnz).map_err(|_| GpuError::ShapeMismatch {
        op: "csc_to_dense_f64",
        expected: vec![i64::MAX as usize],
        got: vec![nnz],
    })?;
    // Run the fallible FFI sequence inside a closure so that `?` can bail
    // out early while the descriptor cleanup below still always runs.
    let result = (|| -> GpuResult<()> {
        // Raw device pointers; the `_sync` halves presumably tie the buffers
        // to the stream for ordering — kept alive by the closure scope.
        let (col_ptr, _col_sync) = d_col.inner_mut().device_ptr_mut(&stream);
        let (row_ptr, _row_sync) = d_row.inner_mut().device_ptr_mut(&stream);
        let (vals_ptr, _vals_sync) = d_vals.inner_mut().device_ptr_mut(&stream);
        let (out_ptr, _out_sync) = out.inner_mut().device_ptr_mut(&stream);
        // Describe the sparse input: CSC, 32-bit indices, zero-based, f64.
        let status = unsafe {
            csys::cusparseCreateConstCsc(
                &mut sp_mat,
                m_i64,
                n_i64,
                nnz_i64,
                col_ptr as *const std::ffi::c_void,
                row_ptr as *const std::ffi::c_void,
                vals_ptr as *const std::ffi::c_void,
                csys::cusparseIndexType_t::CUSPARSE_INDEX_32I,
                csys::cusparseIndexType_t::CUSPARSE_INDEX_32I,
                csys::cusparseIndexBase_t::CUSPARSE_INDEX_BASE_ZERO,
                csys::cudaDataType_t::CUDA_R_64F,
            )
        };
        check(status, "cusparseCreateConstCsc (s2d f64)")?;
        // Describe the dense output: row-major, so the leading dimension is
        // the row length `n`.
        let status = unsafe {
            csys::cusparseCreateDnMat(
                &mut dn_c,
                m_i64,
                n_i64,
                n_i64,
                out_ptr as *mut std::ffi::c_void,
                csys::cudaDataType_t::CUDA_R_64F,
                csys::cusparseOrder_t::CUSPARSE_ORDER_ROW,
            )
        };
        check(status, "cusparseCreateDnMat (csc s2d f64 C)")?;
        // Two-phase API: query the scratch-space requirement first.
        let mut buffer_size: usize = 0;
        let status = unsafe {
            csys::cusparseSparseToDense_bufferSize(
                handle.raw(),
                sp_mat,
                dn_c,
                csys::cusparseSparseToDenseAlg_t::CUSPARSE_SPARSETODENSE_ALG_DEFAULT,
                &mut buffer_size,
            )
        };
        check(status, "cusparseSparseToDense_bufferSize (csc f64)")?;
        // Allocate scratch only if cuSPARSE asked for any.
        let mut workspace_slice = if buffer_size > 0 {
            Some(stream.alloc_zeros::<u8>(buffer_size)?)
        } else {
            None
        };
        let workspace_ptr = match workspace_slice.as_mut() {
            Some(s) => {
                let (p, _sync) = s.device_ptr_mut(&stream);
                p as *mut std::ffi::c_void
            }
            None => std::ptr::null_mut(),
        };
        // Perform the actual sparse-to-dense expansion.
        let status = unsafe {
            csys::cusparseSparseToDense(
                handle.raw(),
                sp_mat,
                dn_c,
                csys::cusparseSparseToDenseAlg_t::CUSPARSE_SPARSETODENSE_ALG_DEFAULT,
                workspace_ptr,
            )
        };
        check(status, "cusparseSparseToDense (csc f64)")?;
        // Explicitly release the workspace only after the call was issued.
        // NOTE(review): assumes the allocation is stream-ordered so freeing
        // here cannot race the kernel — confirm against the allocator used.
        drop(workspace_slice);
        Ok(())
    })();
    // Cleanup runs regardless of the closure's outcome; destroy errors are
    // deliberately ignored because the primary error (if any) is in `result`.
    unsafe {
        if !dn_c.is_null() {
            let _ = csys::cusparseDestroyDnMat(dn_c);
        }
        if !sp_mat.is_null() {
            let _ = csys::cusparseDestroySpMat(sp_mat);
        }
    }
    result?;
    Ok(out)
}
/// Transposes an f32 CSR matrix into CSC form on the GPU with
/// `cusparseCsr2cscEx2`, returning host-side `(col_ptrs, row_indices,
/// values)`.
///
/// * `crow_indices` - CSR row-pointer array; must have length `m + 1`.
/// * `col_indices` - column index per stored value (parallel to `values`).
/// * `values` - non-zero values in CSR order.
/// * `m`, `n` - matrix dimensions (rows, columns).
///
/// # Errors
/// `GpuError::ShapeMismatch` for inconsistent slice lengths or dimensions
/// exceeding `i32::MAX` (this legacy API uses 32-bit sizes);
/// `GpuError::InvalidState` when a cuSPARSE call fails.
#[allow(clippy::too_many_arguments)]
pub fn gpu_csr_to_csc_f32(
    handle: &CusparseHandle,
    crow_indices: &[u32],
    col_indices: &[u32],
    values: &[f32],
    m: usize,
    n: usize,
    device: &GpuDevice,
) -> GpuResult<(Vec<u32>, Vec<u32>, Vec<f32>)> {
    // CSR needs one row pointer per row plus a terminating entry.
    if crow_indices.len() != m + 1 {
        return Err(GpuError::ShapeMismatch {
            op: "csr_to_csc_f32",
            expected: vec![m + 1],
            got: vec![crow_indices.len()],
        });
    }
    if col_indices.len() != values.len() {
        return Err(GpuError::ShapeMismatch {
            op: "csr_to_csc_f32",
            expected: vec![values.len()],
            got: vec![col_indices.len()],
        });
    }
    // Empty matrix: the CSC result is just all-zero column pointers.
    if values.is_empty() {
        return Ok((vec![0u32; n + 1], Vec::new(), Vec::new()));
    }
    set_stream(handle, device)?;
    let nnz = values.len();
    // cusparseCsr2cscEx2 takes `int` dimensions; reject anything larger.
    let m_i = i32::try_from(m).map_err(|_| GpuError::ShapeMismatch {
        op: "csr_to_csc_f32",
        expected: vec![i32::MAX as usize],
        got: vec![m],
    })?;
    let n_i = i32::try_from(n).map_err(|_| GpuError::ShapeMismatch {
        op: "csr_to_csc_f32",
        expected: vec![i32::MAX as usize],
        got: vec![n],
    })?;
    let nnz_i = i32::try_from(nnz).map_err(|_| GpuError::ShapeMismatch {
        op: "csr_to_csc_f32",
        expected: vec![i32::MAX as usize],
        got: vec![nnz],
    })?;
    // Upload CSR inputs and allocate zeroed CSC outputs on the device.
    let mut d_crow = cpu_to_gpu(crow_indices, device)?;
    let mut d_col = cpu_to_gpu(col_indices, device)?;
    let mut d_vals = cpu_to_gpu(values, device)?;
    let stream = device.stream();
    let mut d_col_ptrs = stream.alloc_zeros::<u32>(n + 1)?;
    let mut d_row_idx = stream.alloc_zeros::<u32>(nnz)?;
    let mut d_vals_csc = stream.alloc_zeros::<f32>(nnz)?;
    // Phase 1: query the scratch-space requirement. The index buffers are
    // stored as u32 but reinterpreted as i32 for the FFI; assumes all index
    // values fit in i32 (dimensions were range-checked above), so the bit
    // pattern is identical.
    let buffer_size = {
        let (crow_ptr, _crow_sync) = d_crow.inner_mut().device_ptr_mut(&stream);
        let (col_ptr, _col_sync) = d_col.inner_mut().device_ptr_mut(&stream);
        let (vals_ptr, _vals_sync) = d_vals.inner_mut().device_ptr_mut(&stream);
        let (cp_ptr, _cp_sync) = d_col_ptrs.device_ptr_mut(&stream);
        let (ri_ptr, _ri_sync) = d_row_idx.device_ptr_mut(&stream);
        let (vc_ptr, _vc_sync) = d_vals_csc.device_ptr_mut(&stream);
        let mut sz: usize = 0;
        let status = unsafe {
            csys::cusparseCsr2cscEx2_bufferSize(
                handle.raw(),
                m_i,
                n_i,
                nnz_i,
                vals_ptr as *const std::ffi::c_void,
                crow_ptr as *const i32,
                col_ptr as *const i32,
                vc_ptr as *mut std::ffi::c_void,
                cp_ptr as *mut i32,
                ri_ptr as *mut i32,
                csys::cudaDataType_t::CUDA_R_32F,
                csys::cusparseAction_t::CUSPARSE_ACTION_NUMERIC,
                csys::cusparseIndexBase_t::CUSPARSE_INDEX_BASE_ZERO,
                csys::cusparseCsr2CscAlg_t::CUSPARSE_CSR2CSC_ALG_DEFAULT,
                &mut sz,
            )
        };
        check(status, "cusparseCsr2cscEx2_bufferSize (f32)")?;
        sz
    };
    let mut workspace = if buffer_size > 0 {
        Some(stream.alloc_zeros::<u8>(buffer_size)?)
    } else {
        None
    };
    // Phase 2: the actual conversion, with identical arguments plus the
    // workspace pointer. ACTION_NUMERIC converts values as well as indices.
    {
        let (crow_ptr, _crow_sync) = d_crow.inner_mut().device_ptr_mut(&stream);
        let (col_ptr, _col_sync) = d_col.inner_mut().device_ptr_mut(&stream);
        let (vals_ptr, _vals_sync) = d_vals.inner_mut().device_ptr_mut(&stream);
        let (cp_ptr, _cp_sync) = d_col_ptrs.device_ptr_mut(&stream);
        let (ri_ptr, _ri_sync) = d_row_idx.device_ptr_mut(&stream);
        let (vc_ptr, _vc_sync) = d_vals_csc.device_ptr_mut(&stream);
        let ws_ptr = match workspace.as_mut() {
            Some(s) => {
                let (p, _sync) = s.device_ptr_mut(&stream);
                p as *mut std::ffi::c_void
            }
            None => std::ptr::null_mut(),
        };
        let status = unsafe {
            csys::cusparseCsr2cscEx2(
                handle.raw(),
                m_i,
                n_i,
                nnz_i,
                vals_ptr as *const std::ffi::c_void,
                crow_ptr as *const i32,
                col_ptr as *const i32,
                vc_ptr as *mut std::ffi::c_void,
                cp_ptr as *mut i32,
                ri_ptr as *mut i32,
                csys::cudaDataType_t::CUDA_R_32F,
                csys::cusparseAction_t::CUSPARSE_ACTION_NUMERIC,
                csys::cusparseIndexBase_t::CUSPARSE_INDEX_BASE_ZERO,
                csys::cusparseCsr2CscAlg_t::CUSPARSE_CSR2CSC_ALG_DEFAULT,
                ws_ptr,
            )
        };
        check(status, "cusparseCsr2cscEx2 (f32)")?;
    }
    // Release the scratch buffer before the device-to-host copies.
    drop(workspace);
    let col_ptrs_h = stream.clone_dtoh(&d_col_ptrs)?;
    let row_idx_h = stream.clone_dtoh(&d_row_idx)?;
    let vals_h = stream.clone_dtoh(&d_vals_csc)?;
    Ok((col_ptrs_h, row_idx_h, vals_h))
}
/// Transposes an f64 CSR matrix into CSC form on the GPU with
/// `cusparseCsr2cscEx2`, returning host-side `(col_ptrs, row_indices,
/// values)`. f64 twin of `gpu_csr_to_csc_f32`.
///
/// * `crow_indices` - CSR row-pointer array; must have length `m + 1`.
/// * `col_indices` - column index per stored value (parallel to `values`).
/// * `values` - non-zero values in CSR order.
/// * `m`, `n` - matrix dimensions (rows, columns).
///
/// # Errors
/// `GpuError::ShapeMismatch` for inconsistent slice lengths or dimensions
/// exceeding `i32::MAX` (this legacy API uses 32-bit sizes);
/// `GpuError::InvalidState` when a cuSPARSE call fails.
#[allow(clippy::too_many_arguments)]
pub fn gpu_csr_to_csc_f64(
    handle: &CusparseHandle,
    crow_indices: &[u32],
    col_indices: &[u32],
    values: &[f64],
    m: usize,
    n: usize,
    device: &GpuDevice,
) -> GpuResult<(Vec<u32>, Vec<u32>, Vec<f64>)> {
    // CSR needs one row pointer per row plus a terminating entry.
    if crow_indices.len() != m + 1 {
        return Err(GpuError::ShapeMismatch {
            op: "csr_to_csc_f64",
            expected: vec![m + 1],
            got: vec![crow_indices.len()],
        });
    }
    if col_indices.len() != values.len() {
        return Err(GpuError::ShapeMismatch {
            op: "csr_to_csc_f64",
            expected: vec![values.len()],
            got: vec![col_indices.len()],
        });
    }
    // Empty matrix: the CSC result is just all-zero column pointers.
    if values.is_empty() {
        return Ok((vec![0u32; n + 1], Vec::new(), Vec::new()));
    }
    set_stream(handle, device)?;
    let nnz = values.len();
    // cusparseCsr2cscEx2 takes `int` dimensions; reject anything larger.
    let m_i = i32::try_from(m).map_err(|_| GpuError::ShapeMismatch {
        op: "csr_to_csc_f64",
        expected: vec![i32::MAX as usize],
        got: vec![m],
    })?;
    let n_i = i32::try_from(n).map_err(|_| GpuError::ShapeMismatch {
        op: "csr_to_csc_f64",
        expected: vec![i32::MAX as usize],
        got: vec![n],
    })?;
    let nnz_i = i32::try_from(nnz).map_err(|_| GpuError::ShapeMismatch {
        op: "csr_to_csc_f64",
        expected: vec![i32::MAX as usize],
        got: vec![nnz],
    })?;
    // Upload CSR inputs and allocate zeroed CSC outputs on the device.
    let mut d_crow = cpu_to_gpu(crow_indices, device)?;
    let mut d_col = cpu_to_gpu(col_indices, device)?;
    let mut d_vals = cpu_to_gpu(values, device)?;
    let stream = device.stream();
    let mut d_col_ptrs = stream.alloc_zeros::<u32>(n + 1)?;
    let mut d_row_idx = stream.alloc_zeros::<u32>(nnz)?;
    let mut d_vals_csc = stream.alloc_zeros::<f64>(nnz)?;
    // Phase 1: query the scratch-space requirement. The index buffers are
    // stored as u32 but reinterpreted as i32 for the FFI; assumes all index
    // values fit in i32 (dimensions were range-checked above), so the bit
    // pattern is identical.
    let buffer_size = {
        let (crow_ptr, _crow_sync) = d_crow.inner_mut().device_ptr_mut(&stream);
        let (col_ptr, _col_sync) = d_col.inner_mut().device_ptr_mut(&stream);
        let (vals_ptr, _vals_sync) = d_vals.inner_mut().device_ptr_mut(&stream);
        let (cp_ptr, _cp_sync) = d_col_ptrs.device_ptr_mut(&stream);
        let (ri_ptr, _ri_sync) = d_row_idx.device_ptr_mut(&stream);
        let (vc_ptr, _vc_sync) = d_vals_csc.device_ptr_mut(&stream);
        let mut sz: usize = 0;
        let status = unsafe {
            csys::cusparseCsr2cscEx2_bufferSize(
                handle.raw(),
                m_i,
                n_i,
                nnz_i,
                vals_ptr as *const std::ffi::c_void,
                crow_ptr as *const i32,
                col_ptr as *const i32,
                vc_ptr as *mut std::ffi::c_void,
                cp_ptr as *mut i32,
                ri_ptr as *mut i32,
                csys::cudaDataType_t::CUDA_R_64F,
                csys::cusparseAction_t::CUSPARSE_ACTION_NUMERIC,
                csys::cusparseIndexBase_t::CUSPARSE_INDEX_BASE_ZERO,
                csys::cusparseCsr2CscAlg_t::CUSPARSE_CSR2CSC_ALG_DEFAULT,
                &mut sz,
            )
        };
        check(status, "cusparseCsr2cscEx2_bufferSize (f64)")?;
        sz
    };
    let mut workspace = if buffer_size > 0 {
        Some(stream.alloc_zeros::<u8>(buffer_size)?)
    } else {
        None
    };
    // Phase 2: the actual conversion, with identical arguments plus the
    // workspace pointer. ACTION_NUMERIC converts values as well as indices.
    {
        let (crow_ptr, _crow_sync) = d_crow.inner_mut().device_ptr_mut(&stream);
        let (col_ptr, _col_sync) = d_col.inner_mut().device_ptr_mut(&stream);
        let (vals_ptr, _vals_sync) = d_vals.inner_mut().device_ptr_mut(&stream);
        let (cp_ptr, _cp_sync) = d_col_ptrs.device_ptr_mut(&stream);
        let (ri_ptr, _ri_sync) = d_row_idx.device_ptr_mut(&stream);
        let (vc_ptr, _vc_sync) = d_vals_csc.device_ptr_mut(&stream);
        let ws_ptr = match workspace.as_mut() {
            Some(s) => {
                let (p, _sync) = s.device_ptr_mut(&stream);
                p as *mut std::ffi::c_void
            }
            None => std::ptr::null_mut(),
        };
        let status = unsafe {
            csys::cusparseCsr2cscEx2(
                handle.raw(),
                m_i,
                n_i,
                nnz_i,
                vals_ptr as *const std::ffi::c_void,
                crow_ptr as *const i32,
                col_ptr as *const i32,
                vc_ptr as *mut std::ffi::c_void,
                cp_ptr as *mut i32,
                ri_ptr as *mut i32,
                csys::cudaDataType_t::CUDA_R_64F,
                csys::cusparseAction_t::CUSPARSE_ACTION_NUMERIC,
                csys::cusparseIndexBase_t::CUSPARSE_INDEX_BASE_ZERO,
                csys::cusparseCsr2CscAlg_t::CUSPARSE_CSR2CSC_ALG_DEFAULT,
                ws_ptr,
            )
        };
        check(status, "cusparseCsr2cscEx2 (f64)")?;
    }
    // Release the scratch buffer before the device-to-host copies.
    drop(workspace);
    let col_ptrs_h = stream.clone_dtoh(&d_col_ptrs)?;
    let row_idx_h = stream.clone_dtoh(&d_row_idx)?;
    let vals_h = stream.clone_dtoh(&d_vals_csc)?;
    Ok((col_ptrs_h, row_idx_h, vals_h))
}
/// Builds a CSR row-pointer array (length `m + 1`) from COO row indices on
/// the GPU via `cusparseXcoo2csr`, copying the result back to the host.
///
/// NOTE(review): cuSPARSE documents that the COO row indices must be sorted
/// in ascending row order for `Xcoo2csr`; this helper does not verify that
/// precondition — confirm callers guarantee it.
fn gpu_coo_to_csr_indices(
    handle: &CusparseHandle,
    row_indices: &[u32],
    m: usize,
    device: &GpuDevice,
) -> GpuResult<Vec<u32>> {
    let nnz = row_indices.len();
    // No entries: every row pointer is zero; nothing to do on the GPU.
    if nnz == 0 {
        return Ok(vec![0u32; m + 1]);
    }
    set_stream(handle, device)?;
    // The legacy coo2csr API takes `int` sizes; reject anything larger.
    let m_i = i32::try_from(m).map_err(|_| GpuError::ShapeMismatch {
        op: "coo_to_csr_indices",
        expected: vec![i32::MAX as usize],
        got: vec![m],
    })?;
    let nnz_i = i32::try_from(nnz).map_err(|_| GpuError::ShapeMismatch {
        op: "coo_to_csr_indices",
        expected: vec![i32::MAX as usize],
        got: vec![nnz],
    })?;
    let mut d_rows = cpu_to_gpu(row_indices, device)?;
    let stream = device.stream();
    let mut d_crow = stream.alloc_zeros::<u32>(m + 1)?;
    // Scope the raw-pointer borrows around the single FFI call. The u32
    // buffers are reinterpreted as i32; bit-identical assuming indices fit
    // in i32 (m was range-checked above).
    {
        let (rows_ptr, _rows_sync) = d_rows.inner_mut().device_ptr_mut(&stream);
        let (crow_ptr, _crow_sync) = d_crow.device_ptr_mut(&stream);
        let status = unsafe {
            csys::cusparseXcoo2csr(
                handle.raw(),
                rows_ptr as *const i32,
                nnz_i,
                m_i,
                crow_ptr as *mut i32,
                csys::cusparseIndexBase_t::CUSPARSE_INDEX_BASE_ZERO,
            )
        };
        check(status, "cusparseXcoo2csr")?;
    }
    Ok(stream.clone_dtoh(&d_crow)?)
}
/// Converts an f32 COO matrix to CSR form. Only the row pointers require GPU
/// work; the column indices and values are identical between the two layouts
/// and are returned as copies.
///
/// Returns `(crow_indices, col_indices, values)` on the host, or
/// `GpuError::ShapeMismatch` when the three input slices disagree in length.
pub fn gpu_coo_to_csr_f32(
    handle: &CusparseHandle,
    row_indices: &[u32],
    col_indices: &[u32],
    values: &[f32],
    m: usize,
    _n: usize,
    device: &GpuDevice,
) -> GpuResult<(Vec<u32>, Vec<u32>, Vec<f32>)> {
    // All three coordinate arrays must describe the same number of entries.
    let nnz = values.len();
    let lengths_agree = row_indices.len() == nnz && col_indices.len() == nnz;
    if !lengths_agree {
        return Err(GpuError::ShapeMismatch {
            op: "coo_to_csr_f32",
            expected: vec![nnz],
            got: vec![row_indices.len(), col_indices.len()],
        });
    }
    let crow = gpu_coo_to_csr_indices(handle, row_indices, m, device)?;
    Ok((crow, col_indices.to_vec(), values.to_vec()))
}
/// Converts an f64 COO matrix to CSR form. Only the row pointers require GPU
/// work; the column indices and values are identical between the two layouts
/// and are returned as copies.
///
/// Returns `(crow_indices, col_indices, values)` on the host, or
/// `GpuError::ShapeMismatch` when the three input slices disagree in length.
pub fn gpu_coo_to_csr_f64(
    handle: &CusparseHandle,
    row_indices: &[u32],
    col_indices: &[u32],
    values: &[f64],
    m: usize,
    _n: usize,
    device: &GpuDevice,
) -> GpuResult<(Vec<u32>, Vec<u32>, Vec<f64>)> {
    // All three coordinate arrays must describe the same number of entries.
    let nnz = values.len();
    let lengths_agree = row_indices.len() == nnz && col_indices.len() == nnz;
    if !lengths_agree {
        return Err(GpuError::ShapeMismatch {
            op: "coo_to_csr_f64",
            expected: vec![nnz],
            got: vec![row_indices.len(), col_indices.len()],
        });
    }
    let crow = gpu_coo_to_csr_indices(handle, row_indices, m, device)?;
    Ok((crow, col_indices.to_vec(), values.to_vec()))
}
/// Expands a CSR row-pointer array into per-entry COO row indices (length
/// `nnz`) on the GPU via `cusparseXcsr2coo`, copying the result back to the
/// host.
fn gpu_csr_to_coo_indices(
    handle: &CusparseHandle,
    crow_indices: &[u32],
    nnz: usize,
    m: usize,
    device: &GpuDevice,
) -> GpuResult<Vec<u32>> {
    // No stored entries means no row indices to produce.
    if nnz == 0 {
        return Ok(Vec::new());
    }
    set_stream(handle, device)?;
    // The legacy csr2coo API takes `int` sizes; reject anything larger.
    let m_i = i32::try_from(m).map_err(|_| GpuError::ShapeMismatch {
        op: "csr_to_coo_indices",
        expected: vec![i32::MAX as usize],
        got: vec![m],
    })?;
    let nnz_i = i32::try_from(nnz).map_err(|_| GpuError::ShapeMismatch {
        op: "csr_to_coo_indices",
        expected: vec![i32::MAX as usize],
        got: vec![nnz],
    })?;
    let mut d_crow = cpu_to_gpu(crow_indices, device)?;
    let stream = device.stream();
    let mut d_rows = stream.alloc_zeros::<u32>(nnz)?;
    // Scope the raw-pointer borrows around the single FFI call. The u32
    // buffers are reinterpreted as i32; bit-identical assuming the values
    // fit in i32 (nnz and m were range-checked above).
    {
        let (crow_ptr, _crow_sync) = d_crow.inner_mut().device_ptr_mut(&stream);
        let (rows_ptr, _rows_sync) = d_rows.device_ptr_mut(&stream);
        let status = unsafe {
            csys::cusparseXcsr2coo(
                handle.raw(),
                crow_ptr as *const i32,
                nnz_i,
                m_i,
                rows_ptr as *mut i32,
                csys::cusparseIndexBase_t::CUSPARSE_INDEX_BASE_ZERO,
            )
        };
        check(status, "cusparseXcsr2coo")?;
    }
    Ok(stream.clone_dtoh(&d_rows)?)
}
/// Converts an f32 CSR matrix to COO form. Expanding the row pointers into
/// per-entry row indices is the only GPU step; the column indices and values
/// are identical between the two layouts and are returned as copies.
///
/// Returns `(row_indices, col_indices, values)` on the host, or
/// `GpuError::ShapeMismatch` when the input slice lengths are inconsistent.
pub fn gpu_csr_to_coo_f32(
    handle: &CusparseHandle,
    crow_indices: &[u32],
    col_indices: &[u32],
    values: &[f32],
    m: usize,
    _n: usize,
    device: &GpuDevice,
) -> GpuResult<(Vec<u32>, Vec<u32>, Vec<f32>)> {
    // The row-pointer array carries one entry per row plus a terminator.
    let expected_crow_len = m + 1;
    if crow_indices.len() != expected_crow_len {
        return Err(GpuError::ShapeMismatch {
            op: "csr_to_coo_f32",
            expected: vec![expected_crow_len],
            got: vec![crow_indices.len()],
        });
    }
    // Column indices and values must be parallel arrays.
    let nnz = values.len();
    if col_indices.len() != nnz {
        return Err(GpuError::ShapeMismatch {
            op: "csr_to_coo_f32",
            expected: vec![nnz],
            got: vec![col_indices.len()],
        });
    }
    let expanded_rows = gpu_csr_to_coo_indices(handle, crow_indices, nnz, m, device)?;
    Ok((expanded_rows, col_indices.to_vec(), values.to_vec()))
}
/// Converts an f64 CSR matrix to COO form. Expanding the row pointers into
/// per-entry row indices is the only GPU step; the column indices and values
/// are identical between the two layouts and are returned as copies.
///
/// Returns `(row_indices, col_indices, values)` on the host, or
/// `GpuError::ShapeMismatch` when the input slice lengths are inconsistent.
pub fn gpu_csr_to_coo_f64(
    handle: &CusparseHandle,
    crow_indices: &[u32],
    col_indices: &[u32],
    values: &[f64],
    m: usize,
    _n: usize,
    device: &GpuDevice,
) -> GpuResult<(Vec<u32>, Vec<u32>, Vec<f64>)> {
    // The row-pointer array carries one entry per row plus a terminator.
    let expected_crow_len = m + 1;
    if crow_indices.len() != expected_crow_len {
        return Err(GpuError::ShapeMismatch {
            op: "csr_to_coo_f64",
            expected: vec![expected_crow_len],
            got: vec![crow_indices.len()],
        });
    }
    // Column indices and values must be parallel arrays.
    let nnz = values.len();
    if col_indices.len() != nnz {
        return Err(GpuError::ShapeMismatch {
            op: "csr_to_coo_f64",
            expected: vec![nnz],
            got: vec![col_indices.len()],
        });
    }
    let expanded_rows = gpu_csr_to_coo_indices(handle, crow_indices, nnz, m, device)?;
    Ok((expanded_rows, col_indices.to_vec(), values.to_vec()))
}