1pub use crate::gpu::*;
9
10#[cfg(feature = "gpu")]
14pub use scirs2_core::gpu::{GpuBackend, GpuBuffer, GpuContext, GpuDataType, GpuKernelHandle};
15
16#[cfg(feature = "gpu")]
17pub use scirs2_core::GpuError;
18
/// Marker trait bounding the element types accepted by the GPU-facing
/// functions in this module.
///
/// This definition is only compiled when the `gpu` feature is disabled; with
/// the feature enabled the name is re-exported from `scirs2_core::gpu`.
#[cfg(not(feature = "gpu"))]
pub trait GpuDataType: Copy + Send + Sync + 'static {}

// Stamps out an empty `GpuDataType` impl for every listed primitive type.
#[cfg(not(feature = "gpu"))]
macro_rules! impl_gpu_data_type {
    ($($primitive:ty),+ $(,)?) => {
        $(impl GpuDataType for $primitive {})+
    };
}

#[cfg(not(feature = "gpu"))]
impl_gpu_data_type!(f32, f64, i32, i64, u32, u64);
38
/// Backend selector used when the `gpu` feature is disabled.
///
/// With the feature enabled, `GpuBackend` is re-exported from
/// `scirs2_core::gpu` instead of being defined here.
#[cfg(not(feature = "gpu"))]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum GpuBackend {
    /// CPU variant; this is the value produced by `GpuBackend::default()`.
    #[default]
    Cpu,
    /// CUDA backend identifier.
    Cuda,
    /// OpenCL backend identifier.
    OpenCL,
    /// Metal backend identifier.
    Metal,
    /// ROCm backend identifier.
    Rocm,
    /// wgpu backend identifier.
    Wgpu,
}
50
/// Message-only error type used when the `gpu` feature is disabled.
///
/// Every constructor stores the supplied message unchanged; the constructor
/// name is not recorded, so all of them produce the same kind of value.
#[cfg(not(feature = "gpu"))]
#[derive(Debug, Clone)]
pub struct GpuError(String);

#[cfg(not(feature = "gpu"))]
impl GpuError {
    /// Builds an error from a borrowed message.
    pub fn new(msg: &str) -> Self {
        GpuError(String::from(msg))
    }

    /// Builds an error describing an invalid buffer.
    pub fn invalid_buffer(msg: String) -> Self {
        GpuError(msg)
    }

    /// Builds an error describing an invalid parameter.
    pub fn invalid_parameter(msg: String) -> Self {
        GpuError(msg)
    }

    /// Builds an error describing a kernel compilation failure.
    pub fn kernel_compilation_error(msg: String) -> Self {
        GpuError(msg)
    }

    /// Builds an error for any other failure.
    pub fn other(msg: String) -> Self {
        GpuError(msg)
    }
}

#[cfg(not(feature = "gpu"))]
impl std::fmt::Display for GpuError {
    /// Writes the stored message verbatim, without applying width or
    /// alignment flags from the caller's format specifier.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.0)
    }
}

#[cfg(not(feature = "gpu"))]
impl std::error::Error for GpuError {}
87
88#[cfg(not(feature = "gpu"))]
89pub struct GpuBuffer<T> {
90 data: Vec<T>,
91}
92
93#[cfg(not(feature = "gpu"))]
94impl<T: Clone + Copy> GpuBuffer<T> {
95 pub fn from_vec(data: Vec<T>) -> Self {
96 Self { data }
97 }
98
99 pub fn as_slice(&self) -> &[T] {
100 &self.data
101 }
102
103 pub fn as_mut_slice(&mut self) -> &mut [T] {
104 &mut self.data
105 }
106
107 pub fn to_vec(&self) -> Vec<T> {
108 self.data.clone()
109 }
110
111 pub fn to_host(&self) -> Result<Vec<T>, GpuError> {
112 Ok(self.data.clone())
113 }
114
115 pub fn len(&self) -> usize {
116 self.data.len()
117 }
118
119 pub fn is_empty(&self) -> bool {
120 self.data.is_empty()
121 }
122}
123
/// Field-less placeholder for a kernel handle, compiled only when the `gpu`
/// feature is disabled; with the feature enabled the name is re-exported from
/// `scirs2_core::gpu`.
#[cfg(not(feature = "gpu"))]
#[derive(Debug, Clone)]
pub struct GpuKernelHandle;
127
128#[cfg(not(feature = "gpu"))]
129pub struct GpuDevice {
130 backend: GpuBackend,
131}
132
133#[cfg(not(feature = "gpu"))]
134impl GpuDevice {
135 pub fn new(_backend: GpuBackend) -> Result<Self, GpuError> {
136 Ok(Self {
137 backend: GpuBackend::Cpu,
138 })
139 }
140
141 pub fn get_default(_backend: GpuBackend) -> Result<Self, GpuError> {
142 Self::new(_backend)
143 }
144
145 pub fn backend(&self) -> GpuBackend {
146 self.backend
147 }
148
149 pub fn create_buffer<T>(&self, data: &[T]) -> Result<GpuBuffer<T>, GpuError>
150 where
151 T: Clone + Copy,
152 {
153 Ok(GpuBuffer {
154 data: data.to_vec(),
155 })
156 }
157
158 pub fn create_buffer_zeros<T>(&self, size: usize) -> Result<GpuBuffer<T>, GpuError>
159 where
160 T: Clone + Copy + Default,
161 {
162 Ok(GpuBuffer {
163 data: vec![T::default(); size],
164 })
165 }
166}
167
168pub use crate::gpu::{BackendInfo, GpuSpMatVec, OptimizationHint};
173
174pub use crate::gpu::convenience::{available_backends, gpu_spmv, gpu_spmv_optimized};
176
/// Wrapper that owns a [`GpuSpMatVec`] handler.
///
/// No constructor or methods for this type are visible in this part of the
/// file. NOTE(review): presumably retained for backward compatibility — confirm.
pub struct AdvancedGpuOps {
    // Handler used for sparse matrix-vector operations.
    gpu_handler: GpuSpMatVec,
}
181
/// Holds the backend a kernel scheduler is associated with.
///
/// No constructor or methods for this type are visible in this part of the
/// file. NOTE(review): presumably retained for backward compatibility — confirm.
#[derive(Debug, Clone)]
pub struct GpuKernelScheduler {
    // Backend this scheduler is tied to.
    backend: GpuBackend,
}
186
/// Holds the backend a memory manager is associated with.
///
/// No constructor or methods for this type are visible in this part of the
/// file. NOTE(review): presumably retained for backward compatibility — confirm.
#[derive(Debug, Clone)]
pub struct GpuMemoryManager {
    // Backend this manager is tied to.
    backend: GpuBackend,
}
191
/// Pairs a backend selection with an optimization hint.
#[derive(Debug, Clone)]
pub struct GpuOptions {
    /// Backend to use.
    pub backend: GpuBackend,
    /// Optimization hint to apply.
    pub optimization: OptimizationHint,
}
197
/// Holds a single on/off flag for profiling.
///
/// No constructor or methods for this type are visible in this part of the
/// file. NOTE(review): presumably retained for backward compatibility — confirm.
#[derive(Debug, Clone)]
pub struct GpuProfiler {
    // Whether profiling is switched on.
    enabled: bool,
}
202
/// Wrapper that owns a [`GpuSpMatVec`] handler.
///
/// No constructor or methods for this type are visible in this part of the
/// file. NOTE(review): presumably retained for backward compatibility — confirm.
pub struct OptimizedGpuOps {
    // Handler used for sparse matrix-vector operations.
    gpu_handler: GpuSpMatVec,
}
206
207use crate::csr_array::CsrArray;
209use crate::error::SparseResult;
210use scirs2_core::ndarray::{Array1, ArrayView1};
211use scirs2_core::numeric::Float;
212use std::fmt::Debug;
213
214#[allow(dead_code)]
221pub fn gpu_sparse_matvec<T>(
222 matrix: &CsrArray<T>,
223 vector: &ArrayView1<T>,
224 backend: Option<GpuBackend>,
225) -> SparseResult<Array1<T>>
226where
227 T: Float + Debug + Copy + GpuDataType + std::iter::Sum,
228{
229 let gpu_handler = if let Some(backend) = backend {
230 GpuSpMatVec::with_backend(backend)?
231 } else {
232 GpuSpMatVec::new()?
233 };
234
235 #[cfg(all(target_os = "macos", feature = "gpu"))]
237 let device = if matches!(backend, Some(GpuBackend::Metal) | None) {
238 Some(GpuDevice::new(GpuBackend::Metal, 0))
239 } else {
240 None
241 };
242 #[cfg(all(target_os = "macos", not(feature = "gpu")))]
243 let device = if matches!(backend, Some(GpuBackend::Metal) | None) {
244 GpuDevice::new(GpuBackend::Metal).ok()
245 } else {
246 None
247 };
248 #[cfg(not(target_os = "macos"))]
249 let device = None;
250
251 gpu_handler.spmv(matrix, vector, device.as_ref())
252}
253
254#[allow(dead_code)]
256pub fn gpu_sym_sparse_matvec<T>(
257 matrix: &CsrArray<T>,
258 vector: &ArrayView1<T>,
259 backend: Option<GpuBackend>,
260) -> SparseResult<Array1<T>>
261where
262 T: Float + Debug + Copy + GpuDataType + std::iter::Sum,
263{
264 let gpu_handler = if let Some(backend) = backend {
265 GpuSpMatVec::with_backend(backend)?
266 } else {
267 GpuSpMatVec::new()?
268 };
269
270 #[cfg(all(target_os = "macos", feature = "gpu"))]
272 let device = if matches!(backend, Some(GpuBackend::Metal) | None) {
273 Some(GpuDevice::new(GpuBackend::Metal, 0))
274 } else {
275 None
276 };
277 #[cfg(all(target_os = "macos", not(feature = "gpu")))]
278 let device = if matches!(backend, Some(GpuBackend::Metal) | None) {
279 GpuDevice::new(GpuBackend::Metal).ok()
280 } else {
281 None
282 };
283 #[cfg(not(target_os = "macos"))]
284 let device = None;
285
286 gpu_handler.spmv(matrix, vector, device.as_ref())
287}
288
289#[allow(dead_code)]
291pub fn gpu_advanced_spmv<T>(
292 matrix: &CsrArray<T>,
293 vector: &ArrayView1<T>,
294 backend: Option<GpuBackend>,
295 optimization: OptimizationHint,
296) -> SparseResult<Array1<T>>
297where
298 T: Float + Debug + Copy + GpuDataType + std::iter::Sum,
299{
300 let gpu_handler = if let Some(backend) = backend {
301 GpuSpMatVec::with_backend(backend)?
302 } else {
303 GpuSpMatVec::new()?
304 };
305
306 #[cfg(all(target_os = "macos", feature = "gpu"))]
308 let device = if matches!(backend, Some(GpuBackend::Metal) | None) {
309 Some(GpuDevice::new(GpuBackend::Metal, 0))
310 } else {
311 None
312 };
313 #[cfg(all(target_os = "macos", not(feature = "gpu")))]
314 let device = if matches!(backend, Some(GpuBackend::Metal) | None) {
315 GpuDevice::new(GpuBackend::Metal).ok()
316 } else {
317 None
318 };
319 #[cfg(not(target_os = "macos"))]
320 let device = None;
321
322 gpu_handler.spmv_optimized(matrix, vector, device.as_ref(), optimization)
323}
324
325#[allow(dead_code)]
327pub struct SpMVKernel {
328 gpu_handler: GpuSpMatVec,
329}
330
331impl SpMVKernel {
332 pub fn new(_device: &GpuDevice, _workgroupsize: [u32; 3]) -> Result<Self, GpuError> {
333 let gpu_handler = GpuSpMatVec::new().map_err(|e| {
334 #[cfg(feature = "gpu")]
335 return GpuError::Other(format!("{:?}", e));
336 #[cfg(not(feature = "gpu"))]
337 return GpuError::other(format!("{:?}", e));
338 })?;
339 Ok(Self { gpu_handler })
340 }
341
342 pub fn execute<T>(
343 &self,
344 matrix: &CsrArray<T>,
345 vector: &ArrayView1<T>,
346 device: &GpuDevice,
347 ) -> Result<Array1<T>, GpuError>
348 where
349 T: Float + Debug + Copy + GpuDataType + std::iter::Sum,
350 {
351 self.gpu_handler
352 .spmv(matrix, vector, Some(device))
353 .map_err(|e| {
354 #[cfg(feature = "gpu")]
355 return GpuError::Other(format!("{:?}", e));
356 #[cfg(not(feature = "gpu"))]
357 return GpuError::other(format!("{:?}", e));
358 })
359 }
360}
361
/// Extension methods for copying buffer contents back into host vectors.
pub trait GpuBufferExt<T: GpuDataType> {
    /// Returns the whole buffer as a `Vec<T>`.
    fn to_host(&self) -> Result<Vec<T>, GpuError>;
    /// Returns the elements selected by `range` as a `Vec<T>`.
    fn to_host_range(&self, range: std::ops::Range<usize>) -> Result<Vec<T>, GpuError>;
}
367
368impl<T: GpuDataType> GpuBufferExt<T> for GpuBuffer<T> {
369 fn to_host(&self) -> Result<Vec<T>, GpuError> {
370 Ok(self.to_vec())
371 }
372
373 fn to_host_range(&self, range: std::ops::Range<usize>) -> Result<Vec<T>, GpuError> {
374 let full_data = self.to_vec();
375 if range.end <= full_data.len() {
376 Ok(full_data[range].to_vec())
377 } else {
378 #[cfg(feature = "gpu")]
379 return Err(GpuError::InvalidParameter(
380 "Range out of bounds".to_string(),
381 ));
382 #[cfg(not(feature = "gpu"))]
383 return Err(GpuError::invalid_parameter(
384 "Range out of bounds".to_string(),
385 ));
386 }
387 }
388}
389
// Unit tests for the compatibility layer defined in this file.
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::Array1;

    // Runs `gpu_sparse_matvec` with no backend and with the CPU backend.
    #[test]
    fn test_backward_compatibility_gpu_sparse_matvec() {
        // NOTE(review): presumably the 2x2 matrix [[1, 2], [3, 4]] in CSR
        // form (data, column indices, row pointers) — confirm against
        // `CsrArray::new`.
        let data = vec![1.0, 2.0, 3.0, 4.0];
        let indices = vec![0, 1, 0, 1];
        let indptr = vec![0, 2, 4];
        let matrix = CsrArray::new(data.into(), indices.into(), indptr.into(), (2, 2)).unwrap();

        let vector = Array1::from_vec(vec![1.0, 2.0]);

        // With no backend requested, a failure is tolerated only if its debug
        // text contains one of the three expected phrases.
        let result = gpu_sparse_matvec(&matrix, &vector.view(), None);
        if let Err(e) = &result {
            eprintln!("Error from gpu_sparse_matvec: {:?}", e);
            let error_msg = format!("{:?}", e);
            assert!(
                error_msg.contains("GPU device required")
                    || error_msg.contains("not initialized")
                    || error_msg.contains("not available"),
                "Unexpected error: {:?}",
                e
            );
        } else {
            // Always true in this branch; kept as written.
            assert!(result.is_ok());
        }

        // With the CPU backend requested, the call is required to succeed.
        let result = gpu_sparse_matvec(&matrix, &vector.view(), Some(GpuBackend::Cpu));
        if let Err(e) = &result {
            eprintln!("Error from gpu_sparse_matvec with CPU backend: {:?}", e);
        }
        assert!(result.is_ok(), "CPU backend should always work");
    }

    // Checks that an `SpMVKernel` can be constructed for a CPU device.
    #[test]
    fn test_gpu_spmv_kernel() {
        // The device constructor differs between the two feature
        // configurations, so each has its own binding.
        #[cfg(feature = "gpu")]
        let device = scirs2_core::gpu::GpuDevice::new(GpuBackend::Cpu, 0);
        #[cfg(not(feature = "gpu"))]
        let device = GpuDevice::new(GpuBackend::Cpu).unwrap();

        let kernel = SpMVKernel::new(&device, [1, 1, 1]);
        assert!(kernel.is_ok());
    }

    // Exercises `to_host` and `to_host_range` on the fallback buffer; the
    // body is compiled out when the `gpu` feature is enabled.
    #[test]
    fn test_gpu_buffer_ext() {
        #[cfg(not(feature = "gpu"))]
        {
            let buffer = GpuBuffer {
                data: vec![1.0, 2.0, 3.0, 4.0],
            };
            let host_data = buffer.to_host().unwrap();
            assert_eq!(host_data, vec![1.0, 2.0, 3.0, 4.0]);

            let range_data = buffer.to_host_range(1..3).unwrap();
            assert_eq!(range_data, vec![2.0, 3.0]);
        }
    }

    // Compile-time check that the listed primitives implement `GpuDataType`.
    #[test]
    fn test_gpu_data_types() {
        fn is_gpu_data_type<T: GpuDataType>() {}

        is_gpu_data_type::<f32>();
        is_gpu_data_type::<f64>();
        is_gpu_data_type::<u32>();
        is_gpu_data_type::<u64>();
        is_gpu_data_type::<i32>();
        is_gpu_data_type::<i64>();
    }

    // Names every `GpuBackend` variant used here; the match has no wildcard
    // arm and every arm is a no-op.
    #[test]
    fn test_gpu_backend_enum() {
        let backends = [
            GpuBackend::Cpu,
            GpuBackend::Cuda,
            GpuBackend::OpenCL,
            GpuBackend::Metal,
            GpuBackend::Rocm,
            GpuBackend::Wgpu,
        ];

        for backend in &backends {
            match backend {
                GpuBackend::Cpu => (),
                GpuBackend::Cuda => (),
                GpuBackend::OpenCL => (),
                GpuBackend::Metal => (),
                GpuBackend::Rocm => (),
                GpuBackend::Wgpu => (),
            }
        }
    }

    // The reported backend list must be non-empty and include the CPU backend.
    #[test]
    fn test_available_backends() {
        let backends = available_backends();
        assert!(!backends.is_empty());
        assert!(backends.contains(&GpuBackend::Cpu));
    }
}