1pub mod cuda;
7pub mod metal;
8pub mod opencl;
9pub mod vulkan;
10
11#[cfg(feature = "gpu")]
13pub use scirs2_core::gpu::{
14 GpuBackend, GpuBuffer, GpuContext, GpuDataType, GpuDevice, GpuError, GpuKernelHandle,
15};
16
17#[cfg(not(feature = "gpu"))]
19pub use crate::gpu_ops::{
20 GpuBackend, GpuBuffer, GpuDataType, GpuDevice, GpuError, GpuKernelHandle,
21};
22
23pub use cuda::{CudaMemoryManager, CudaOptimizationLevel, CudaSpMatVec};
25pub use metal::{MetalDeviceInfo, MetalMemoryManager, MetalOptimizationLevel, MetalSpMatVec};
26pub use opencl::{
27 OpenCLMemoryManager, OpenCLOptimizationLevel, OpenCLPlatformInfo, OpenCLSpMatVec,
28};
29pub use vulkan::{
30 VulkanDeviceInfo, VulkanDeviceType, VulkanMemoryManager, VulkanOptimizationLevel,
31 VulkanSpMatVec,
32};
33
34use crate::csr_array::CsrArray;
35use crate::error::{SparseError, SparseResult};
36use crate::sparray::SparseArray;
37use scirs2_core::ndarray::{Array1, ArrayView1};
38use scirs2_core::numeric::{Float, SparseElement};
39use std::fmt::Debug;
40
41pub struct GpuSpMatVec {
45 backend: GpuBackend,
46 cuda_handler: Option<CudaSpMatVec>,
47 opencl_handler: Option<OpenCLSpMatVec>,
48 metal_handler: Option<MetalSpMatVec>,
49 vulkan_handler: Option<VulkanSpMatVec>,
50}
51
52impl GpuSpMatVec {
53 pub fn new() -> SparseResult<Self> {
55 let backend = Self::detect_best_backend();
56
57 let mut handler = Self {
58 backend,
59 cuda_handler: None,
60 opencl_handler: None,
61 metal_handler: None,
62 vulkan_handler: None,
63 };
64
65 handler.initialize_backend()?;
67
68 Ok(handler)
69 }
70
71 pub fn with_backend(backend: GpuBackend) -> SparseResult<Self> {
73 let mut handler = Self {
74 backend,
75 cuda_handler: None,
76 opencl_handler: None,
77 metal_handler: None,
78 vulkan_handler: None,
79 };
80
81 handler.initialize_backend()?;
82
83 Ok(handler)
84 }
85
86 fn initialize_backend(&mut self) -> SparseResult<()> {
88 match self.backend {
89 GpuBackend::Cuda => {
90 self.cuda_handler = Some(CudaSpMatVec::new()?);
91 }
92 GpuBackend::OpenCL => {
93 self.opencl_handler = Some(OpenCLSpMatVec::new()?);
94 }
95 GpuBackend::Metal => {
96 self.metal_handler = Some(MetalSpMatVec::new()?);
97 }
98 GpuBackend::Wgpu => {
99 self.vulkan_handler = Some(VulkanSpMatVec::new()?);
100 }
101 #[cfg(not(feature = "gpu"))]
102 GpuBackend::Vulkan => {
103 self.vulkan_handler = Some(VulkanSpMatVec::new()?);
104 }
105 GpuBackend::Cpu => {
106 }
108 GpuBackend::Rocm => {
109 self.backend = GpuBackend::Cpu;
111 }
112 }
113
114 Ok(())
115 }
116
117 fn detect_best_backend() -> GpuBackend {
119 #[cfg(target_os = "macos")]
121 {
122 if Self::is_metal_available() {
123 return GpuBackend::Metal;
124 }
125 }
126
127 if Self::is_cuda_available() {
128 return GpuBackend::Cuda;
129 }
130
131 if Self::is_vulkan_available() {
132 return GpuBackend::Wgpu;
133 }
134
135 if Self::is_opencl_available() {
136 return GpuBackend::OpenCL;
137 }
138
139 GpuBackend::Cpu
140 }
141
142 fn is_cuda_available() -> bool {
144 #[cfg(feature = "gpu")]
146 {
147 std::env::var("CUDA_PATH").is_ok() || std::path::Path::new("/usr/local/cuda").exists()
149 }
150 #[cfg(not(feature = "gpu"))]
151 false
152 }
153
154 fn is_opencl_available() -> bool {
156 #[cfg(feature = "gpu")]
158 {
159 true
161 }
162 #[cfg(not(feature = "gpu"))]
163 false
164 }
165
166 fn is_metal_available() -> bool {
168 #[cfg(target_os = "macos")]
169 {
170 true
172 }
173 #[cfg(not(target_os = "macos"))]
174 false
175 }
176
177 fn is_vulkan_available() -> bool {
179 #[cfg(feature = "gpu")]
180 {
181 std::env::var("VULKAN_SDK").is_ok()
183 || std::path::Path::new("/usr/share/vulkan").exists()
184 || std::path::Path::new("/usr/local/share/vulkan").exists()
185 }
186 #[cfg(not(feature = "gpu"))]
187 false
188 }
189
190 pub fn spmv<T>(
192 &self,
193 matrix: &CsrArray<T>,
194 vector: &ArrayView1<T>,
195 device: Option<&GpuDevice>,
196 ) -> SparseResult<Array1<T>>
197 where
198 T: Float + SparseElement + Debug + Copy + GpuDataType + std::iter::Sum,
199 {
200 match self.backend {
201 GpuBackend::Cuda => {
202 if let Some(ref handler) = self.cuda_handler {
203 #[cfg(feature = "gpu")]
204 {
205 if let Some(device) = device {
206 handler.execute_spmv(matrix, vector, device)
207 } else {
208 matrix.dot_vector(vector)
210 }
211 }
212 #[cfg(not(feature = "gpu"))]
213 handler.execute_spmv_cpu(matrix, vector)
214 } else {
215 Err(SparseError::ComputationError(
216 "CUDA handler not initialized".to_string(),
217 ))
218 }
219 }
220 GpuBackend::OpenCL => {
221 if let Some(ref handler) = self.opencl_handler {
222 #[cfg(feature = "gpu")]
223 {
224 if let Some(device) = device {
225 handler.execute_spmv(matrix, vector, device)
226 } else {
227 matrix.dot_vector(vector)
229 }
230 }
231 #[cfg(not(feature = "gpu"))]
232 handler.execute_spmv_cpu(matrix, vector)
233 } else {
234 Err(SparseError::ComputationError(
235 "OpenCL handler not initialized".to_string(),
236 ))
237 }
238 }
239 GpuBackend::Metal => {
240 if let Some(ref handler) = self.metal_handler {
241 #[cfg(feature = "gpu")]
242 {
243 if let Some(device) = device {
244 handler.execute_spmv(matrix, vector, device)
245 } else {
246 matrix.dot_vector(vector)
248 }
249 }
250 #[cfg(not(feature = "gpu"))]
251 handler.execute_spmv_cpu(matrix, vector)
252 } else {
253 Err(SparseError::ComputationError(
254 "Metal handler not initialized".to_string(),
255 ))
256 }
257 }
258 GpuBackend::Wgpu => {
259 if let Some(ref handler) = self.vulkan_handler {
260 #[cfg(feature = "gpu")]
261 {
262 if let Some(device) = device {
263 handler.execute_spmv(matrix, vector, device)
264 } else {
265 matrix.dot_vector(vector)
267 }
268 }
269 #[cfg(not(feature = "gpu"))]
270 handler.execute_spmv_cpu(matrix, vector)
271 } else {
272 Err(SparseError::ComputationError(
273 "Vulkan handler not initialized".to_string(),
274 ))
275 }
276 }
277 #[cfg(not(feature = "gpu"))]
278 GpuBackend::Vulkan => {
279 if let Some(ref handler) = self.vulkan_handler {
280 handler.execute_spmv_cpu(matrix, vector)
281 } else {
282 Err(SparseError::ComputationError(
283 "Vulkan handler not initialized".to_string(),
284 ))
285 }
286 }
287 GpuBackend::Cpu => {
288 matrix.dot_vector(vector)
290 }
291 GpuBackend::Rocm => {
292 matrix.dot_vector(vector)
294 }
295 }
296 }
297
298 pub fn spmv_optimized<T>(
300 &self,
301 matrix: &CsrArray<T>,
302 vector: &ArrayView1<T>,
303 device: Option<&GpuDevice>,
304 optimization_hint: OptimizationHint,
305 ) -> SparseResult<Array1<T>>
306 where
307 T: Float + SparseElement + Debug + Copy + GpuDataType + std::iter::Sum,
308 {
309 match self.backend {
310 GpuBackend::Cuda => {
311 if let Some(ref handler) = self.cuda_handler {
312 let cuda_level = optimization_hint.to_cuda_level();
313 #[cfg(feature = "gpu")]
314 {
315 if let Some(device) = device {
316 handler.execute_optimized_spmv(matrix, vector, device, cuda_level)
317 } else {
318 return Err(SparseError::ComputationError(
319 "GPU device required for CUDA operations".to_string(),
320 ));
321 }
322 }
323 #[cfg(not(feature = "gpu"))]
324 handler.execute_spmv_cpu(matrix, vector)
325 } else {
326 Err(SparseError::ComputationError(
327 "CUDA handler not initialized".to_string(),
328 ))
329 }
330 }
331 GpuBackend::OpenCL => {
332 if let Some(ref handler) = self.opencl_handler {
333 let opencl_level = optimization_hint.to_opencl_level();
334 #[cfg(feature = "gpu")]
335 {
336 if let Some(device) = device {
337 handler.execute_optimized_spmv(matrix, vector, device, opencl_level)
338 } else {
339 return Err(SparseError::ComputationError(
340 "GPU device required for OpenCL operations".to_string(),
341 ));
342 }
343 }
344 #[cfg(not(feature = "gpu"))]
345 handler.execute_spmv_cpu(matrix, vector)
346 } else {
347 Err(SparseError::ComputationError(
348 "OpenCL handler not initialized".to_string(),
349 ))
350 }
351 }
352 GpuBackend::Metal => {
353 if let Some(ref handler) = self.metal_handler {
354 let metal_level = optimization_hint.to_metal_level();
355 #[cfg(feature = "gpu")]
356 {
357 if let Some(device) = device {
358 handler.execute_optimized_spmv(matrix, vector, device, metal_level)
359 } else {
360 return Err(SparseError::ComputationError(
361 "GPU device required for Metal operations".to_string(),
362 ));
363 }
364 }
365 #[cfg(not(feature = "gpu"))]
366 handler.execute_spmv_cpu(matrix, vector)
367 } else {
368 Err(SparseError::ComputationError(
369 "Metal handler not initialized".to_string(),
370 ))
371 }
372 }
373 GpuBackend::Wgpu => {
374 if let Some(ref handler) = self.vulkan_handler {
375 let vulkan_level = optimization_hint.to_vulkan_level();
376 #[cfg(feature = "gpu")]
377 {
378 if let Some(device) = device {
379 handler.execute_optimized_spmv(matrix, vector, device, vulkan_level)
380 } else {
381 return Err(SparseError::ComputationError(
382 "GPU device required for Vulkan operations".to_string(),
383 ));
384 }
385 }
386 #[cfg(not(feature = "gpu"))]
387 handler.execute_spmv_cpu(matrix, vector)
388 } else {
389 Err(SparseError::ComputationError(
390 "Vulkan handler not initialized".to_string(),
391 ))
392 }
393 }
394 #[cfg(not(feature = "gpu"))]
395 GpuBackend::Vulkan => {
396 if let Some(ref handler) = self.vulkan_handler {
397 let vulkan_level = optimization_hint.to_vulkan_level();
398 handler.execute_spmv_cpu(matrix, vector)
399 } else {
400 Err(SparseError::ComputationError(
401 "Vulkan handler not initialized".to_string(),
402 ))
403 }
404 }
405 GpuBackend::Cpu | GpuBackend::Rocm => {
406 self.spmv(matrix, vector, device)
408 }
409 }
410 }
411
412 pub fn backend(&self) -> GpuBackend {
414 self.backend
415 }
416
417 pub fn is_gpu_available(&self) -> bool {
419 !matches!(self.backend, GpuBackend::Cpu)
420 }
421
422 pub fn get_backend_info(&self) -> BackendInfo {
424 match self.backend {
425 GpuBackend::Cuda => BackendInfo {
426 name: "CUDA".to_string(),
427 version: "Unknown".to_string(),
428 device_count: 1, supports_double_precision: true,
430 max_memory_mb: 8192, },
432 GpuBackend::OpenCL => BackendInfo {
433 name: "OpenCL".to_string(),
434 version: "Unknown".to_string(),
435 device_count: 1,
436 supports_double_precision: true,
437 max_memory_mb: 4096, },
439 GpuBackend::Metal => BackendInfo {
440 name: "Metal".to_string(),
441 version: "Unknown".to_string(),
442 device_count: 1,
443 supports_double_precision: false, max_memory_mb: if MetalDeviceInfo::detect().is_apple_silicon {
445 16384
446 } else {
447 8192
448 },
449 },
450 GpuBackend::Wgpu => BackendInfo {
451 name: "Vulkan".to_string(),
452 version: "Unknown".to_string(),
453 device_count: 1,
454 supports_double_precision: true,
455 max_memory_mb: 8192, },
457 #[cfg(not(feature = "gpu"))]
458 GpuBackend::Vulkan => BackendInfo {
459 name: "Vulkan".to_string(),
460 version: "Unknown".to_string(),
461 device_count: 1,
462 supports_double_precision: true,
463 max_memory_mb: 8192, },
465 GpuBackend::Cpu | GpuBackend::Rocm => BackendInfo {
466 name: "CPU".to_string(),
467 version: "Fallback".to_string(),
468 device_count: 0,
469 supports_double_precision: true,
470 max_memory_mb: 0,
471 },
472 }
473 }
474}
475
476impl Default for GpuSpMatVec {
477 fn default() -> Self {
478 Self::new().unwrap_or_else(|_| Self {
479 backend: GpuBackend::Cpu,
480 cuda_handler: None,
481 opencl_handler: None,
482 metal_handler: None,
483 vulkan_handler: None,
484 })
485 }
486}
487
/// Backend-independent optimization preference.
///
/// The `to_cuda_level`, `to_opencl_level`, `to_metal_level` and
/// `to_vulkan_level` methods translate a hint into the matching
/// backend-specific level.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum OptimizationHint {
    /// Maps to the `Basic` level of every backend.
    Basic,
    /// The default hint. Maps to `Vectorized` (CUDA), `Workgroup` (OpenCL),
    /// `SimdGroup` (Metal) and `ComputeShader` (Vulkan).
    #[default]
    Balanced,
    /// Maps to `WarpLevel` (CUDA), `Vectorized` (OpenCL), `AppleSilicon`
    /// (Metal) and `Maximum` (Vulkan).
    Maximum,
    /// Maps to `Basic` (CUDA), `Workgroup` (OpenCL), `AppleSilicon` (Metal)
    /// and `Subgroup` (Vulkan).
    MemoryOptimized,
}
501
502impl OptimizationHint {
503 pub fn to_cuda_level(self) -> CudaOptimizationLevel {
505 match self {
506 OptimizationHint::Basic => CudaOptimizationLevel::Basic,
507 OptimizationHint::Balanced => CudaOptimizationLevel::Vectorized,
508 OptimizationHint::Maximum => CudaOptimizationLevel::WarpLevel,
509 OptimizationHint::MemoryOptimized => CudaOptimizationLevel::Basic,
510 }
511 }
512
513 pub fn to_opencl_level(self) -> OpenCLOptimizationLevel {
515 match self {
516 OptimizationHint::Basic => OpenCLOptimizationLevel::Basic,
517 OptimizationHint::Balanced => OpenCLOptimizationLevel::Workgroup,
518 OptimizationHint::Maximum => OpenCLOptimizationLevel::Vectorized,
519 OptimizationHint::MemoryOptimized => OpenCLOptimizationLevel::Workgroup,
520 }
521 }
522
523 pub fn to_metal_level(self) -> MetalOptimizationLevel {
525 match self {
526 OptimizationHint::Basic => MetalOptimizationLevel::Basic,
527 OptimizationHint::Balanced => MetalOptimizationLevel::SimdGroup,
528 OptimizationHint::Maximum => MetalOptimizationLevel::AppleSilicon,
529 OptimizationHint::MemoryOptimized => MetalOptimizationLevel::AppleSilicon,
530 }
531 }
532
533 pub fn to_vulkan_level(self) -> VulkanOptimizationLevel {
535 match self {
536 OptimizationHint::Basic => VulkanOptimizationLevel::Basic,
537 OptimizationHint::Balanced => VulkanOptimizationLevel::ComputeShader,
538 OptimizationHint::Maximum => VulkanOptimizationLevel::Maximum,
539 OptimizationHint::MemoryOptimized => VulkanOptimizationLevel::Subgroup,
540 }
541 }
542}
543
/// Summary of a compute backend as reported by
/// `GpuSpMatVec::get_backend_info`.
#[derive(Debug, Clone)]
pub struct BackendInfo {
    /// Human-readable backend name (for example `"CUDA"` or `"CPU"`).
    pub name: String,
    /// Version label; `get_backend_info` only ever reports `"Unknown"` or
    /// `"Fallback"`.
    pub version: String,
    /// Number of devices; `0` for the CPU fallback.
    pub device_count: usize,
    /// Whether the backend is reported to support double precision.
    pub supports_double_precision: bool,
    /// Memory figure for the backend; presumably megabytes, judging by the
    /// field name. `0` for the CPU fallback.
    pub max_memory_mb: usize,
}
553
554pub mod convenience {
556 use super::*;
557
558 pub fn gpu_spmv<T>(matrix: &CsrArray<T>, vector: &ArrayView1<T>) -> SparseResult<Array1<T>>
560 where
561 T: Float + SparseElement + Debug + Copy + GpuDataType + std::iter::Sum,
562 {
563 let gpu_handler = GpuSpMatVec::new()?;
564 gpu_handler.spmv(matrix, vector, None)
565 }
566
567 pub fn gpu_spmv_optimized<T>(
569 matrix: &CsrArray<T>,
570 vector: &ArrayView1<T>,
571 optimization: OptimizationHint,
572 ) -> SparseResult<Array1<T>>
573 where
574 T: Float + SparseElement + Debug + Copy + GpuDataType + std::iter::Sum,
575 {
576 let gpu_handler = GpuSpMatVec::new()?;
577 gpu_handler.spmv_optimized(matrix, vector, None, optimization)
578 }
579
580 pub fn available_backends() -> Vec<GpuBackend> {
582 let mut backends = Vec::new();
583
584 if GpuSpMatVec::is_cuda_available() {
585 backends.push(GpuBackend::Cuda);
586 }
587
588 if GpuSpMatVec::is_vulkan_available() {
589 backends.push(GpuBackend::Wgpu);
590 }
591
592 if GpuSpMatVec::is_opencl_available() {
593 backends.push(GpuBackend::OpenCL);
594 }
595
596 if GpuSpMatVec::is_metal_available() {
597 backends.push(GpuBackend::Metal);
598 }
599
600 backends.push(GpuBackend::Cpu); backends
603 }
604}
605
#[cfg(test)]
mod tests {
    use super::*;

    /// Auto-detected construction is expected to succeed.
    #[test]
    fn test_gpu_spmv_creation() {
        assert!(GpuSpMatVec::new().is_ok());
    }

    /// Detection must yield one of the known variants. The exhaustive match
    /// doubles as a compile-time reminder when a variant is added.
    #[test]
    fn test_backend_detection() {
        let backend = GpuSpMatVec::detect_best_backend();

        match backend {
            GpuBackend::Cuda
            | GpuBackend::OpenCL
            | GpuBackend::Metal
            | GpuBackend::Cpu
            | GpuBackend::Wgpu
            | GpuBackend::Rocm => (),
            #[cfg(not(feature = "gpu"))]
            GpuBackend::Vulkan => (),
        }
    }

    /// `Maximum` maps to the most aggressive level of each backend.
    #[test]
    fn test_optimization_hint_conversions() {
        let hint = OptimizationHint::Maximum;

        assert_eq!(hint.to_cuda_level(), CudaOptimizationLevel::WarpLevel);
        assert_eq!(hint.to_opencl_level(), OpenCLOptimizationLevel::Vectorized);
        assert_eq!(hint.to_metal_level(), MetalOptimizationLevel::AppleSilicon);
    }

    /// Backend info always carries a non-empty name and version.
    #[test]
    fn test_backend_info() {
        let gpu_spmv = GpuSpMatVec::new().expect("Operation failed");
        let info = gpu_spmv.get_backend_info();

        assert!(!info.name.is_empty());
        assert!(!info.version.is_empty());
    }

    /// The CPU fallback is always listed among the available backends.
    #[test]
    fn test_convenience_functions() {
        let backends = convenience::available_backends();
        assert!(!backends.is_empty());
        assert!(backends.contains(&GpuBackend::Cpu));
    }

    /// The availability flag must agree with the reported backend: only the
    /// CPU fallback counts as "no GPU".
    #[test]
    fn test_is_gpu_available() {
        let gpu_spmv = GpuSpMatVec::new().expect("Operation failed");

        assert_eq!(
            gpu_spmv.is_gpu_available(),
            gpu_spmv.backend() != GpuBackend::Cpu
        );
    }

    /// `Balanced` is the default hint.
    #[test]
    fn test_optimization_hint_default() {
        assert_eq!(OptimizationHint::default(), OptimizationHint::Balanced);
    }
}