1pub mod construction;
7pub mod cuda;
8pub mod metal;
9pub mod opencl;
10pub mod solvers;
11pub mod spmv;
12pub mod vulkan;
13
14#[cfg(feature = "gpu")]
16pub use scirs2_core::gpu::{
17 GpuBackend, GpuBuffer, GpuContext, GpuDataType, GpuDevice, GpuError, GpuKernelHandle,
18};
19
20#[cfg(not(feature = "gpu"))]
22pub use crate::gpu_ops::{
23 GpuBackend, GpuBuffer, GpuDataType, GpuDevice, GpuError, GpuKernelHandle,
24};
25
26pub use cuda::{CudaMemoryManager, CudaOptimizationLevel, CudaSpMatVec};
28pub use metal::{MetalDeviceInfo, MetalMemoryManager, MetalOptimizationLevel, MetalSpMatVec};
29pub use opencl::{
30 OpenCLMemoryManager, OpenCLOptimizationLevel, OpenCLPlatformInfo, OpenCLSpMatVec,
31};
32pub use vulkan::{
33 VulkanDeviceInfo, VulkanDeviceType, VulkanMemoryManager, VulkanOptimizationLevel,
34 VulkanSpMatVec,
35};
36
37use crate::csr_array::CsrArray;
38use crate::error::{SparseError, SparseResult};
39use crate::sparray::SparseArray;
40use scirs2_core::ndarray::{Array1, ArrayView1};
41use scirs2_core::numeric::{Float, SparseElement};
42use std::fmt::Debug;
43
/// Unified dispatcher for sparse matrix-vector multiplication across GPU
/// backends (CUDA, OpenCL, Metal, Vulkan/wgpu) with a CPU fallback.
///
/// At most one of the backend handlers is populated: `initialize_backend`
/// fills in only the handler matching `backend`.
pub struct GpuSpMatVec {
    /// Backend selected at construction time (may be downgraded to `Cpu`
    /// during initialization, e.g. for ROCm).
    backend: GpuBackend,
    /// Populated only when `backend` is `GpuBackend::Cuda`.
    cuda_handler: Option<CudaSpMatVec>,
    /// Populated only when `backend` is `GpuBackend::OpenCL`.
    opencl_handler: Option<OpenCLSpMatVec>,
    /// Populated only when `backend` is `GpuBackend::Metal`.
    metal_handler: Option<MetalSpMatVec>,
    /// Populated for `GpuBackend::Wgpu` (and the non-GPU build's `Vulkan`
    /// variant), which share the Vulkan handler.
    vulkan_handler: Option<VulkanSpMatVec>,
}
54
55impl GpuSpMatVec {
56 pub fn new() -> SparseResult<Self> {
58 let backend = Self::detect_best_backend();
59
60 let mut handler = Self {
61 backend,
62 cuda_handler: None,
63 opencl_handler: None,
64 metal_handler: None,
65 vulkan_handler: None,
66 };
67
68 handler.initialize_backend()?;
70
71 Ok(handler)
72 }
73
74 pub fn with_backend(backend: GpuBackend) -> SparseResult<Self> {
76 let mut handler = Self {
77 backend,
78 cuda_handler: None,
79 opencl_handler: None,
80 metal_handler: None,
81 vulkan_handler: None,
82 };
83
84 handler.initialize_backend()?;
85
86 Ok(handler)
87 }
88
89 fn initialize_backend(&mut self) -> SparseResult<()> {
91 match self.backend {
92 GpuBackend::Cuda => {
93 self.cuda_handler = Some(CudaSpMatVec::new()?);
94 }
95 GpuBackend::OpenCL => {
96 self.opencl_handler = Some(OpenCLSpMatVec::new()?);
97 }
98 GpuBackend::Metal => {
99 self.metal_handler = Some(MetalSpMatVec::new()?);
100 }
101 GpuBackend::Wgpu => {
102 self.vulkan_handler = Some(VulkanSpMatVec::new()?);
103 }
104 #[cfg(not(feature = "gpu"))]
105 GpuBackend::Vulkan => {
106 self.vulkan_handler = Some(VulkanSpMatVec::new()?);
107 }
108 GpuBackend::Cpu => {
109 }
111 GpuBackend::Rocm => {
112 self.backend = GpuBackend::Cpu;
114 }
115 }
116
117 Ok(())
118 }
119
120 fn detect_best_backend() -> GpuBackend {
122 #[cfg(target_os = "macos")]
124 {
125 if Self::is_metal_available() {
126 return GpuBackend::Metal;
127 }
128 }
129
130 if Self::is_cuda_available() {
131 return GpuBackend::Cuda;
132 }
133
134 if Self::is_vulkan_available() {
135 return GpuBackend::Wgpu;
136 }
137
138 if Self::is_opencl_available() {
139 return GpuBackend::OpenCL;
140 }
141
142 GpuBackend::Cpu
143 }
144
145 fn is_cuda_available() -> bool {
147 #[cfg(feature = "gpu")]
149 {
150 std::env::var("CUDA_PATH").is_ok() || std::path::Path::new("/usr/local/cuda").exists()
152 }
153 #[cfg(not(feature = "gpu"))]
154 false
155 }
156
157 fn is_opencl_available() -> bool {
159 #[cfg(feature = "gpu")]
161 {
162 true
164 }
165 #[cfg(not(feature = "gpu"))]
166 false
167 }
168
169 fn is_metal_available() -> bool {
171 #[cfg(target_os = "macos")]
172 {
173 true
175 }
176 #[cfg(not(target_os = "macos"))]
177 false
178 }
179
180 fn is_vulkan_available() -> bool {
182 #[cfg(feature = "gpu")]
183 {
184 std::env::var("VULKAN_SDK").is_ok()
186 || std::path::Path::new("/usr/share/vulkan").exists()
187 || std::path::Path::new("/usr/local/share/vulkan").exists()
188 }
189 #[cfg(not(feature = "gpu"))]
190 false
191 }
192
193 pub fn spmv<T>(
195 &self,
196 matrix: &CsrArray<T>,
197 vector: &ArrayView1<T>,
198 device: Option<&GpuDevice>,
199 ) -> SparseResult<Array1<T>>
200 where
201 T: Float + SparseElement + Debug + Copy + GpuDataType + std::iter::Sum,
202 {
203 match self.backend {
204 GpuBackend::Cuda => {
205 if let Some(ref handler) = self.cuda_handler {
206 #[cfg(feature = "gpu")]
207 {
208 if let Some(device) = device {
209 handler.execute_spmv(matrix, vector, device)
210 } else {
211 matrix.dot_vector(vector)
213 }
214 }
215 #[cfg(not(feature = "gpu"))]
216 handler.execute_spmv_cpu(matrix, vector)
217 } else {
218 Err(SparseError::ComputationError(
219 "CUDA handler not initialized".to_string(),
220 ))
221 }
222 }
223 GpuBackend::OpenCL => {
224 if let Some(ref handler) = self.opencl_handler {
225 #[cfg(feature = "gpu")]
226 {
227 if let Some(device) = device {
228 handler.execute_spmv(matrix, vector, device)
229 } else {
230 matrix.dot_vector(vector)
232 }
233 }
234 #[cfg(not(feature = "gpu"))]
235 handler.execute_spmv_cpu(matrix, vector)
236 } else {
237 Err(SparseError::ComputationError(
238 "OpenCL handler not initialized".to_string(),
239 ))
240 }
241 }
242 GpuBackend::Metal => {
243 if let Some(ref handler) = self.metal_handler {
244 #[cfg(feature = "gpu")]
245 {
246 if let Some(device) = device {
247 handler.execute_spmv(matrix, vector, device)
248 } else {
249 matrix.dot_vector(vector)
251 }
252 }
253 #[cfg(not(feature = "gpu"))]
254 handler.execute_spmv_cpu(matrix, vector)
255 } else {
256 Err(SparseError::ComputationError(
257 "Metal handler not initialized".to_string(),
258 ))
259 }
260 }
261 GpuBackend::Wgpu => {
262 if let Some(ref handler) = self.vulkan_handler {
263 #[cfg(feature = "gpu")]
264 {
265 if let Some(device) = device {
266 handler.execute_spmv(matrix, vector, device)
267 } else {
268 matrix.dot_vector(vector)
270 }
271 }
272 #[cfg(not(feature = "gpu"))]
273 handler.execute_spmv_cpu(matrix, vector)
274 } else {
275 Err(SparseError::ComputationError(
276 "Vulkan handler not initialized".to_string(),
277 ))
278 }
279 }
280 #[cfg(not(feature = "gpu"))]
281 GpuBackend::Vulkan => {
282 if let Some(ref handler) = self.vulkan_handler {
283 handler.execute_spmv_cpu(matrix, vector)
284 } else {
285 Err(SparseError::ComputationError(
286 "Vulkan handler not initialized".to_string(),
287 ))
288 }
289 }
290 GpuBackend::Cpu => {
291 matrix.dot_vector(vector)
293 }
294 GpuBackend::Rocm => {
295 matrix.dot_vector(vector)
297 }
298 }
299 }
300
301 pub fn spmv_optimized<T>(
303 &self,
304 matrix: &CsrArray<T>,
305 vector: &ArrayView1<T>,
306 device: Option<&GpuDevice>,
307 optimization_hint: OptimizationHint,
308 ) -> SparseResult<Array1<T>>
309 where
310 T: Float + SparseElement + Debug + Copy + GpuDataType + std::iter::Sum,
311 {
312 match self.backend {
313 GpuBackend::Cuda => {
314 if let Some(ref handler) = self.cuda_handler {
315 let cuda_level = optimization_hint.to_cuda_level();
316 #[cfg(feature = "gpu")]
317 {
318 if let Some(device) = device {
319 handler.execute_optimized_spmv(matrix, vector, device, cuda_level)
320 } else {
321 return Err(SparseError::ComputationError(
322 "GPU device required for CUDA operations".to_string(),
323 ));
324 }
325 }
326 #[cfg(not(feature = "gpu"))]
327 handler.execute_spmv_cpu(matrix, vector)
328 } else {
329 Err(SparseError::ComputationError(
330 "CUDA handler not initialized".to_string(),
331 ))
332 }
333 }
334 GpuBackend::OpenCL => {
335 if let Some(ref handler) = self.opencl_handler {
336 let opencl_level = optimization_hint.to_opencl_level();
337 #[cfg(feature = "gpu")]
338 {
339 if let Some(device) = device {
340 handler.execute_optimized_spmv(matrix, vector, device, opencl_level)
341 } else {
342 return Err(SparseError::ComputationError(
343 "GPU device required for OpenCL operations".to_string(),
344 ));
345 }
346 }
347 #[cfg(not(feature = "gpu"))]
348 handler.execute_spmv_cpu(matrix, vector)
349 } else {
350 Err(SparseError::ComputationError(
351 "OpenCL handler not initialized".to_string(),
352 ))
353 }
354 }
355 GpuBackend::Metal => {
356 if let Some(ref handler) = self.metal_handler {
357 let metal_level = optimization_hint.to_metal_level();
358 #[cfg(feature = "gpu")]
359 {
360 if let Some(device) = device {
361 handler.execute_optimized_spmv(matrix, vector, device, metal_level)
362 } else {
363 return Err(SparseError::ComputationError(
364 "GPU device required for Metal operations".to_string(),
365 ));
366 }
367 }
368 #[cfg(not(feature = "gpu"))]
369 handler.execute_spmv_cpu(matrix, vector)
370 } else {
371 Err(SparseError::ComputationError(
372 "Metal handler not initialized".to_string(),
373 ))
374 }
375 }
376 GpuBackend::Wgpu => {
377 if let Some(ref handler) = self.vulkan_handler {
378 let vulkan_level = optimization_hint.to_vulkan_level();
379 #[cfg(feature = "gpu")]
380 {
381 if let Some(device) = device {
382 handler.execute_optimized_spmv(matrix, vector, device, vulkan_level)
383 } else {
384 return Err(SparseError::ComputationError(
385 "GPU device required for Vulkan operations".to_string(),
386 ));
387 }
388 }
389 #[cfg(not(feature = "gpu"))]
390 handler.execute_spmv_cpu(matrix, vector)
391 } else {
392 Err(SparseError::ComputationError(
393 "Vulkan handler not initialized".to_string(),
394 ))
395 }
396 }
397 #[cfg(not(feature = "gpu"))]
398 GpuBackend::Vulkan => {
399 if let Some(ref handler) = self.vulkan_handler {
400 let vulkan_level = optimization_hint.to_vulkan_level();
401 handler.execute_spmv_cpu(matrix, vector)
402 } else {
403 Err(SparseError::ComputationError(
404 "Vulkan handler not initialized".to_string(),
405 ))
406 }
407 }
408 GpuBackend::Cpu | GpuBackend::Rocm => {
409 self.spmv(matrix, vector, device)
411 }
412 }
413 }
414
415 pub fn backend(&self) -> GpuBackend {
417 self.backend
418 }
419
420 pub fn is_gpu_available(&self) -> bool {
422 !matches!(self.backend, GpuBackend::Cpu)
423 }
424
425 pub fn get_backend_info(&self) -> BackendInfo {
427 match self.backend {
428 GpuBackend::Cuda => BackendInfo {
429 name: "CUDA".to_string(),
430 version: "Unknown".to_string(),
431 device_count: 1, supports_double_precision: true,
433 max_memory_mb: 8192, },
435 GpuBackend::OpenCL => BackendInfo {
436 name: "OpenCL".to_string(),
437 version: "Unknown".to_string(),
438 device_count: 1,
439 supports_double_precision: true,
440 max_memory_mb: 4096, },
442 GpuBackend::Metal => BackendInfo {
443 name: "Metal".to_string(),
444 version: "Unknown".to_string(),
445 device_count: 1,
446 supports_double_precision: false, max_memory_mb: if MetalDeviceInfo::detect().is_apple_silicon {
448 16384
449 } else {
450 8192
451 },
452 },
453 GpuBackend::Wgpu => BackendInfo {
454 name: "Vulkan".to_string(),
455 version: "Unknown".to_string(),
456 device_count: 1,
457 supports_double_precision: true,
458 max_memory_mb: 8192, },
460 #[cfg(not(feature = "gpu"))]
461 GpuBackend::Vulkan => BackendInfo {
462 name: "Vulkan".to_string(),
463 version: "Unknown".to_string(),
464 device_count: 1,
465 supports_double_precision: true,
466 max_memory_mb: 8192, },
468 GpuBackend::Cpu | GpuBackend::Rocm => BackendInfo {
469 name: "CPU".to_string(),
470 version: "Fallback".to_string(),
471 device_count: 0,
472 supports_double_precision: true,
473 max_memory_mb: 0,
474 },
475 }
476 }
477}
478
479impl Default for GpuSpMatVec {
480 fn default() -> Self {
481 Self::new().unwrap_or_else(|_| Self {
482 backend: GpuBackend::Cpu,
483 cuda_handler: None,
484 opencl_handler: None,
485 metal_handler: None,
486 vulkan_handler: None,
487 })
488 }
489}
490
/// Backend-agnostic optimization preference for `spmv_optimized`.
///
/// Each hint is translated to a backend-specific level via the
/// `to_cuda_level` / `to_opencl_level` / `to_metal_level` /
/// `to_vulkan_level` conversions.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum OptimizationHint {
    /// Baseline kernels on every backend.
    Basic,
    /// Default trade-off: mid-tier kernels (CUDA Vectorized, OpenCL
    /// Workgroup, Metal SimdGroup, Vulkan ComputeShader).
    #[default]
    Balanced,
    /// Most aggressive kernels (CUDA WarpLevel, OpenCL Vectorized, Metal
    /// AppleSilicon, Vulkan Maximum).
    Maximum,
    /// Memory-friendly choices (CUDA Basic, OpenCL Workgroup, Metal
    /// AppleSilicon, Vulkan Subgroup).
    MemoryOptimized,
}
504
505impl OptimizationHint {
506 pub fn to_cuda_level(self) -> CudaOptimizationLevel {
508 match self {
509 OptimizationHint::Basic => CudaOptimizationLevel::Basic,
510 OptimizationHint::Balanced => CudaOptimizationLevel::Vectorized,
511 OptimizationHint::Maximum => CudaOptimizationLevel::WarpLevel,
512 OptimizationHint::MemoryOptimized => CudaOptimizationLevel::Basic,
513 }
514 }
515
516 pub fn to_opencl_level(self) -> OpenCLOptimizationLevel {
518 match self {
519 OptimizationHint::Basic => OpenCLOptimizationLevel::Basic,
520 OptimizationHint::Balanced => OpenCLOptimizationLevel::Workgroup,
521 OptimizationHint::Maximum => OpenCLOptimizationLevel::Vectorized,
522 OptimizationHint::MemoryOptimized => OpenCLOptimizationLevel::Workgroup,
523 }
524 }
525
526 pub fn to_metal_level(self) -> MetalOptimizationLevel {
528 match self {
529 OptimizationHint::Basic => MetalOptimizationLevel::Basic,
530 OptimizationHint::Balanced => MetalOptimizationLevel::SimdGroup,
531 OptimizationHint::Maximum => MetalOptimizationLevel::AppleSilicon,
532 OptimizationHint::MemoryOptimized => MetalOptimizationLevel::AppleSilicon,
533 }
534 }
535
536 pub fn to_vulkan_level(self) -> VulkanOptimizationLevel {
538 match self {
539 OptimizationHint::Basic => VulkanOptimizationLevel::Basic,
540 OptimizationHint::Balanced => VulkanOptimizationLevel::ComputeShader,
541 OptimizationHint::Maximum => VulkanOptimizationLevel::Maximum,
542 OptimizationHint::MemoryOptimized => VulkanOptimizationLevel::Subgroup,
543 }
544 }
545}
546
/// Capability summary for the currently selected GPU backend.
///
/// Instances come from `GpuSpMatVec::get_backend_info`, which fills these
/// fields with static estimates rather than live device queries.
#[derive(Debug, Clone)]
pub struct BackendInfo {
    /// Human-readable backend name (e.g. "CUDA", "Metal", "CPU").
    pub name: String,
    /// Backend version string ("Unknown" or "Fallback" when not queried).
    pub version: String,
    /// Reported device count (0 for the CPU fallback).
    pub device_count: usize,
    /// Whether double-precision arithmetic is reported as supported.
    pub supports_double_precision: bool,
    /// Approximate device memory budget in MiB (0 for the CPU fallback).
    pub max_memory_mb: usize,
}
556
557pub mod convenience {
559 use super::*;
560
561 pub fn gpu_spmv<T>(matrix: &CsrArray<T>, vector: &ArrayView1<T>) -> SparseResult<Array1<T>>
563 where
564 T: Float + SparseElement + Debug + Copy + GpuDataType + std::iter::Sum,
565 {
566 let gpu_handler = GpuSpMatVec::new()?;
567 gpu_handler.spmv(matrix, vector, None)
568 }
569
570 pub fn gpu_spmv_optimized<T>(
572 matrix: &CsrArray<T>,
573 vector: &ArrayView1<T>,
574 optimization: OptimizationHint,
575 ) -> SparseResult<Array1<T>>
576 where
577 T: Float + SparseElement + Debug + Copy + GpuDataType + std::iter::Sum,
578 {
579 let gpu_handler = GpuSpMatVec::new()?;
580 gpu_handler.spmv_optimized(matrix, vector, None, optimization)
581 }
582
583 pub fn available_backends() -> Vec<GpuBackend> {
585 let mut backends = Vec::new();
586
587 if GpuSpMatVec::is_cuda_available() {
588 backends.push(GpuBackend::Cuda);
589 }
590
591 if GpuSpMatVec::is_vulkan_available() {
592 backends.push(GpuBackend::Wgpu);
593 }
594
595 if GpuSpMatVec::is_opencl_available() {
596 backends.push(GpuBackend::OpenCL);
597 }
598
599 if GpuSpMatVec::is_metal_available() {
600 backends.push(GpuBackend::Metal);
601 }
602
603 backends.push(GpuBackend::Cpu); backends
606 }
607}
608
#[cfg(test)]
mod tests {
    // NOTE: the previously present `use scirs2_core::ndarray::Array1;` was
    // unused by every test and produced an unused-import warning.
    use super::*;

    /// Construction must always succeed: detection falls back to CPU.
    #[test]
    fn test_gpu_spmv_creation() {
        let gpu_spmv = GpuSpMatVec::new();
        assert!(gpu_spmv.is_ok());
    }

    /// The exhaustive match guarantees detection only yields known variants;
    /// adding a variant breaks this test at compile time.
    #[test]
    fn test_backend_detection() {
        let backend = GpuSpMatVec::detect_best_backend();

        match backend {
            GpuBackend::Cuda
            | GpuBackend::OpenCL
            | GpuBackend::Metal
            | GpuBackend::Cpu
            | GpuBackend::Wgpu
            | GpuBackend::Rocm => (),
            #[cfg(not(feature = "gpu"))]
            GpuBackend::Vulkan => (),
        }
    }

    /// `Maximum` must map to the most aggressive level of each backend.
    #[test]
    fn test_optimization_hint_conversions() {
        let hint = OptimizationHint::Maximum;

        let cuda_level = hint.to_cuda_level();
        let opencl_level = hint.to_opencl_level();
        let metal_level = hint.to_metal_level();

        assert_eq!(cuda_level, CudaOptimizationLevel::WarpLevel);
        assert_eq!(opencl_level, OpenCLOptimizationLevel::Vectorized);
        assert_eq!(metal_level, MetalOptimizationLevel::AppleSilicon);
    }

    /// Backend info always carries non-empty name/version strings.
    #[test]
    fn test_backend_info() {
        let gpu_spmv = GpuSpMatVec::new().expect("Operation failed");
        let info = gpu_spmv.get_backend_info();

        assert!(!info.name.is_empty());
        assert!(!info.version.is_empty());
    }

    /// The CPU fallback is always offered, so the list is never empty.
    #[test]
    fn test_convenience_functions() {
        let backends = convenience::available_backends();
        assert!(!backends.is_empty());
        assert!(backends.contains(&GpuBackend::Cpu));
    }

    /// Smoke test only: the result depends on the host machine.
    #[test]
    fn test_is_gpu_available() {
        let gpu_spmv = GpuSpMatVec::new().expect("Operation failed");

        let _available = gpu_spmv.is_gpu_available();
    }

    /// The derived `Default` must stay pinned to `Balanced`.
    #[test]
    fn test_optimization_hint_default() {
        assert_eq!(OptimizationHint::default(), OptimizationHint::Balanced);
    }
}