1pub mod cuda;
7pub mod metal;
8pub mod opencl;
9
10#[cfg(feature = "gpu")]
12pub use scirs2_core::gpu::{
13 GpuBackend, GpuBuffer, GpuContext, GpuDataType, GpuDevice, GpuError, GpuKernelHandle,
14};
15
16#[cfg(not(feature = "gpu"))]
18pub use crate::gpu_ops::{
19 GpuBackend, GpuBuffer, GpuDataType, GpuDevice, GpuError, GpuKernelHandle,
20};
21
22pub use cuda::{CudaMemoryManager, CudaOptimizationLevel, CudaSpMatVec};
24pub use metal::{MetalDeviceInfo, MetalMemoryManager, MetalOptimizationLevel, MetalSpMatVec};
25pub use opencl::{
26 OpenCLMemoryManager, OpenCLOptimizationLevel, OpenCLPlatformInfo, OpenCLSpMatVec,
27};
28
29use crate::csr_array::CsrArray;
30use crate::error::{SparseError, SparseResult};
31use crate::sparray::SparseArray;
32use scirs2_core::ndarray::{Array1, ArrayView1};
33use scirs2_core::numeric::Float;
34use std::fmt::Debug;
35
36pub struct GpuSpMatVec {
40 backend: GpuBackend,
41 cuda_handler: Option<CudaSpMatVec>,
42 opencl_handler: Option<OpenCLSpMatVec>,
43 metal_handler: Option<MetalSpMatVec>,
44}
45
46impl GpuSpMatVec {
47 pub fn new() -> SparseResult<Self> {
49 let backend = Self::detect_best_backend();
50
51 let mut handler = Self {
52 backend,
53 cuda_handler: None,
54 opencl_handler: None,
55 metal_handler: None,
56 };
57
58 handler.initialize_backend()?;
60
61 Ok(handler)
62 }
63
64 pub fn with_backend(backend: GpuBackend) -> SparseResult<Self> {
66 let mut handler = Self {
67 backend,
68 cuda_handler: None,
69 opencl_handler: None,
70 metal_handler: None,
71 };
72
73 handler.initialize_backend()?;
74
75 Ok(handler)
76 }
77
78 fn initialize_backend(&mut self) -> SparseResult<()> {
80 match self.backend {
81 GpuBackend::Cuda => {
82 self.cuda_handler = Some(CudaSpMatVec::new()?);
83 }
84 GpuBackend::OpenCL => {
85 self.opencl_handler = Some(OpenCLSpMatVec::new()?);
86 }
87 GpuBackend::Metal => {
88 self.metal_handler = Some(MetalSpMatVec::new()?);
89 }
90 GpuBackend::Cpu => {
91 }
93 _ => {
94 self.backend = GpuBackend::Cpu;
96 }
97 }
98
99 Ok(())
100 }
101
102 fn detect_best_backend() -> GpuBackend {
104 #[cfg(target_os = "macos")]
106 {
107 if Self::is_metal_available() {
108 return GpuBackend::Metal;
109 }
110 }
111
112 if Self::is_cuda_available() {
113 return GpuBackend::Cuda;
114 }
115
116 if Self::is_opencl_available() {
117 return GpuBackend::OpenCL;
118 }
119
120 GpuBackend::Cpu
121 }
122
123 fn is_cuda_available() -> bool {
125 #[cfg(feature = "gpu")]
127 {
128 std::env::var("CUDA_PATH").is_ok() || std::path::Path::new("/usr/local/cuda").exists()
130 }
131 #[cfg(not(feature = "gpu"))]
132 false
133 }
134
135 fn is_opencl_available() -> bool {
137 #[cfg(feature = "gpu")]
139 {
140 true
142 }
143 #[cfg(not(feature = "gpu"))]
144 false
145 }
146
147 fn is_metal_available() -> bool {
149 #[cfg(target_os = "macos")]
150 {
151 true
153 }
154 #[cfg(not(target_os = "macos"))]
155 false
156 }
157
158 pub fn spmv<T>(
160 &self,
161 matrix: &CsrArray<T>,
162 vector: &ArrayView1<T>,
163 device: Option<&GpuDevice>,
164 ) -> SparseResult<Array1<T>>
165 where
166 T: Float + Debug + Copy + GpuDataType + std::iter::Sum,
167 {
168 match self.backend {
169 GpuBackend::Cuda => {
170 if let Some(ref handler) = self.cuda_handler {
171 #[cfg(feature = "gpu")]
172 {
173 if let Some(device) = device {
174 handler.execute_spmv(matrix, vector, device)
175 } else {
176 return Err(SparseError::ComputationError(
177 "GPU device required for CUDA operations".to_string(),
178 ));
179 }
180 }
181 #[cfg(not(feature = "gpu"))]
182 handler.execute_spmv_cpu(matrix, vector)
183 } else {
184 Err(SparseError::ComputationError(
185 "CUDA handler not initialized".to_string(),
186 ))
187 }
188 }
189 GpuBackend::OpenCL => {
190 if let Some(ref handler) = self.opencl_handler {
191 #[cfg(feature = "gpu")]
192 {
193 if let Some(device) = device {
194 handler.execute_spmv(matrix, vector, device)
195 } else {
196 return Err(SparseError::ComputationError(
197 "GPU device required for OpenCL operations".to_string(),
198 ));
199 }
200 }
201 #[cfg(not(feature = "gpu"))]
202 handler.execute_spmv_cpu(matrix, vector)
203 } else {
204 Err(SparseError::ComputationError(
205 "OpenCL handler not initialized".to_string(),
206 ))
207 }
208 }
209 GpuBackend::Metal => {
210 if let Some(ref handler) = self.metal_handler {
211 #[cfg(feature = "gpu")]
212 {
213 if let Some(device) = device {
214 handler.execute_spmv(matrix, vector, device)
215 } else {
216 return Err(SparseError::ComputationError(
217 "GPU device required for Metal operations".to_string(),
218 ));
219 }
220 }
221 #[cfg(not(feature = "gpu"))]
222 handler.execute_spmv_cpu(matrix, vector)
223 } else {
224 Err(SparseError::ComputationError(
225 "Metal handler not initialized".to_string(),
226 ))
227 }
228 }
229 GpuBackend::Cpu => {
230 matrix.dot_vector(vector)
232 }
233 _ => {
234 matrix.dot_vector(vector)
236 }
237 }
238 }
239
240 pub fn spmv_optimized<T>(
242 &self,
243 matrix: &CsrArray<T>,
244 vector: &ArrayView1<T>,
245 device: Option<&GpuDevice>,
246 optimization_hint: OptimizationHint,
247 ) -> SparseResult<Array1<T>>
248 where
249 T: Float + Debug + Copy + GpuDataType + std::iter::Sum,
250 {
251 match self.backend {
252 GpuBackend::Cuda => {
253 if let Some(ref handler) = self.cuda_handler {
254 let cuda_level = optimization_hint.to_cuda_level();
255 #[cfg(feature = "gpu")]
256 {
257 if let Some(device) = device {
258 handler.execute_optimized_spmv(matrix, vector, device, cuda_level)
259 } else {
260 return Err(SparseError::ComputationError(
261 "GPU device required for CUDA operations".to_string(),
262 ));
263 }
264 }
265 #[cfg(not(feature = "gpu"))]
266 handler.execute_spmv_cpu(matrix, vector)
267 } else {
268 Err(SparseError::ComputationError(
269 "CUDA handler not initialized".to_string(),
270 ))
271 }
272 }
273 GpuBackend::OpenCL => {
274 if let Some(ref handler) = self.opencl_handler {
275 let opencl_level = optimization_hint.to_opencl_level();
276 #[cfg(feature = "gpu")]
277 {
278 if let Some(device) = device {
279 handler.execute_optimized_spmv(matrix, vector, device, opencl_level)
280 } else {
281 return Err(SparseError::ComputationError(
282 "GPU device required for OpenCL operations".to_string(),
283 ));
284 }
285 }
286 #[cfg(not(feature = "gpu"))]
287 handler.execute_spmv_cpu(matrix, vector)
288 } else {
289 Err(SparseError::ComputationError(
290 "OpenCL handler not initialized".to_string(),
291 ))
292 }
293 }
294 GpuBackend::Metal => {
295 if let Some(ref handler) = self.metal_handler {
296 let metal_level = optimization_hint.to_metal_level();
297 #[cfg(feature = "gpu")]
298 {
299 if let Some(device) = device {
300 handler.execute_optimized_spmv(matrix, vector, device, metal_level)
301 } else {
302 return Err(SparseError::ComputationError(
303 "GPU device required for Metal operations".to_string(),
304 ));
305 }
306 }
307 #[cfg(not(feature = "gpu"))]
308 handler.execute_spmv_cpu(matrix, vector)
309 } else {
310 Err(SparseError::ComputationError(
311 "Metal handler not initialized".to_string(),
312 ))
313 }
314 }
315 _ => {
316 self.spmv(matrix, vector, device)
318 }
319 }
320 }
321
322 pub fn backend(&self) -> GpuBackend {
324 self.backend
325 }
326
327 pub fn is_gpu_available(&self) -> bool {
329 !matches!(self.backend, GpuBackend::Cpu)
330 }
331
332 pub fn get_backend_info(&self) -> BackendInfo {
334 match self.backend {
335 GpuBackend::Cuda => BackendInfo {
336 name: "CUDA".to_string(),
337 version: "Unknown".to_string(),
338 device_count: 1, supports_double_precision: true,
340 max_memory_mb: 8192, },
342 GpuBackend::OpenCL => BackendInfo {
343 name: "OpenCL".to_string(),
344 version: "Unknown".to_string(),
345 device_count: 1,
346 supports_double_precision: true,
347 max_memory_mb: 4096, },
349 GpuBackend::Metal => BackendInfo {
350 name: "Metal".to_string(),
351 version: "Unknown".to_string(),
352 device_count: 1,
353 supports_double_precision: false, max_memory_mb: if MetalDeviceInfo::detect().is_apple_silicon {
355 16384
356 } else {
357 8192
358 },
359 },
360 _ => BackendInfo {
361 name: "CPU".to_string(),
362 version: "Fallback".to_string(),
363 device_count: 0,
364 supports_double_precision: true,
365 max_memory_mb: 0,
366 },
367 }
368 }
369}
370
371impl Default for GpuSpMatVec {
372 fn default() -> Self {
373 Self::new().unwrap_or_else(|_| Self {
374 backend: GpuBackend::Cpu,
375 cuda_handler: None,
376 opencl_handler: None,
377 metal_handler: None,
378 })
379 }
380}
381
/// Backend-independent optimization preference.
///
/// Each variant is translated into a backend-specific optimization level by
/// the `to_cuda_level`, `to_opencl_level` and `to_metal_level` conversions.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum OptimizationHint {
    /// Maps to the `Basic` level of every backend.
    Basic,
    /// Middle-ground preference; this is the default hint.
    Balanced,
    /// Requests the most aggressive level each backend maps it to.
    Maximum,
    /// Preference oriented towards memory usage.
    MemoryOptimized,
}
394
395impl OptimizationHint {
396 pub fn to_cuda_level(self) -> CudaOptimizationLevel {
398 match self {
399 OptimizationHint::Basic => CudaOptimizationLevel::Basic,
400 OptimizationHint::Balanced => CudaOptimizationLevel::Vectorized,
401 OptimizationHint::Maximum => CudaOptimizationLevel::WarpLevel,
402 OptimizationHint::MemoryOptimized => CudaOptimizationLevel::Basic,
403 }
404 }
405
406 pub fn to_opencl_level(self) -> OpenCLOptimizationLevel {
408 match self {
409 OptimizationHint::Basic => OpenCLOptimizationLevel::Basic,
410 OptimizationHint::Balanced => OpenCLOptimizationLevel::Workgroup,
411 OptimizationHint::Maximum => OpenCLOptimizationLevel::Vectorized,
412 OptimizationHint::MemoryOptimized => OpenCLOptimizationLevel::Workgroup,
413 }
414 }
415
416 pub fn to_metal_level(self) -> MetalOptimizationLevel {
418 match self {
419 OptimizationHint::Basic => MetalOptimizationLevel::Basic,
420 OptimizationHint::Balanced => MetalOptimizationLevel::SimdGroup,
421 OptimizationHint::Maximum => MetalOptimizationLevel::AppleSilicon,
422 OptimizationHint::MemoryOptimized => MetalOptimizationLevel::AppleSilicon,
423 }
424 }
425}
426
427impl Default for OptimizationHint {
428 fn default() -> Self {
429 Self::Balanced
430 }
431}
432
/// Descriptive summary of a compute backend.
#[derive(Clone, Debug)]
pub struct BackendInfo {
    /// Human-readable backend name (for example `"CUDA"` or `"CPU"`).
    pub name: String,
    /// Backend version string.
    pub version: String,
    /// Number of devices reported for the backend.
    pub device_count: usize,
    /// Whether the backend is reported to support double precision.
    pub supports_double_precision: bool,
    /// Reported memory capacity in megabytes.
    pub max_memory_mb: usize,
}
442
443pub mod convenience {
445 use super::*;
446
447 pub fn gpu_spmv<T>(matrix: &CsrArray<T>, vector: &ArrayView1<T>) -> SparseResult<Array1<T>>
449 where
450 T: Float + Debug + Copy + GpuDataType + std::iter::Sum,
451 {
452 let gpu_handler = GpuSpMatVec::new()?;
453 gpu_handler.spmv(matrix, vector, None)
454 }
455
456 pub fn gpu_spmv_optimized<T>(
458 matrix: &CsrArray<T>,
459 vector: &ArrayView1<T>,
460 optimization: OptimizationHint,
461 ) -> SparseResult<Array1<T>>
462 where
463 T: Float + Debug + Copy + GpuDataType + std::iter::Sum,
464 {
465 let gpu_handler = GpuSpMatVec::new()?;
466 gpu_handler.spmv_optimized(matrix, vector, None, optimization)
467 }
468
469 pub fn available_backends() -> Vec<GpuBackend> {
471 let mut backends = Vec::new();
472
473 if GpuSpMatVec::is_cuda_available() {
474 backends.push(GpuBackend::Cuda);
475 }
476
477 if GpuSpMatVec::is_opencl_available() {
478 backends.push(GpuBackend::OpenCL);
479 }
480
481 if GpuSpMatVec::is_metal_available() {
482 backends.push(GpuBackend::Metal);
483 }
484
485 backends.push(GpuBackend::Cpu); backends
488 }
489}
490
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::Array1;

    /// Auto-detected construction must succeed.
    #[test]
    fn test_gpu_spmv_creation() {
        assert!(GpuSpMatVec::new().is_ok());
    }

    /// Detection must yield one of the four backends this module handles.
    #[test]
    fn test_backend_detection() {
        let detected = GpuSpMatVec::detect_best_backend();
        let is_known = matches!(
            detected,
            GpuBackend::Cuda | GpuBackend::OpenCL | GpuBackend::Metal | GpuBackend::Cpu
        );

        assert!(is_known, "Unexpected backend detected");
    }

    /// `Maximum` maps to the expected level on every backend.
    #[test]
    fn test_optimization_hint_conversions() {
        let hint = OptimizationHint::Maximum;

        assert_eq!(hint.to_cuda_level(), CudaOptimizationLevel::WarpLevel);
        assert_eq!(hint.to_opencl_level(), OpenCLOptimizationLevel::Vectorized);
        assert_eq!(hint.to_metal_level(), MetalOptimizationLevel::AppleSilicon);
    }

    /// Backend info always carries a non-empty name and version.
    #[test]
    fn test_backend_info() {
        let info = GpuSpMatVec::new().unwrap().get_backend_info();

        assert!(!info.name.is_empty());
        assert!(!info.version.is_empty());
    }

    /// The CPU fallback is always listed among the available backends.
    #[test]
    fn test_convenience_functions() {
        let listed = convenience::available_backends();

        assert!(!listed.is_empty());
        assert!(listed.contains(&GpuBackend::Cpu));
    }

    /// Querying GPU availability must not panic, whatever the answer is.
    #[test]
    fn test_is_gpu_available() {
        let handler = GpuSpMatVec::new().unwrap();

        let _ = handler.is_gpu_available();
    }

    /// The default hint is `Balanced`.
    #[test]
    fn test_optimization_hint_default() {
        let default_hint = OptimizationHint::default();

        assert_eq!(default_hint, OptimizationHint::Balanced);
    }
}