1pub mod cuda;
7pub mod metal;
8pub mod opencl;
9
10#[cfg(feature = "gpu")]
12pub use scirs2_core::gpu::{
13 GpuBackend, GpuBuffer, GpuContext, GpuDataType, GpuDevice, GpuError, GpuKernelHandle,
14};
15
16#[cfg(not(feature = "gpu"))]
18pub use crate::gpu_ops::{
19 GpuBackend, GpuBuffer, GpuDataType, GpuDevice, GpuError, GpuKernelHandle,
20};
21
22pub use cuda::{CudaMemoryManager, CudaOptimizationLevel, CudaSpMatVec};
24pub use metal::{MetalDeviceInfo, MetalMemoryManager, MetalOptimizationLevel, MetalSpMatVec};
25pub use opencl::{
26 OpenCLMemoryManager, OpenCLOptimizationLevel, OpenCLPlatformInfo, OpenCLSpMatVec,
27};
28
29use crate::csr_array::CsrArray;
30use crate::error::{SparseError, SparseResult};
31use crate::sparray::SparseArray;
32use scirs2_core::ndarray::{Array1, ArrayView1};
33use scirs2_core::numeric::{Float, SparseElement};
34use std::fmt::Debug;
35
/// Sparse matrix–vector multiplication front end that dispatches to one of
/// several backend-specific handlers.
///
/// Constructors start with every handler set to `None`; `initialize_backend`
/// then fills in at most the one that matches `backend`.
pub struct GpuSpMatVec {
    /// Backend that `spmv` / `spmv_optimized` dispatch on.
    backend: GpuBackend,
    /// CUDA handler; populated only when `backend` is `GpuBackend::Cuda`.
    cuda_handler: Option<CudaSpMatVec>,
    /// OpenCL handler; populated only when `backend` is `GpuBackend::OpenCL`.
    opencl_handler: Option<OpenCLSpMatVec>,
    /// Metal handler; populated only when `backend` is `GpuBackend::Metal`.
    metal_handler: Option<MetalSpMatVec>,
}
45
46impl GpuSpMatVec {
47 pub fn new() -> SparseResult<Self> {
49 let backend = Self::detect_best_backend();
50
51 let mut handler = Self {
52 backend,
53 cuda_handler: None,
54 opencl_handler: None,
55 metal_handler: None,
56 };
57
58 handler.initialize_backend()?;
60
61 Ok(handler)
62 }
63
64 pub fn with_backend(backend: GpuBackend) -> SparseResult<Self> {
66 let mut handler = Self {
67 backend,
68 cuda_handler: None,
69 opencl_handler: None,
70 metal_handler: None,
71 };
72
73 handler.initialize_backend()?;
74
75 Ok(handler)
76 }
77
78 fn initialize_backend(&mut self) -> SparseResult<()> {
80 match self.backend {
81 GpuBackend::Cuda => {
82 self.cuda_handler = Some(CudaSpMatVec::new()?);
83 }
84 GpuBackend::OpenCL => {
85 self.opencl_handler = Some(OpenCLSpMatVec::new()?);
86 }
87 GpuBackend::Metal => {
88 self.metal_handler = Some(MetalSpMatVec::new()?);
89 }
90 GpuBackend::Cpu => {
91 }
93 _ => {
94 self.backend = GpuBackend::Cpu;
96 }
97 }
98
99 Ok(())
100 }
101
102 fn detect_best_backend() -> GpuBackend {
104 #[cfg(target_os = "macos")]
106 {
107 if Self::is_metal_available() {
108 return GpuBackend::Metal;
109 }
110 }
111
112 if Self::is_cuda_available() {
113 return GpuBackend::Cuda;
114 }
115
116 if Self::is_opencl_available() {
117 return GpuBackend::OpenCL;
118 }
119
120 GpuBackend::Cpu
121 }
122
123 fn is_cuda_available() -> bool {
125 #[cfg(feature = "gpu")]
127 {
128 std::env::var("CUDA_PATH").is_ok() || std::path::Path::new("/usr/local/cuda").exists()
130 }
131 #[cfg(not(feature = "gpu"))]
132 false
133 }
134
135 fn is_opencl_available() -> bool {
137 #[cfg(feature = "gpu")]
139 {
140 true
142 }
143 #[cfg(not(feature = "gpu"))]
144 false
145 }
146
147 fn is_metal_available() -> bool {
149 #[cfg(target_os = "macos")]
150 {
151 true
153 }
154 #[cfg(not(target_os = "macos"))]
155 false
156 }
157
158 pub fn spmv<T>(
160 &self,
161 matrix: &CsrArray<T>,
162 vector: &ArrayView1<T>,
163 device: Option<&GpuDevice>,
164 ) -> SparseResult<Array1<T>>
165 where
166 T: Float + SparseElement + Debug + Copy + GpuDataType + std::iter::Sum,
167 {
168 match self.backend {
169 GpuBackend::Cuda => {
170 if let Some(ref handler) = self.cuda_handler {
171 #[cfg(feature = "gpu")]
172 {
173 if let Some(device) = device {
174 handler.execute_spmv(matrix, vector, device)
175 } else {
176 return Err(SparseError::ComputationError(
177 "GPU device required for CUDA operations".to_string(),
178 ));
179 }
180 }
181 #[cfg(not(feature = "gpu"))]
182 handler.execute_spmv_cpu(matrix, vector)
183 } else {
184 Err(SparseError::ComputationError(
185 "CUDA handler not initialized".to_string(),
186 ))
187 }
188 }
189 GpuBackend::OpenCL => {
190 if let Some(ref handler) = self.opencl_handler {
191 #[cfg(feature = "gpu")]
192 {
193 if let Some(device) = device {
194 handler.execute_spmv(matrix, vector, device)
195 } else {
196 return Err(SparseError::ComputationError(
197 "GPU device required for OpenCL operations".to_string(),
198 ));
199 }
200 }
201 #[cfg(not(feature = "gpu"))]
202 handler.execute_spmv_cpu(matrix, vector)
203 } else {
204 Err(SparseError::ComputationError(
205 "OpenCL handler not initialized".to_string(),
206 ))
207 }
208 }
209 GpuBackend::Metal => {
210 if let Some(ref handler) = self.metal_handler {
211 #[cfg(feature = "gpu")]
212 {
213 if let Some(device) = device {
214 handler.execute_spmv(matrix, vector, device)
215 } else {
216 return Err(SparseError::ComputationError(
217 "GPU device required for Metal operations".to_string(),
218 ));
219 }
220 }
221 #[cfg(not(feature = "gpu"))]
222 handler.execute_spmv_cpu(matrix, vector)
223 } else {
224 Err(SparseError::ComputationError(
225 "Metal handler not initialized".to_string(),
226 ))
227 }
228 }
229 GpuBackend::Cpu => {
230 matrix.dot_vector(vector)
232 }
233 _ => {
234 matrix.dot_vector(vector)
236 }
237 }
238 }
239
240 pub fn spmv_optimized<T>(
242 &self,
243 matrix: &CsrArray<T>,
244 vector: &ArrayView1<T>,
245 device: Option<&GpuDevice>,
246 optimization_hint: OptimizationHint,
247 ) -> SparseResult<Array1<T>>
248 where
249 T: Float + SparseElement + Debug + Copy + GpuDataType + std::iter::Sum,
250 {
251 match self.backend {
252 GpuBackend::Cuda => {
253 if let Some(ref handler) = self.cuda_handler {
254 let cuda_level = optimization_hint.to_cuda_level();
255 #[cfg(feature = "gpu")]
256 {
257 if let Some(device) = device {
258 handler.execute_optimized_spmv(matrix, vector, device, cuda_level)
259 } else {
260 return Err(SparseError::ComputationError(
261 "GPU device required for CUDA operations".to_string(),
262 ));
263 }
264 }
265 #[cfg(not(feature = "gpu"))]
266 handler.execute_spmv_cpu(matrix, vector)
267 } else {
268 Err(SparseError::ComputationError(
269 "CUDA handler not initialized".to_string(),
270 ))
271 }
272 }
273 GpuBackend::OpenCL => {
274 if let Some(ref handler) = self.opencl_handler {
275 let opencl_level = optimization_hint.to_opencl_level();
276 #[cfg(feature = "gpu")]
277 {
278 if let Some(device) = device {
279 handler.execute_optimized_spmv(matrix, vector, device, opencl_level)
280 } else {
281 return Err(SparseError::ComputationError(
282 "GPU device required for OpenCL operations".to_string(),
283 ));
284 }
285 }
286 #[cfg(not(feature = "gpu"))]
287 handler.execute_spmv_cpu(matrix, vector)
288 } else {
289 Err(SparseError::ComputationError(
290 "OpenCL handler not initialized".to_string(),
291 ))
292 }
293 }
294 GpuBackend::Metal => {
295 if let Some(ref handler) = self.metal_handler {
296 let metal_level = optimization_hint.to_metal_level();
297 #[cfg(feature = "gpu")]
298 {
299 if let Some(device) = device {
300 handler.execute_optimized_spmv(matrix, vector, device, metal_level)
301 } else {
302 return Err(SparseError::ComputationError(
303 "GPU device required for Metal operations".to_string(),
304 ));
305 }
306 }
307 #[cfg(not(feature = "gpu"))]
308 handler.execute_spmv_cpu(matrix, vector)
309 } else {
310 Err(SparseError::ComputationError(
311 "Metal handler not initialized".to_string(),
312 ))
313 }
314 }
315 _ => {
316 self.spmv(matrix, vector, device)
318 }
319 }
320 }
321
322 pub fn backend(&self) -> GpuBackend {
324 self.backend
325 }
326
327 pub fn is_gpu_available(&self) -> bool {
329 !matches!(self.backend, GpuBackend::Cpu)
330 }
331
332 pub fn get_backend_info(&self) -> BackendInfo {
334 match self.backend {
335 GpuBackend::Cuda => BackendInfo {
336 name: "CUDA".to_string(),
337 version: "Unknown".to_string(),
338 device_count: 1, supports_double_precision: true,
340 max_memory_mb: 8192, },
342 GpuBackend::OpenCL => BackendInfo {
343 name: "OpenCL".to_string(),
344 version: "Unknown".to_string(),
345 device_count: 1,
346 supports_double_precision: true,
347 max_memory_mb: 4096, },
349 GpuBackend::Metal => BackendInfo {
350 name: "Metal".to_string(),
351 version: "Unknown".to_string(),
352 device_count: 1,
353 supports_double_precision: false, max_memory_mb: if MetalDeviceInfo::detect().is_apple_silicon {
355 16384
356 } else {
357 8192
358 },
359 },
360 _ => BackendInfo {
361 name: "CPU".to_string(),
362 version: "Fallback".to_string(),
363 device_count: 0,
364 supports_double_precision: true,
365 max_memory_mb: 0,
366 },
367 }
368 }
369}
370
371impl Default for GpuSpMatVec {
372 fn default() -> Self {
373 Self::new().unwrap_or_else(|_| Self {
374 backend: GpuBackend::Cpu,
375 cuda_handler: None,
376 opencl_handler: None,
377 metal_handler: None,
378 })
379 }
380}
381
/// Backend-independent optimization preference.
///
/// Each variant is translated into a backend-specific level by
/// `to_cuda_level`, `to_opencl_level` and `to_metal_level`.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum OptimizationHint {
    /// Maps to the `Basic` level of every backend.
    Basic,
    /// Default hint; maps to `Vectorized` (CUDA), `Workgroup` (OpenCL) and
    /// `SimdGroup` (Metal).
    #[default]
    Balanced,
    /// Maps to `WarpLevel` (CUDA), `Vectorized` (OpenCL) and `AppleSilicon`
    /// (Metal).
    Maximum,
    /// Maps to `Basic` (CUDA), `Workgroup` (OpenCL) and `AppleSilicon`
    /// (Metal).
    MemoryOptimized,
}
395
396impl OptimizationHint {
397 pub fn to_cuda_level(self) -> CudaOptimizationLevel {
399 match self {
400 OptimizationHint::Basic => CudaOptimizationLevel::Basic,
401 OptimizationHint::Balanced => CudaOptimizationLevel::Vectorized,
402 OptimizationHint::Maximum => CudaOptimizationLevel::WarpLevel,
403 OptimizationHint::MemoryOptimized => CudaOptimizationLevel::Basic,
404 }
405 }
406
407 pub fn to_opencl_level(self) -> OpenCLOptimizationLevel {
409 match self {
410 OptimizationHint::Basic => OpenCLOptimizationLevel::Basic,
411 OptimizationHint::Balanced => OpenCLOptimizationLevel::Workgroup,
412 OptimizationHint::Maximum => OpenCLOptimizationLevel::Vectorized,
413 OptimizationHint::MemoryOptimized => OpenCLOptimizationLevel::Workgroup,
414 }
415 }
416
417 pub fn to_metal_level(self) -> MetalOptimizationLevel {
419 match self {
420 OptimizationHint::Basic => MetalOptimizationLevel::Basic,
421 OptimizationHint::Balanced => MetalOptimizationLevel::SimdGroup,
422 OptimizationHint::Maximum => MetalOptimizationLevel::AppleSilicon,
423 OptimizationHint::MemoryOptimized => MetalOptimizationLevel::AppleSilicon,
424 }
425 }
426}
427
/// Summary of a backend as reported by `GpuSpMatVec::get_backend_info`.
#[derive(Clone, Debug)]
pub struct BackendInfo {
    /// Backend name, e.g. `"CUDA"` or `"CPU"`.
    pub name: String,
    /// Backend version string.
    pub version: String,
    /// Number of devices reported for the backend.
    pub device_count: usize,
    /// Whether the backend is reported to support double precision.
    pub supports_double_precision: bool,
    /// Reported memory limit in megabytes.
    pub max_memory_mb: usize,
}
437
438pub mod convenience {
440 use super::*;
441
442 pub fn gpu_spmv<T>(matrix: &CsrArray<T>, vector: &ArrayView1<T>) -> SparseResult<Array1<T>>
444 where
445 T: Float + SparseElement + Debug + Copy + GpuDataType + std::iter::Sum,
446 {
447 let gpu_handler = GpuSpMatVec::new()?;
448 gpu_handler.spmv(matrix, vector, None)
449 }
450
451 pub fn gpu_spmv_optimized<T>(
453 matrix: &CsrArray<T>,
454 vector: &ArrayView1<T>,
455 optimization: OptimizationHint,
456 ) -> SparseResult<Array1<T>>
457 where
458 T: Float + SparseElement + Debug + Copy + GpuDataType + std::iter::Sum,
459 {
460 let gpu_handler = GpuSpMatVec::new()?;
461 gpu_handler.spmv_optimized(matrix, vector, None, optimization)
462 }
463
464 pub fn available_backends() -> Vec<GpuBackend> {
466 let mut backends = Vec::new();
467
468 if GpuSpMatVec::is_cuda_available() {
469 backends.push(GpuBackend::Cuda);
470 }
471
472 if GpuSpMatVec::is_opencl_available() {
473 backends.push(GpuBackend::OpenCL);
474 }
475
476 if GpuSpMatVec::is_metal_available() {
477 backends.push(GpuBackend::Metal);
478 }
479
480 backends.push(GpuBackend::Cpu); backends
483 }
484}
485
#[cfg(test)]
mod tests {
    use super::*;

    /// Construction with the auto-detected backend is expected to succeed.
    #[test]
    fn test_gpu_spmv_creation() {
        let gpu_spmv = GpuSpMatVec::new();
        assert!(gpu_spmv.is_ok());
    }

    /// Detection must yield one of the backends this module knows about.
    #[test]
    fn test_backend_detection() {
        let backend = GpuSpMatVec::detect_best_backend();

        match backend {
            GpuBackend::Cuda | GpuBackend::OpenCL | GpuBackend::Metal | GpuBackend::Cpu => (),
            _ => panic!("Unexpected backend detected"),
        }
    }

    #[test]
    fn test_optimization_hint_conversions() {
        let hint = OptimizationHint::Maximum;

        let cuda_level = hint.to_cuda_level();
        let opencl_level = hint.to_opencl_level();
        let metal_level = hint.to_metal_level();

        assert_eq!(cuda_level, CudaOptimizationLevel::WarpLevel);
        assert_eq!(opencl_level, OpenCLOptimizationLevel::Vectorized);
        assert_eq!(metal_level, MetalOptimizationLevel::AppleSilicon);
    }

    #[test]
    fn test_backend_info() {
        let gpu_spmv = GpuSpMatVec::new().expect("backend initialization failed");
        let info = gpu_spmv.get_backend_info();

        assert!(!info.name.is_empty());
        assert!(!info.version.is_empty());
    }

    #[test]
    fn test_convenience_functions() {
        let backends = convenience::available_backends();
        assert!(!backends.is_empty());
        // The CPU fallback is always listed.
        assert!(backends.contains(&GpuBackend::Cpu));
    }

    /// `is_gpu_available` must agree with the backend actually selected.
    #[test]
    fn test_is_gpu_available() {
        let gpu_spmv = GpuSpMatVec::new().expect("backend initialization failed");

        let expected = gpu_spmv.backend() != GpuBackend::Cpu;
        assert_eq!(gpu_spmv.is_gpu_available(), expected);
    }

    /// The explicitly requested CPU backend needs no handler and reports the
    /// fixed CPU fallback description.
    #[test]
    fn test_cpu_backend() {
        let cpu = GpuSpMatVec::with_backend(GpuBackend::Cpu)
            .expect("the CPU backend requires no initialization");

        assert!(cpu.backend() == GpuBackend::Cpu);
        assert!(!cpu.is_gpu_available());

        let info = cpu.get_backend_info();
        assert_eq!(info.name, "CPU");
        assert_eq!(info.version, "Fallback");
        assert_eq!(info.device_count, 0);
    }

    #[test]
    fn test_optimization_hint_default() {
        assert_eq!(OptimizationHint::default(), OptimizationHint::Balanced);
    }
}