Skip to main content

scirs2_ndimage/backend/
mod.rs

1//! Backend delegation system for GPU acceleration
2//!
3//! This module provides a unified interface for delegating operations
4//! to different computational backends (CPU, CUDA, OpenCL, etc.).
5//! It allows seamless switching between implementations based on
6//! hardware availability and performance characteristics.
7
8pub mod concrete_gpu_backends;
9pub mod device_detection;
10pub mod gpu_acceleration_framework;
11pub mod kernels;
12
13#[cfg(feature = "cuda")]
14pub mod cuda;
15
16pub use device_detection::{DeviceCapability, DeviceManager, MemoryManager, SystemCapabilities};
17pub use gpu_acceleration_framework::{
18    CompiledKernel, GpuAccelerationManager, GpuKernelCache, GpuMemoryPool, GpuPerformanceReport,
19    KernelPerformanceStats, MemoryPoolConfig, MemoryPoolStatistics,
20};
21pub use kernels::{GpuBuffer, GpuKernelExecutor, KernelInfo};
22
23#[cfg(feature = "cuda")]
24pub use concrete_gpu_backends::CudaContext;
25#[cfg(feature = "opencl")]
26pub use concrete_gpu_backends::OpenCLContext;
27// TODO: Implement MetalContext in concrete_gpu_backends.rs
28// #[cfg(all(target_os = "macos", feature = "metal"))]
29// pub use concrete_gpu_backends::MetalContext;
30
31use crate::error::{NdimageError, NdimageResult};
32use scirs2_core::ndarray::{Array, ArrayView, Dimension};
33use scirs2_core::numeric::{Float, FromPrimitive};
34use std::fmt::Debug;
35use std::sync::Arc;
36
/// Computation backends an operation can be dispatched to.
///
/// The GPU variants only exist when the matching Cargo feature (and, for
/// Metal, a macOS target) is enabled, so any `match` that names them has to
/// carry the same `cfg` gate.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Backend {
    /// Plain CPU execution; always compiled in.
    Cpu,
    /// NVIDIA CUDA acceleration (requires the `cuda` feature).
    #[cfg(feature = "cuda")]
    Cuda,
    /// OpenCL acceleration (requires the `opencl` feature).
    #[cfg(feature = "opencl")]
    OpenCL,
    /// Apple Metal acceleration (requires macOS and the `metal` feature).
    #[cfg(all(target_os = "macos", feature = "metal"))]
    Metal,
    /// Defer the choice to the executor, which decides per call from the
    /// operation and the input size.
    Auto,
}
54
55impl Default for Backend {
56    fn default() -> Self {
57        Backend::Cpu
58    }
59}
60
61/// Configuration for backend selection and operation
62#[derive(Debug, Clone)]
63pub struct BackendConfig {
64    /// Preferred backend
65    pub backend: Backend,
66    /// Minimum array size for GPU acceleration (elements)
67    pub gpu_threshold: usize,
68    /// Maximum GPU memory to use (bytes)
69    pub gpu_memory_limit: Option<usize>,
70    /// Whether to allow fallback to CPU if GPU fails
71    pub allow_fallback: bool,
72    /// Device ID for multi-GPU systems
73    pub device_id: Option<usize>,
74}
75
76impl Default for BackendConfig {
77    fn default() -> Self {
78        Self {
79            backend: Backend::default(),
80            gpu_threshold: 100_000, // 100k elements minimum for GPU
81            gpu_memory_limit: None,
82            allow_fallback: true,
83            device_id: None,
84        }
85    }
86}
87
88/// Trait for operations that can be delegated to different backends
89pub trait BackendOp<T, D>: Send + Sync
90where
91    T: Float + FromPrimitive + Debug + Clone,
92    D: Dimension,
93{
94    /// Execute operation on CPU
95    fn execute_cpu(&self, input: &ArrayView<T, D>) -> NdimageResult<Array<T, D>>;
96
97    /// Execute operation on GPU (if available)
98    #[cfg(feature = "gpu")]
99    fn execute_gpu(&self, input: &ArrayView<T, D>, backend: Backend) -> NdimageResult<Array<T, D>>;
100
101    /// Get estimated memory requirements
102    fn memory_requirement(&self, input_shape: &[usize]) -> usize;
103
104    /// Check if this operation benefits from GPU acceleration
105    fn benefits_from_gpu(&self, array_size: usize) -> bool {
106        array_size > 50_000 // Default threshold
107    }
108}
109
110/// Backend executor that handles delegation
111pub struct BackendExecutor {
112    config: BackendConfig,
113    #[cfg(feature = "gpu")]
114    gpu_context: Option<Arc<dyn GpuContext>>,
115}
116
117impl BackendExecutor {
118    pub fn new(config: BackendConfig) -> NdimageResult<Self> {
119        #[cfg(feature = "gpu")]
120        let gpu_context: Option<Arc<dyn GpuContext>> = match config.backend {
121            #[cfg(feature = "cuda")]
122            Backend::Cuda => Some(Arc::new(CudaContext::new(config.device_id)?)),
123            #[cfg(feature = "opencl")]
124            Backend::OpenCL => Some(Arc::new(OpenCLContext::new(config.device_id)?)),
125            // TODO: Implement Metal backend
126            // #[cfg(all(target_os = "macos", feature = "metal"))]
127            // Backend::Metal => Some(Arc::new(MetalContext::new(_config.device_id)?),
128            _ => None,
129        };
130
131        Ok(Self {
132            config,
133            #[cfg(feature = "gpu")]
134            gpu_context,
135        })
136    }
137
138    /// Execute an operation with automatic backend selection
139    pub fn execute<T, D, Op>(&self, input: &ArrayView<T, D>, op: Op) -> NdimageResult<Array<T, D>>
140    where
141        T: Float + FromPrimitive + Debug + Clone + Send + Sync + 'static,
142        D: Dimension,
143        Op: BackendOp<T, D>,
144    {
145        let array_size = input.len();
146        let backend = self.select_backend(&op, array_size)?;
147
148        match backend {
149            Backend::Cpu => op.execute_cpu(input),
150            #[cfg(feature = "gpu")]
151            _ => match op.execute_gpu(input, backend) {
152                Ok(result) => Ok(result),
153                Err(e) if self.config.allow_fallback => {
154                    eprintln!("GPU execution failed, falling back to CPU: {}", e);
155                    op.execute_cpu(input)
156                }
157                Err(e) => Err(e),
158            },
159            #[cfg(not(feature = "gpu"))]
160            _ => op.execute_cpu(input),
161        }
162    }
163
164    /// Select the best backend for an operation
165    fn select_backend<T, D, Op>(&self, op: &Op, array_size: usize) -> NdimageResult<Backend>
166    where
167        T: Float + FromPrimitive + Debug + Clone,
168        D: Dimension,
169        Op: BackendOp<T, D>,
170    {
171        match self.config.backend {
172            Backend::Auto => {
173                // Automatic selection based on heuristics
174                if array_size < self.config.gpu_threshold {
175                    Ok(Backend::Cpu)
176                } else if op.benefits_from_gpu(array_size) {
177                    // Check available GPU backends
178                    #[cfg(feature = "cuda")]
179                    if self.is_cuda_available() {
180                        return Ok(Backend::Cuda);
181                    }
182                    #[cfg(feature = "opencl")]
183                    if self.is_opencl_available() {
184                        return Ok(Backend::OpenCL);
185                    }
186                    #[cfg(all(target_os = "macos", feature = "metal"))]
187                    if self.is_metal_available() {
188                        return Ok(Backend::Metal);
189                    }
190                    Ok(Backend::Cpu)
191                } else {
192                    Ok(Backend::Cpu)
193                }
194            }
195            backend => Ok(backend),
196        }
197    }
198
199    #[cfg(feature = "cuda")]
200    fn is_cuda_available(&self) -> bool {
201        device_detection::get_device_manager()
202            .map(|manager| {
203                manager
204                    .lock()
205                    .expect("Operation failed")
206                    .is_backend_available(Backend::Cuda)
207            })
208            .unwrap_or(false)
209    }
210
211    #[cfg(feature = "opencl")]
212    fn is_opencl_available(&self) -> bool {
213        device_detection::get_device_manager()
214            .map(|manager| {
215                manager
216                    .lock()
217                    .expect("Operation failed")
218                    .is_backend_available(Backend::OpenCL)
219            })
220            .unwrap_or(false)
221    }
222
223    #[cfg(all(target_os = "macos", feature = "metal"))]
224    fn is_metal_available(&self) -> bool {
225        device_detection::get_device_manager()
226            .map(|manager| {
227                manager
228                    .lock()
229                    .expect("Operation failed")
230                    .is_backend_available(Backend::Metal)
231            })
232            .unwrap_or(false)
233    }
234}
235
/// Common interface implemented by each GPU backend's context type.
#[cfg(feature = "gpu")]
pub trait GpuContext: Send + Sync {
    /// Name identifying this context.
    fn name(&self) -> &str;

    /// Number of devices reported by this context.
    fn device_count(&self) -> usize;

    /// Index of the device currently in use.
    fn current_device(&self) -> usize;

    /// Memory usage as a `(used, total)` pair.
    // NOTE(review): units are presumably bytes — confirm against implementors.
    fn memory_info(&self) -> (usize, usize);
}
244
/// Example operation: a Gaussian filter that can run through the backend
/// executor.
pub struct GaussianFilterOp<T> {
    /// Sigma values handed to the filter implementation.
    sigma: Vec<T>,
    /// Optional truncation value forwarded to the filter implementation.
    truncate: Option<T>,
}
250
251impl<T: Float + FromPrimitive + Debug + Clone> GaussianFilterOp<T> {
252    pub fn new(sigma: Vec<T>, truncate: Option<T>) -> Self {
253        Self { sigma, truncate }
254    }
255}
256
257impl<T, D> BackendOp<T, D> for GaussianFilterOp<T>
258where
259    T: Float + FromPrimitive + Debug + Clone + Default + Send + Sync + 'static,
260    D: Dimension + 'static,
261{
262    fn execute_cpu(&self, input: &ArrayView<T, D>) -> NdimageResult<Array<T, D>> {
263        // Call the existing CPU implementation
264        crate::filters::gaussian_filter_chunked(
265            &input.to_owned(),
266            &self.sigma,
267            self.truncate,
268            crate::filters::BorderMode::Reflect,
269            None,
270        )
271    }
272
273    #[cfg(feature = "gpu")]
274    fn execute_gpu(&self, input: &ArrayView<T, D>, backend: Backend) -> NdimageResult<Array<T, D>> {
275        match backend {
276            #[cfg(feature = "cuda")]
277            Backend::Cuda => {
278                // Would call CUDA implementation
279                cuda_gaussian_filter(input, &self.sigma, self.truncate)
280            }
281            _ => self.execute_cpu(input),
282        }
283    }
284
285    fn memory_requirement(&self, input_shape: &[usize]) -> usize {
286        let elements: usize = input_shape.iter().product();
287        // Input + output + temporary buffers
288        elements * std::mem::size_of::<T>() * 3
289    }
290
291    fn benefits_from_gpu(&self, array_size: usize) -> bool {
292        // Gaussian filter benefits from GPU for large arrays
293        array_size > 100_000
294    }
295}
296
/// Applies a Gaussian filter through the CUDA backend.
///
/// Only two-dimensional inputs are supported. The first two entries of
/// `sigma` are used; any further entries are ignored. `_truncate` is accepted
/// for signature parity with the CPU path but is not used here.
///
/// # Errors
///
/// * `NotImplementedError` when the input is not 2D, or when fewer than two
///   sigma values are supplied for a 2D input.
/// * `DimensionError` when converting to or from the 2D representation fails.
/// * Any error produced while creating the CUDA operations handle or running
///   the 2D filter.
#[cfg(feature = "cuda")]
#[allow(dead_code)]
fn cuda_gaussian_filter<T, D>(
    input: &ArrayView<T, D>,
    sigma: &[T],
    _truncate: Option<T>,
) -> NdimageResult<Array<T, D>>
where
    T: Float + FromPrimitive + Debug + Clone + Default + Send + Sync + 'static,
    D: Dimension,
{
    // Currently only 2D arrays are supported for GPU acceleration.
    if input.ndim() != 2 {
        return Err(NdimageError::NotImplementedError(
            "CUDA Gaussian filter currently only supports 2D arrays".into(),
        ));
    }

    let input_2d = input
        .view()
        .into_dimensionality::<scirs2_core::ndarray::Ix2>()
        .map_err(|_| NdimageError::DimensionError("Failed to convert to 2D array".into()))?;

    // A 2D input with too few sigmas is a different problem from a non-2D
    // input, so it gets its own message instead of the dimensionality one.
    let sigma_2d = match sigma {
        [first, second, ..] => [*first, *second],
        _ => {
            return Err(NdimageError::NotImplementedError(
                "CUDA Gaussian filter requires at least two sigma values for 2D arrays".into(),
            ));
        }
    };

    let cuda_ops = cuda::CudaOperations::new(None)?;
    let result_2d = cuda_ops.gaussian_filter_2d(&input_2d, sigma_2d)?;

    // Convert back to the caller's dimension type.
    result_2d
        .into_dimensionality::<D>()
        .map_err(|_| NdimageError::DimensionError("Failed to convert result dimension".into()))
}
333
334/// Builder for creating backend executors
335pub struct BackendBuilder {
336    config: BackendConfig,
337}
338
339impl BackendBuilder {
340    pub fn new() -> Self {
341        Self {
342            config: BackendConfig::default(),
343        }
344    }
345
346    pub fn backend(mut self, backend: Backend) -> Self {
347        self.config.backend = backend;
348        self
349    }
350
351    pub fn gpu_threshold(mut self, threshold: usize) -> Self {
352        self.config.gpu_threshold = threshold;
353        self
354    }
355
356    pub fn gpu_memory_limit(mut self, limit: usize) -> Self {
357        self.config.gpu_memory_limit = Some(limit);
358        self
359    }
360
361    pub fn allow_fallback(mut self, allow: bool) -> Self {
362        self.config.allow_fallback = allow;
363        self
364    }
365
366    pub fn device_id(mut self, id: usize) -> Self {
367        self.config.device_id = Some(id);
368        self
369    }
370
371    pub fn build(self) -> NdimageResult<BackendExecutor> {
372        BackendExecutor::new(self.config)
373    }
374}
375
376/// Convenience function to create an auto-selecting backend executor
377#[allow(dead_code)]
378pub fn auto_backend() -> NdimageResult<BackendExecutor> {
379    BackendBuilder::new().backend(Backend::Auto).build()
380}
381
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::arr2;

    /// A 2x2 input is far below the 1000-element threshold, so automatic
    /// selection should take the CPU path and the call should succeed.
    #[test]
    fn test_backend_selection() {
        let executor = BackendExecutor::new(BackendConfig {
            backend: Backend::Auto,
            gpu_threshold: 1000,
            ..Default::default()
        })
        .expect("Operation failed");

        let input = arr2(&[[1.0, 2.0], [3.0, 4.0]]);
        let gaussian = GaussianFilterOp::new(vec![1.0, 1.0], None);

        // Small array should use CPU
        let outcome = executor.execute(&input.view(), gaussian);
        let _result = outcome.expect("Operation failed");
    }

    /// Values passed to the builder must end up in the executor's config.
    #[test]
    fn test_backend_builder() {
        let builder = BackendBuilder::new()
            .backend(Backend::Cpu)
            .gpu_threshold(50_000)
            .allow_fallback(true);
        let executor = builder.build().expect("Operation failed");

        let BackendConfig {
            backend,
            gpu_threshold,
            allow_fallback,
            ..
        } = &executor.config;

        assert_eq!(*backend, Backend::Cpu);
        assert_eq!(*gpu_threshold, 50_000);
        assert!(*allow_fallback);
    }
}
418}