scirs2_ndimage/backend/
mod.rs

//! Backend delegation system for GPU acceleration
//!
//! This module provides a unified interface for delegating operations
//! to different computational backends (CPU, CUDA, OpenCL, etc.).
//! It allows seamless switching between implementations based on
//! hardware availability and performance characteristics.
7
8pub mod concrete_gpu_backends;
9pub mod device_detection;
10pub mod gpu_acceleration_framework;
11pub mod kernels;
12
13#[cfg(feature = "cuda")]
14pub mod cuda;
15
16pub use device_detection::{DeviceCapability, DeviceManager, MemoryManager, SystemCapabilities};
17pub use gpu_acceleration_framework::{
18    CompiledKernel, GpuAccelerationManager, GpuKernelCache, GpuMemoryPool, GpuPerformanceReport,
19    KernelPerformanceStats, MemoryPoolConfig, MemoryPoolStatistics,
20};
21pub use kernels::{GpuBuffer, GpuKernelExecutor, KernelInfo};
22
23#[cfg(feature = "cuda")]
24pub use concrete_gpu_backends::CudaContext;
25#[cfg(feature = "opencl")]
26pub use concrete_gpu_backends::OpenCLContext;
// TODO: Implement MetalContext in concrete_gpu_backends.rs
// #[cfg(all(target_os = "macos", feature = "metal"))]
// pub use concrete_gpu_backends::MetalContext;
30
31use crate::error::{NdimageError, NdimageResult};
32use scirs2_core::ndarray::{Array, ArrayView, Dimension};
33use scirs2_core::numeric::{Float, FromPrimitive};
34use std::fmt::Debug;
35use std::sync::Arc;
36
/// Available computation backends
///
/// GPU variants only exist when the corresponding Cargo feature (and, for
/// Metal, the macOS target) is enabled. [`Backend::Cpu`] is the default.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum Backend {
    /// CPU-based implementation (default)
    #[default]
    Cpu,
    /// NVIDIA CUDA GPU acceleration
    #[cfg(feature = "cuda")]
    Cuda,
    /// OpenCL GPU acceleration
    #[cfg(feature = "opencl")]
    OpenCL,
    /// Apple Metal GPU acceleration
    #[cfg(all(target_os = "macos", feature = "metal"))]
    Metal,
    /// Automatic selection based on operation and data size
    Auto,
}
60
61/// Configuration for backend selection and operation
62#[derive(Debug, Clone)]
63pub struct BackendConfig {
64    /// Preferred backend
65    pub backend: Backend,
66    /// Minimum array size for GPU acceleration (elements)
67    pub gpu_threshold: usize,
68    /// Maximum GPU memory to use (bytes)
69    pub gpu_memory_limit: Option<usize>,
70    /// Whether to allow fallback to CPU if GPU fails
71    pub allow_fallback: bool,
72    /// Device ID for multi-GPU systems
73    pub device_id: Option<usize>,
74}
75
76impl Default for BackendConfig {
77    fn default() -> Self {
78        Self {
79            backend: Backend::default(),
80            gpu_threshold: 100_000, // 100k elements minimum for GPU
81            gpu_memory_limit: None,
82            allow_fallback: true,
83            device_id: None,
84        }
85    }
86}
87
88/// Trait for operations that can be delegated to different backends
89pub trait BackendOp<T, D>: Send + Sync
90where
91    T: Float + FromPrimitive + Debug + Clone,
92    D: Dimension,
93{
94    /// Execute operation on CPU
95    fn execute_cpu(&self, input: &ArrayView<T, D>) -> NdimageResult<Array<T, D>>;
96
97    /// Execute operation on GPU (if available)
98    #[cfg(feature = "gpu")]
99    fn execute_gpu(&self, input: &ArrayView<T, D>, backend: Backend) -> NdimageResult<Array<T, D>>;
100
101    /// Get estimated memory requirements
102    fn memory_requirement(&self, input_shape: &[usize]) -> usize;
103
104    /// Check if this operation benefits from GPU acceleration
105    fn benefits_from_gpu(&self, array_size: usize) -> bool {
106        array_size > 50_000 // Default threshold
107    }
108}
109
110/// Backend executor that handles delegation
111pub struct BackendExecutor {
112    config: BackendConfig,
113    #[cfg(feature = "gpu")]
114    gpu_context: Option<Arc<dyn GpuContext>>,
115}
116
117impl BackendExecutor {
118    pub fn new(config: BackendConfig) -> NdimageResult<Self> {
119        #[cfg(feature = "gpu")]
120        let gpu_context: Option<Arc<dyn GpuContext>> = match config.backend {
121            #[cfg(feature = "cuda")]
122            Backend::Cuda => Some(Arc::new(CudaContext::new(config.device_id)?)),
123            #[cfg(feature = "opencl")]
124            Backend::OpenCL => Some(Arc::new(OpenCLContext::new(config.device_id)?)),
125            // TODO: Implement Metal backend
126            // #[cfg(all(target_os = "macos", feature = "metal"))]
127            // Backend::Metal => Some(Arc::new(MetalContext::new(_config.device_id)?),
128            _ => None,
129        };
130
131        Ok(Self {
132            config,
133            #[cfg(feature = "gpu")]
134            gpu_context,
135        })
136    }
137
138    /// Execute an operation with automatic backend selection
139    pub fn execute<T, D, Op>(&self, input: &ArrayView<T, D>, op: Op) -> NdimageResult<Array<T, D>>
140    where
141        T: Float + FromPrimitive + Debug + Clone + Send + Sync + 'static,
142        D: Dimension,
143        Op: BackendOp<T, D>,
144    {
145        let array_size = input.len();
146        let backend = self.select_backend(&op, array_size)?;
147
148        match backend {
149            Backend::Cpu => op.execute_cpu(input),
150            #[cfg(feature = "gpu")]
151            _ => match op.execute_gpu(input, backend) {
152                Ok(result) => Ok(result),
153                Err(e) if self.config.allow_fallback => {
154                    eprintln!("GPU execution failed, falling back to CPU: {}", e);
155                    op.execute_cpu(input)
156                }
157                Err(e) => Err(e),
158            },
159            #[cfg(not(feature = "gpu"))]
160            _ => op.execute_cpu(input),
161        }
162    }
163
164    /// Select the best backend for an operation
165    fn select_backend<T, D, Op>(&self, op: &Op, array_size: usize) -> NdimageResult<Backend>
166    where
167        T: Float + FromPrimitive + Debug + Clone,
168        D: Dimension,
169        Op: BackendOp<T, D>,
170    {
171        match self.config.backend {
172            Backend::Auto => {
173                // Automatic selection based on heuristics
174                if array_size < self.config.gpu_threshold {
175                    Ok(Backend::Cpu)
176                } else if op.benefits_from_gpu(array_size) {
177                    // Check available GPU backends
178                    #[cfg(feature = "cuda")]
179                    if self.is_cuda_available() {
180                        return Ok(Backend::Cuda);
181                    }
182                    #[cfg(feature = "opencl")]
183                    if self.is_opencl_available() {
184                        return Ok(Backend::OpenCL);
185                    }
186                    #[cfg(all(target_os = "macos", feature = "metal"))]
187                    if self.is_metal_available() {
188                        return Ok(Backend::Metal);
189                    }
190                    Ok(Backend::Cpu)
191                } else {
192                    Ok(Backend::Cpu)
193                }
194            }
195            backend => Ok(backend),
196        }
197    }
198
199    #[cfg(feature = "cuda")]
200    fn is_cuda_available(&self) -> bool {
201        device_detection::get_device_manager()
202            .map(|manager| manager.lock().unwrap().is_backend_available(Backend::Cuda))
203            .unwrap_or(false)
204    }
205
206    #[cfg(feature = "opencl")]
207    fn is_opencl_available(&self) -> bool {
208        device_detection::get_device_manager()
209            .map(|manager| {
210                manager
211                    .lock()
212                    .unwrap()
213                    .is_backend_available(Backend::OpenCL)
214            })
215            .unwrap_or(false)
216    }
217
218    #[cfg(all(target_os = "macos", feature = "metal"))]
219    fn is_metal_available(&self) -> bool {
220        device_detection::get_device_manager()
221            .map(|manager| manager.lock().unwrap().is_backend_available(Backend::Metal))
222            .unwrap_or(false)
223    }
224}
225
/// GPU context trait for different GPU backends
///
/// Implemented by the backend-specific context types that
/// `BackendExecutor::new` stores behind an `Arc<dyn GpuContext>`.
#[cfg(feature = "gpu")]
pub trait GpuContext: Send + Sync {
    /// Name of this context (presumably the backend or device name; confirm with implementors)
    fn name(&self) -> &str;
    /// Number of devices reported by this context
    fn device_count(&self) -> usize;
    /// Index of the device currently in use
    fn current_device(&self) -> usize;
    /// Memory usage as `(used, total)`; units are presumably bytes (confirm with implementors)
    fn memory_info(&self) -> (usize, usize); // (used, total)
}
234
235/// Example: Gaussian filter with backend support
236pub struct GaussianFilterOp<T> {
237    sigma: Vec<T>,
238    truncate: Option<T>,
239}
240
241impl<T: Float + FromPrimitive + Debug + Clone> GaussianFilterOp<T> {
242    pub fn new(sigma: Vec<T>, truncate: Option<T>) -> Self {
243        Self { sigma, truncate }
244    }
245}
246
247impl<T, D> BackendOp<T, D> for GaussianFilterOp<T>
248where
249    T: Float + FromPrimitive + Debug + Clone + Default + Send + Sync + 'static,
250    D: Dimension + 'static,
251{
252    fn execute_cpu(&self, input: &ArrayView<T, D>) -> NdimageResult<Array<T, D>> {
253        // Call the existing CPU implementation
254        crate::filters::gaussian_filter_chunked(
255            &input.to_owned(),
256            &self.sigma,
257            self.truncate,
258            crate::filters::BorderMode::Reflect,
259            None,
260        )
261    }
262
263    #[cfg(feature = "gpu")]
264    fn execute_gpu(&self, input: &ArrayView<T, D>, backend: Backend) -> NdimageResult<Array<T, D>> {
265        match backend {
266            #[cfg(feature = "cuda")]
267            Backend::Cuda => {
268                // Would call CUDA implementation
269                cuda_gaussian_filter(input, &self.sigma, self.truncate)
270            }
271            _ => self.execute_cpu(input),
272        }
273    }
274
275    fn memory_requirement(&self, input_shape: &[usize]) -> usize {
276        let elements: usize = input_shape.iter().product();
277        // Input + output + temporary buffers
278        elements * std::mem::size_of::<T>() * 3
279    }
280
281    fn benefits_from_gpu(&self, array_size: usize) -> bool {
282        // Gaussian filter benefits from GPU for large arrays
283        array_size > 100_000
284    }
285}
286
/// Apply a Gaussian filter to a 2D array using the CUDA backend.
///
/// Only the first two entries of `sigma` are used, and `_truncate` is
/// currently ignored by this path.
///
/// # Errors
///
/// * `NotImplementedError` when `input` is not two-dimensional, or when fewer
///   than two sigma values are supplied.
/// * `DimensionError` when converting to or from the 2D representation fails.
/// * Any error produced while creating the CUDA operations object or running
///   the kernel.
#[cfg(feature = "cuda")]
#[allow(dead_code)]
fn cuda_gaussian_filter<T, D>(
    input: &ArrayView<T, D>,
    sigma: &[T],
    _truncate: Option<T>,
) -> NdimageResult<Array<T, D>>
where
    T: Float + FromPrimitive + Debug + Clone + Default + Send + Sync + 'static,
    D: Dimension,
{
    // Currently only support 2D arrays for GPU acceleration
    if input.ndim() != 2 {
        return Err(NdimageError::NotImplementedError(
            "CUDA Gaussian filter currently only supports 2D arrays".into(),
        ));
    }

    let input_2d = input
        .view()
        .into_dimensionality::<scirs2_core::ndarray::Ix2>()
        .map_err(|_| NdimageError::DimensionError("Failed to convert to 2D array".into()))?;

    // A 2D filter needs one sigma per axis; report that precisely instead of
    // blaming the input's dimensionality.
    if sigma.len() < 2 {
        return Err(NdimageError::NotImplementedError(
            format!(
                "CUDA Gaussian filter requires at least 2 sigma values for 2D arrays, got {}",
                sigma.len()
            )
            .into(),
        ));
    }

    let sigma_2d = [sigma[0], sigma[1]];
    let cuda_ops = cuda::CudaOperations::new(None)?;
    let result_2d = cuda_ops.gaussian_filter_2d(&input_2d, sigma_2d)?;

    // Convert back to original dimension
    result_2d
        .into_dimensionality::<D>()
        .map_err(|_| NdimageError::DimensionError("Failed to convert result dimension".into()))
}
323
324/// Builder for creating backend executors
325pub struct BackendBuilder {
326    config: BackendConfig,
327}
328
329impl BackendBuilder {
330    pub fn new() -> Self {
331        Self {
332            config: BackendConfig::default(),
333        }
334    }
335
336    pub fn backend(mut self, backend: Backend) -> Self {
337        self.config.backend = backend;
338        self
339    }
340
341    pub fn gpu_threshold(mut self, threshold: usize) -> Self {
342        self.config.gpu_threshold = threshold;
343        self
344    }
345
346    pub fn gpu_memory_limit(mut self, limit: usize) -> Self {
347        self.config.gpu_memory_limit = Some(limit);
348        self
349    }
350
351    pub fn allow_fallback(mut self, allow: bool) -> Self {
352        self.config.allow_fallback = allow;
353        self
354    }
355
356    pub fn device_id(mut self, id: usize) -> Self {
357        self.config.device_id = Some(id);
358        self
359    }
360
361    pub fn build(self) -> NdimageResult<BackendExecutor> {
362        BackendExecutor::new(self.config)
363    }
364}
365
366/// Convenience function to create an auto-selecting backend executor
367#[allow(dead_code)]
368pub fn auto_backend() -> NdimageResult<BackendExecutor> {
369    BackendBuilder::new().backend(Backend::Auto).build()
370}
371
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::arr2;

    /// A 2x2 input is far below the configured threshold, so an
    /// auto-selecting executor must complete the filter via the CPU path.
    #[test]
    fn test_backend_selection() {
        let executor = BackendExecutor::new(BackendConfig {
            backend: Backend::Auto,
            gpu_threshold: 1000,
            ..Default::default()
        })
        .unwrap();

        let op = GaussianFilterOp::new(vec![1.0, 1.0], None);
        let small_array = arr2(&[[1.0, 2.0], [3.0, 4.0]]);

        // Small array should use CPU
        let _result = executor.execute(&small_array.view(), op).unwrap();
    }

    /// Values passed to the builder must end up in the executor's config.
    #[test]
    fn test_backend_builder() {
        let builder = BackendBuilder::new()
            .backend(Backend::Cpu)
            .gpu_threshold(50_000)
            .allow_fallback(true);
        let executor = builder.build().unwrap();

        let BackendConfig {
            backend,
            gpu_threshold,
            allow_fallback,
            ..
        } = &executor.config;

        assert_eq!(*backend, Backend::Cpu);
        assert_eq!(*gpu_threshold, 50_000);
        assert!(*allow_fallback);
    }
}