1pub mod concrete_gpu_backends;
9pub mod device_detection;
10pub mod gpu_acceleration_framework;
11pub mod kernels;
12
13#[cfg(feature = "cuda")]
14pub mod cuda;
15
16pub use device_detection::{DeviceCapability, DeviceManager, MemoryManager, SystemCapabilities};
17pub use gpu_acceleration_framework::{
18 CompiledKernel, GpuAccelerationManager, GpuKernelCache, GpuMemoryPool, GpuPerformanceReport,
19 KernelPerformanceStats, MemoryPoolConfig, MemoryPoolStatistics,
20};
21pub use kernels::{GpuBuffer, GpuKernelExecutor, KernelInfo};
22
23#[cfg(feature = "cuda")]
24pub use concrete_gpu_backends::CudaContext;
25#[cfg(feature = "opencl")]
26pub use concrete_gpu_backends::OpenCLContext;
27use crate::error::{NdimageError, NdimageResult};
32use scirs2_core::ndarray::{Array, ArrayView, Dimension};
33use scirs2_core::numeric::{Float, FromPrimitive};
34use std::fmt::Debug;
35use std::sync::Arc;
36
/// Compute backend on which an operation can be executed.
///
/// The GPU variants only exist when the matching Cargo feature is enabled,
/// so code that matches on this enum must gate those arms the same way.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum Backend {
    /// Execute on the CPU. This is the default backend.
    #[default]
    Cpu,
    /// Execute through CUDA (only with the `cuda` feature).
    #[cfg(feature = "cuda")]
    Cuda,
    /// Execute through OpenCL (only with the `opencl` feature).
    #[cfg(feature = "opencl")]
    OpenCL,
    /// Execute through Metal (only on macOS with the `metal` feature).
    #[cfg(all(target_os = "macos", feature = "metal"))]
    Metal,
    /// Let the executor choose per call: the CPU for inputs below the
    /// configured size threshold or for operations that do not benefit from a
    /// GPU, otherwise the first available GPU backend, falling back to the CPU
    /// when none is available.
    Auto,
}
60
61#[derive(Debug, Clone)]
63pub struct BackendConfig {
64 pub backend: Backend,
66 pub gpu_threshold: usize,
68 pub gpu_memory_limit: Option<usize>,
70 pub allow_fallback: bool,
72 pub device_id: Option<usize>,
74}
75
76impl Default for BackendConfig {
77 fn default() -> Self {
78 Self {
79 backend: Backend::default(),
80 gpu_threshold: 100_000, gpu_memory_limit: None,
82 allow_fallback: true,
83 device_id: None,
84 }
85 }
86}
87
88pub trait BackendOp<T, D>: Send + Sync
90where
91 T: Float + FromPrimitive + Debug + Clone,
92 D: Dimension,
93{
94 fn execute_cpu(&self, input: &ArrayView<T, D>) -> NdimageResult<Array<T, D>>;
96
97 #[cfg(feature = "gpu")]
99 fn execute_gpu(&self, input: &ArrayView<T, D>, backend: Backend) -> NdimageResult<Array<T, D>>;
100
101 fn memory_requirement(&self, input_shape: &[usize]) -> usize;
103
104 fn benefits_from_gpu(&self, array_size: usize) -> bool {
106 array_size > 50_000 }
108}
109
110pub struct BackendExecutor {
112 config: BackendConfig,
113 #[cfg(feature = "gpu")]
114 gpu_context: Option<Arc<dyn GpuContext>>,
115}
116
117impl BackendExecutor {
118 pub fn new(config: BackendConfig) -> NdimageResult<Self> {
119 #[cfg(feature = "gpu")]
120 let gpu_context: Option<Arc<dyn GpuContext>> = match config.backend {
121 #[cfg(feature = "cuda")]
122 Backend::Cuda => Some(Arc::new(CudaContext::new(config.device_id)?)),
123 #[cfg(feature = "opencl")]
124 Backend::OpenCL => Some(Arc::new(OpenCLContext::new(config.device_id)?)),
125 _ => None,
129 };
130
131 Ok(Self {
132 config,
133 #[cfg(feature = "gpu")]
134 gpu_context,
135 })
136 }
137
138 pub fn execute<T, D, Op>(&self, input: &ArrayView<T, D>, op: Op) -> NdimageResult<Array<T, D>>
140 where
141 T: Float + FromPrimitive + Debug + Clone + Send + Sync + 'static,
142 D: Dimension,
143 Op: BackendOp<T, D>,
144 {
145 let array_size = input.len();
146 let backend = self.select_backend(&op, array_size)?;
147
148 match backend {
149 Backend::Cpu => op.execute_cpu(input),
150 #[cfg(feature = "gpu")]
151 _ => match op.execute_gpu(input, backend) {
152 Ok(result) => Ok(result),
153 Err(e) if self.config.allow_fallback => {
154 eprintln!("GPU execution failed, falling back to CPU: {}", e);
155 op.execute_cpu(input)
156 }
157 Err(e) => Err(e),
158 },
159 #[cfg(not(feature = "gpu"))]
160 _ => op.execute_cpu(input),
161 }
162 }
163
164 fn select_backend<T, D, Op>(&self, op: &Op, array_size: usize) -> NdimageResult<Backend>
166 where
167 T: Float + FromPrimitive + Debug + Clone,
168 D: Dimension,
169 Op: BackendOp<T, D>,
170 {
171 match self.config.backend {
172 Backend::Auto => {
173 if array_size < self.config.gpu_threshold {
175 Ok(Backend::Cpu)
176 } else if op.benefits_from_gpu(array_size) {
177 #[cfg(feature = "cuda")]
179 if self.is_cuda_available() {
180 return Ok(Backend::Cuda);
181 }
182 #[cfg(feature = "opencl")]
183 if self.is_opencl_available() {
184 return Ok(Backend::OpenCL);
185 }
186 #[cfg(all(target_os = "macos", feature = "metal"))]
187 if self.is_metal_available() {
188 return Ok(Backend::Metal);
189 }
190 Ok(Backend::Cpu)
191 } else {
192 Ok(Backend::Cpu)
193 }
194 }
195 backend => Ok(backend),
196 }
197 }
198
199 #[cfg(feature = "cuda")]
200 fn is_cuda_available(&self) -> bool {
201 device_detection::get_device_manager()
202 .map(|manager| manager.lock().unwrap().is_backend_available(Backend::Cuda))
203 .unwrap_or(false)
204 }
205
206 #[cfg(feature = "opencl")]
207 fn is_opencl_available(&self) -> bool {
208 device_detection::get_device_manager()
209 .map(|manager| {
210 manager
211 .lock()
212 .unwrap()
213 .is_backend_available(Backend::OpenCL)
214 })
215 .unwrap_or(false)
216 }
217
218 #[cfg(all(target_os = "macos", feature = "metal"))]
219 fn is_metal_available(&self) -> bool {
220 device_detection::get_device_manager()
221 .map(|manager| manager.lock().unwrap().is_backend_available(Backend::Metal))
222 .unwrap_or(false)
223 }
224}
225
/// Minimal interface to a GPU runtime context held by the executor.
#[cfg(feature = "gpu")]
pub trait GpuContext: Send + Sync {
    /// Name identifying this context.
    fn name(&self) -> &str;

    /// Number of devices reported by this context.
    fn device_count(&self) -> usize;

    /// Index of the device currently in use.
    fn current_device(&self) -> usize;

    /// Memory information as a pair of sizes.
    // NOTE(review): the meaning and order of the pair (e.g. free/total or
    // used/total) is not visible here — confirm against the implementations.
    fn memory_info(&self) -> (usize, usize);
}
234
/// Gaussian filter packaged as a backend operation.
pub struct GaussianFilterOp<T> {
    /// Standard deviations handed to the filter implementation.
    // NOTE(review): presumably one value per array axis — confirm.
    sigma: Vec<T>,
    /// Optional truncation parameter forwarded to the filter implementation.
    truncate: Option<T>,
}
240
241impl<T: Float + FromPrimitive + Debug + Clone> GaussianFilterOp<T> {
242 pub fn new(sigma: Vec<T>, truncate: Option<T>) -> Self {
243 Self { sigma, truncate }
244 }
245}
246
247impl<T, D> BackendOp<T, D> for GaussianFilterOp<T>
248where
249 T: Float + FromPrimitive + Debug + Clone + Default + Send + Sync + 'static,
250 D: Dimension + 'static,
251{
252 fn execute_cpu(&self, input: &ArrayView<T, D>) -> NdimageResult<Array<T, D>> {
253 crate::filters::gaussian_filter_chunked(
255 &input.to_owned(),
256 &self.sigma,
257 self.truncate,
258 crate::filters::BorderMode::Reflect,
259 None,
260 )
261 }
262
263 #[cfg(feature = "gpu")]
264 fn execute_gpu(&self, input: &ArrayView<T, D>, backend: Backend) -> NdimageResult<Array<T, D>> {
265 match backend {
266 #[cfg(feature = "cuda")]
267 Backend::Cuda => {
268 cuda_gaussian_filter(input, &self.sigma, self.truncate)
270 }
271 _ => self.execute_cpu(input),
272 }
273 }
274
275 fn memory_requirement(&self, input_shape: &[usize]) -> usize {
276 let elements: usize = input_shape.iter().product();
277 elements * std::mem::size_of::<T>() * 3
279 }
280
281 fn benefits_from_gpu(&self, array_size: usize) -> bool {
282 array_size > 100_000
284 }
285}
286
/// Applies a Gaussian filter to `input` through the CUDA backend.
///
/// Only 2D inputs are supported. The first two entries of `sigma` are used as
/// the 2D sigma pair; `_truncate` is currently ignored by this path.
///
/// # Errors
///
/// * `NdimageError::NotImplementedError` if `input` is not 2D.
/// * `NdimageError::DimensionError` if fewer than two sigma values are given,
///   or if converting to or from the 2D representation fails.
/// * Any error returned while creating the CUDA operations object or running
///   the CUDA filter.
#[cfg(feature = "cuda")]
// Only called from `execute_gpu`, which also requires the `gpu` feature, so
// this is unused when `cuda` is enabled on its own.
#[allow(dead_code)]
fn cuda_gaussian_filter<T, D>(
    input: &ArrayView<T, D>,
    sigma: &[T],
    _truncate: Option<T>,
) -> NdimageResult<Array<T, D>>
where
    T: Float + FromPrimitive + Debug + Clone + Default + Send + Sync + 'static,
    D: Dimension,
{
    if input.ndim() != 2 {
        return Err(NdimageError::NotImplementedError(
            "CUDA Gaussian filter currently only supports 2D arrays".into(),
        ));
    }

    // A 2D input with too few sigma values used to be reported as
    // "only supports 2D arrays", which pointed at the wrong problem.
    let sigma_2d = match sigma {
        [first, second, ..] => [*first, *second],
        _ => {
            return Err(NdimageError::DimensionError(
                "CUDA Gaussian filter requires a sigma value for each of the two axes".into(),
            ))
        }
    };

    let input_2d = input
        .view()
        .into_dimensionality::<scirs2_core::ndarray::Ix2>()
        .map_err(|_| NdimageError::DimensionError("Failed to convert to 2D array".into()))?;

    // NOTE(review): `None` presumably selects the default device, so a
    // configured `device_id` is not honoured on this path — confirm.
    let cuda_ops = cuda::CudaOperations::new(None)?;
    let result_2d = cuda_ops.gaussian_filter_2d(&input_2d, sigma_2d)?;

    result_2d
        .into_dimensionality::<D>()
        .map_err(|_| NdimageError::DimensionError("Failed to convert result dimension".into()))
}
323
324pub struct BackendBuilder {
326 config: BackendConfig,
327}
328
329impl BackendBuilder {
330 pub fn new() -> Self {
331 Self {
332 config: BackendConfig::default(),
333 }
334 }
335
336 pub fn backend(mut self, backend: Backend) -> Self {
337 self.config.backend = backend;
338 self
339 }
340
341 pub fn gpu_threshold(mut self, threshold: usize) -> Self {
342 self.config.gpu_threshold = threshold;
343 self
344 }
345
346 pub fn gpu_memory_limit(mut self, limit: usize) -> Self {
347 self.config.gpu_memory_limit = Some(limit);
348 self
349 }
350
351 pub fn allow_fallback(mut self, allow: bool) -> Self {
352 self.config.allow_fallback = allow;
353 self
354 }
355
356 pub fn device_id(mut self, id: usize) -> Self {
357 self.config.device_id = Some(id);
358 self
359 }
360
361 pub fn build(self) -> NdimageResult<BackendExecutor> {
362 BackendExecutor::new(self.config)
363 }
364}
365
366#[allow(dead_code)]
368pub fn auto_backend() -> NdimageResult<BackendExecutor> {
369 BackendBuilder::new().backend(Backend::Auto).build()
370}
371
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::{arr2, Ix2};

    /// Under `Backend::Auto`, an input below `gpu_threshold` must be routed to
    /// the CPU and produce an output with the input's shape.
    #[test]
    fn test_backend_selection() {
        let config = BackendConfig {
            backend: Backend::Auto,
            gpu_threshold: 1000,
            ..Default::default()
        };

        let executor = BackendExecutor::new(config).unwrap();
        let small_array = arr2(&[[1.0, 2.0], [3.0, 4.0]]);
        let op = GaussianFilterOp::new(vec![1.0_f64, 1.0], None);

        // Four elements are far below the threshold of 1000.
        let selected = executor
            .select_backend::<f64, Ix2, _>(&op, small_array.len())
            .unwrap();
        assert_eq!(selected, Backend::Cpu);

        let result = executor.execute(&small_array.view(), op).unwrap();
        assert_eq!(result.shape(), small_array.shape());
    }

    /// The builder must carry every explicitly set option into the executor.
    #[test]
    fn test_backend_builder() {
        let executor = BackendBuilder::new()
            .backend(Backend::Cpu)
            .gpu_threshold(50_000)
            .allow_fallback(true)
            .build()
            .unwrap();

        assert_eq!(executor.config.backend, Backend::Cpu);
        assert_eq!(executor.config.gpu_threshold, 50_000);
        assert!(executor.config.allow_fallback);
    }
}