1pub mod concrete_gpu_backends;
9pub mod device_detection;
10pub mod gpu_acceleration_framework;
11pub mod kernels;
12
13#[cfg(feature = "cuda")]
14pub mod cuda;
15
16pub use device_detection::{DeviceCapability, DeviceManager, MemoryManager, SystemCapabilities};
17pub use gpu_acceleration_framework::{
18 CompiledKernel, GpuAccelerationManager, GpuKernelCache, GpuMemoryPool, GpuPerformanceReport,
19 KernelPerformanceStats, MemoryPoolConfig, MemoryPoolStatistics,
20};
21pub use kernels::{GpuBuffer, GpuKernelExecutor, KernelInfo};
22
23#[cfg(feature = "cuda")]
24pub use concrete_gpu_backends::CudaContext;
25#[cfg(feature = "opencl")]
26pub use concrete_gpu_backends::OpenCLContext;
27use crate::error::{NdimageError, NdimageResult};
32use scirs2_core::ndarray::{Array, ArrayView, Dimension};
33use scirs2_core::numeric::{Float, FromPrimitive};
34use std::fmt::Debug;
35use std::sync::Arc;
36
/// Compute backend an operation can be executed on.
///
/// The GPU variants only exist when the corresponding Cargo feature is enabled,
/// so code matching on this enum must gate those arms the same way.
/// The default backend is [`Backend::Cpu`].
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
pub enum Backend {
    /// Execute on the CPU.
    #[default]
    Cpu,
    /// Execute through CUDA (requires the `cuda` feature).
    #[cfg(feature = "cuda")]
    Cuda,
    /// Execute through OpenCL (requires the `opencl` feature).
    #[cfg(feature = "opencl")]
    OpenCL,
    /// Execute through Metal (requires macOS and the `metal` feature).
    #[cfg(all(target_os = "macos", feature = "metal"))]
    Metal,
    /// Let the executor pick a concrete backend for each call.
    Auto,
}
60
61#[derive(Debug, Clone)]
63pub struct BackendConfig {
64 pub backend: Backend,
66 pub gpu_threshold: usize,
68 pub gpu_memory_limit: Option<usize>,
70 pub allow_fallback: bool,
72 pub device_id: Option<usize>,
74}
75
76impl Default for BackendConfig {
77 fn default() -> Self {
78 Self {
79 backend: Backend::default(),
80 gpu_threshold: 100_000, gpu_memory_limit: None,
82 allow_fallback: true,
83 device_id: None,
84 }
85 }
86}
87
88pub trait BackendOp<T, D>: Send + Sync
90where
91 T: Float + FromPrimitive + Debug + Clone,
92 D: Dimension,
93{
94 fn execute_cpu(&self, input: &ArrayView<T, D>) -> NdimageResult<Array<T, D>>;
96
97 #[cfg(feature = "gpu")]
99 fn execute_gpu(&self, input: &ArrayView<T, D>, backend: Backend) -> NdimageResult<Array<T, D>>;
100
101 fn memory_requirement(&self, input_shape: &[usize]) -> usize;
103
104 fn benefits_from_gpu(&self, array_size: usize) -> bool {
106 array_size > 50_000 }
108}
109
110pub struct BackendExecutor {
112 config: BackendConfig,
113 #[cfg(feature = "gpu")]
114 gpu_context: Option<Arc<dyn GpuContext>>,
115}
116
117impl BackendExecutor {
118 pub fn new(config: BackendConfig) -> NdimageResult<Self> {
119 #[cfg(feature = "gpu")]
120 let gpu_context: Option<Arc<dyn GpuContext>> = match config.backend {
121 #[cfg(feature = "cuda")]
122 Backend::Cuda => Some(Arc::new(CudaContext::new(config.device_id)?)),
123 #[cfg(feature = "opencl")]
124 Backend::OpenCL => Some(Arc::new(OpenCLContext::new(config.device_id)?)),
125 _ => None,
129 };
130
131 Ok(Self {
132 config,
133 #[cfg(feature = "gpu")]
134 gpu_context,
135 })
136 }
137
138 pub fn execute<T, D, Op>(&self, input: &ArrayView<T, D>, op: Op) -> NdimageResult<Array<T, D>>
140 where
141 T: Float + FromPrimitive + Debug + Clone + Send + Sync + 'static,
142 D: Dimension,
143 Op: BackendOp<T, D>,
144 {
145 let array_size = input.len();
146 let backend = self.select_backend(&op, array_size)?;
147
148 match backend {
149 Backend::Cpu => op.execute_cpu(input),
150 #[cfg(feature = "gpu")]
151 _ => match op.execute_gpu(input, backend) {
152 Ok(result) => Ok(result),
153 Err(e) if self.config.allow_fallback => {
154 eprintln!("GPU execution failed, falling back to CPU: {}", e);
155 op.execute_cpu(input)
156 }
157 Err(e) => Err(e),
158 },
159 #[cfg(not(feature = "gpu"))]
160 _ => op.execute_cpu(input),
161 }
162 }
163
164 fn select_backend<T, D, Op>(&self, op: &Op, array_size: usize) -> NdimageResult<Backend>
166 where
167 T: Float + FromPrimitive + Debug + Clone,
168 D: Dimension,
169 Op: BackendOp<T, D>,
170 {
171 match self.config.backend {
172 Backend::Auto => {
173 if array_size < self.config.gpu_threshold {
175 Ok(Backend::Cpu)
176 } else if op.benefits_from_gpu(array_size) {
177 #[cfg(feature = "cuda")]
179 if self.is_cuda_available() {
180 return Ok(Backend::Cuda);
181 }
182 #[cfg(feature = "opencl")]
183 if self.is_opencl_available() {
184 return Ok(Backend::OpenCL);
185 }
186 #[cfg(all(target_os = "macos", feature = "metal"))]
187 if self.is_metal_available() {
188 return Ok(Backend::Metal);
189 }
190 Ok(Backend::Cpu)
191 } else {
192 Ok(Backend::Cpu)
193 }
194 }
195 backend => Ok(backend),
196 }
197 }
198
199 #[cfg(feature = "cuda")]
200 fn is_cuda_available(&self) -> bool {
201 device_detection::get_device_manager()
202 .map(|manager| {
203 manager
204 .lock()
205 .expect("Operation failed")
206 .is_backend_available(Backend::Cuda)
207 })
208 .unwrap_or(false)
209 }
210
211 #[cfg(feature = "opencl")]
212 fn is_opencl_available(&self) -> bool {
213 device_detection::get_device_manager()
214 .map(|manager| {
215 manager
216 .lock()
217 .expect("Operation failed")
218 .is_backend_available(Backend::OpenCL)
219 })
220 .unwrap_or(false)
221 }
222
223 #[cfg(all(target_os = "macos", feature = "metal"))]
224 fn is_metal_available(&self) -> bool {
225 device_detection::get_device_manager()
226 .map(|manager| {
227 manager
228 .lock()
229 .expect("Operation failed")
230 .is_backend_available(Backend::Metal)
231 })
232 .unwrap_or(false)
233 }
234}
235
/// Handle to an initialised GPU backend. Only compiled with the `gpu` feature.
///
/// Implementors must be `Send + Sync` so a context can be shared behind an `Arc`.
#[cfg(feature = "gpu")]
pub trait GpuContext: Send + Sync {
    /// Name of this context.
    fn name(&self) -> &str;

    /// Device count reported by this context (presumably the devices it can see).
    fn device_count(&self) -> usize;

    /// Index of the device this context currently targets.
    fn current_device(&self) -> usize;

    /// A pair of memory figures for the current device.
    ///
    /// NOTE(review): the meaning and order of the two values (for example
    /// free/total or used/total) and their unit are not established in this
    /// file; confirm against the implementors.
    fn memory_info(&self) -> (usize, usize);
}
244
/// Gaussian filter operation that can be dispatched through a `BackendExecutor`.
pub struct GaussianFilterOp<T> {
    /// Sigma values forwarded to the filter implementation.
    sigma: Vec<T>,
    /// Optional truncation parameter forwarded to the filter implementation.
    truncate: Option<T>,
}
250
251impl<T: Float + FromPrimitive + Debug + Clone> GaussianFilterOp<T> {
252 pub fn new(sigma: Vec<T>, truncate: Option<T>) -> Self {
253 Self { sigma, truncate }
254 }
255}
256
257impl<T, D> BackendOp<T, D> for GaussianFilterOp<T>
258where
259 T: Float + FromPrimitive + Debug + Clone + Default + Send + Sync + 'static,
260 D: Dimension + 'static,
261{
262 fn execute_cpu(&self, input: &ArrayView<T, D>) -> NdimageResult<Array<T, D>> {
263 crate::filters::gaussian_filter_chunked(
265 &input.to_owned(),
266 &self.sigma,
267 self.truncate,
268 crate::filters::BorderMode::Reflect,
269 None,
270 )
271 }
272
273 #[cfg(feature = "gpu")]
274 fn execute_gpu(&self, input: &ArrayView<T, D>, backend: Backend) -> NdimageResult<Array<T, D>> {
275 match backend {
276 #[cfg(feature = "cuda")]
277 Backend::Cuda => {
278 cuda_gaussian_filter(input, &self.sigma, self.truncate)
280 }
281 _ => self.execute_cpu(input),
282 }
283 }
284
285 fn memory_requirement(&self, input_shape: &[usize]) -> usize {
286 let elements: usize = input_shape.iter().product();
287 elements * std::mem::size_of::<T>() * 3
289 }
290
291 fn benefits_from_gpu(&self, array_size: usize) -> bool {
292 array_size > 100_000
294 }
295}
296
/// Runs a Gaussian filter on the GPU through `cuda::CudaOperations`.
///
/// Only 2-D inputs are handled. `sigma[0]` and `sigma[1]` are forwarded as the
/// two sigmas and any further entries are ignored. `_truncate` is accepted for
/// signature parity with the CPU path but is not used.
///
/// # Errors
///
/// - `NdimageError::NotImplementedError` when `input` is not 2-D, or when it is
///   2-D but fewer than two sigma values were supplied. The two cases carry
///   different messages.
/// - `NdimageError::DimensionError` when converting to or from the 2-D
///   representation fails.
/// - Any error from creating `CudaOperations` or from its `gaussian_filter_2d`.
// `dead_code` is allowed because the only caller in this file is additionally
// gated on the `gpu` feature.
#[cfg(feature = "cuda")]
#[allow(dead_code)]
fn cuda_gaussian_filter<T, D>(
    input: &ArrayView<T, D>,
    sigma: &[T],
    _truncate: Option<T>,
) -> NdimageResult<Array<T, D>>
where
    T: Float + FromPrimitive + Debug + Clone + Default + Send + Sync + 'static,
    D: Dimension,
{
    if input.ndim() != 2 {
        return Err(NdimageError::NotImplementedError(
            "CUDA Gaussian filter currently only supports 2D arrays".into(),
        ));
    }

    // A 2-D input with too few sigmas used to be reported as "only supports 2D
    // arrays"; the variant is kept but the message now names the real problem.
    let sigma_2d = match sigma {
        [first, second, ..] => [*first, *second],
        _ => {
            return Err(NdimageError::NotImplementedError(
                "CUDA Gaussian filter requires at least two sigma values for 2D arrays".into(),
            ));
        }
    };

    let input_2d = input
        .view()
        .into_dimensionality::<scirs2_core::ndarray::Ix2>()
        .map_err(|_| NdimageError::DimensionError("Failed to convert to 2D array".into()))?;

    let cuda_ops = cuda::CudaOperations::new(None)?;
    let result_2d = cuda_ops.gaussian_filter_2d(&input_2d, sigma_2d)?;

    // Convert back to the caller's dimension type `D`.
    result_2d
        .into_dimensionality::<D>()
        .map_err(|_| NdimageError::DimensionError("Failed to convert result dimension".into()))
}
333
334pub struct BackendBuilder {
336 config: BackendConfig,
337}
338
339impl BackendBuilder {
340 pub fn new() -> Self {
341 Self {
342 config: BackendConfig::default(),
343 }
344 }
345
346 pub fn backend(mut self, backend: Backend) -> Self {
347 self.config.backend = backend;
348 self
349 }
350
351 pub fn gpu_threshold(mut self, threshold: usize) -> Self {
352 self.config.gpu_threshold = threshold;
353 self
354 }
355
356 pub fn gpu_memory_limit(mut self, limit: usize) -> Self {
357 self.config.gpu_memory_limit = Some(limit);
358 self
359 }
360
361 pub fn allow_fallback(mut self, allow: bool) -> Self {
362 self.config.allow_fallback = allow;
363 self
364 }
365
366 pub fn device_id(mut self, id: usize) -> Self {
367 self.config.device_id = Some(id);
368 self
369 }
370
371 pub fn build(self) -> NdimageResult<BackendExecutor> {
372 BackendExecutor::new(self.config)
373 }
374}
375
376#[allow(dead_code)]
378pub fn auto_backend() -> NdimageResult<BackendExecutor> {
379 BackendBuilder::new().backend(Backend::Auto).build()
380}
381
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::arr2;

    /// A 4-element input is below the 1000-element threshold, so automatic
    /// selection must resolve to the CPU path and produce an output of the
    /// same shape as the input.
    #[test]
    fn test_backend_selection() {
        let config = BackendConfig {
            backend: Backend::Auto,
            gpu_threshold: 1000,
            ..Default::default()
        };

        let executor = BackendExecutor::new(config).expect("Operation failed");
        let small_array = arr2(&[[1.0, 2.0], [3.0, 4.0]]);
        let op = GaussianFilterOp::new(vec![1.0, 1.0], None);

        let result = executor
            .execute(&small_array.view(), op)
            .expect("Operation failed");

        // Filtering must not change the array's shape.
        assert_eq!(result.shape(), small_array.shape());
    }

    /// Values set through the builder must end up in the executor's configuration.
    #[test]
    fn test_backend_builder() {
        let executor = BackendBuilder::new()
            .backend(Backend::Cpu)
            .gpu_threshold(50_000)
            .allow_fallback(true)
            .build()
            .expect("Operation failed");

        assert_eq!(executor.config.backend, Backend::Cpu);
        assert_eq!(executor.config.gpu_threshold, 50_000);
        assert!(executor.config.allow_fallback);
    }
}