1use std::collections::HashMap;
9use std::mem;
10use std::sync::Arc;
11
12use scirs2_core::ndarray::{Array, ArrayView, ArrayView2, Dimension, Ix2};
13use scirs2_core::numeric::{Float, FromPrimitive};
14
15use crate::backend::gpu_acceleration_framework::{
16 GpuAccelerationManager, GpuPerformanceReport, MemoryPoolConfig,
17};
18use crate::backend::{Backend, DeviceManager};
19use crate::error::{NdimageError, NdimageResult};
20use crate::interpolation::BoundaryMode;
21use std::f64::consts::PI;
22
23pub struct GpuOperations {
25 acceleration_manager: Arc<GpuAccelerationManager>,
27 device_manager: DeviceManager,
29 config: GpuOperationsConfig,
31 operation_registry: HashMap<String, OperationInfo>,
33}
34
35#[derive(Debug, Clone)]
36pub struct GpuOperationsConfig {
37 pub min_gpu_size: usize,
39 pub memory_pool_config: MemoryPoolConfig,
41 pub auto_fallback: bool,
43 pub enable_monitoring: bool,
45 pub operation_timeout: u64,
47}
48
49impl Default for GpuOperationsConfig {
50 fn default() -> Self {
51 Self {
52 min_gpu_size: 1024 * 1024, memory_pool_config: MemoryPoolConfig::default(),
54 auto_fallback: true,
55 enable_monitoring: true,
56 operation_timeout: 10000, }
58 }
59}
60
61#[derive(Debug, Clone)]
62struct OperationInfo {
63 name: String,
65 kernel_source: String,
67 preferred_backend: Backend,
69 memory_complexity: f64,
71 compute_complexity: f64,
73}
74
75impl GpuOperations {
76 pub fn new(config: GpuOperationsConfig) -> NdimageResult<Self> {
78 let acceleration_manager = Arc::new(GpuAccelerationManager::new(
79 config.memory_pool_config.clone(),
80 )?);
81 let device_manager = DeviceManager::new()?;
82
83 let mut gpu_ops = Self {
84 acceleration_manager,
85 device_manager,
86 config,
87 operation_registry: HashMap::new(),
88 };
89
90 gpu_ops.register_builtin_operations()?;
92
93 Ok(gpu_ops)
94 }
95
96 pub fn gpu_convolution_2d<T>(
98 &self,
99 input: ArrayView2<T>,
100 kernel: ArrayView2<T>,
101 mode: BoundaryMode,
102 ) -> NdimageResult<Array<T, Ix2>>
103 where
104 T: Float
105 + FromPrimitive
106 + Clone
107 + Send
108 + Sync
109 + std::ops::DivAssign
110 + std::ops::AddAssign
111 + std::fmt::Debug
112 + 'static,
113 {
114 let operation_name = "convolution_2d";
115
116 if !self.should_use_gpu(&input, operation_name) {
118 return self.fallback_convolution_2d(input, kernel, mode);
119 }
120
121 let kernel_source = self.get_convolution_kernel_source();
123
124 let backend = self.select_backend_for_operation(operation_name)?;
126
127 match self.execute_gpu_convolution(input, kernel, &kernel_source, backend, mode) {
129 Ok(result) => Ok(result),
130 Err(e) if self.config.auto_fallback => {
131 eprintln!("GPU convolution failed: {:?}, falling back to CPU", e);
132 self.fallback_convolution_2d(input, kernel, mode)
133 }
134 Err(e) => Err(e),
135 }
136 }
137
138 pub fn gpu_morphological_erosion<T>(
140 &self,
141 input: ArrayView2<T>,
142 structuring_element: ArrayView2<bool>,
143 mode: BoundaryMode,
144 ) -> NdimageResult<Array<T, Ix2>>
145 where
146 T: Float
147 + FromPrimitive
148 + Clone
149 + Send
150 + Sync
151 + PartialOrd
152 + std::ops::AddAssign
153 + std::ops::DivAssign
154 + std::fmt::Debug
155 + 'static,
156 {
157 let operation_name = "morphological_erosion";
158
159 if !self.should_use_gpu(&input, operation_name) {
160 return self.fallback_morphological_erosion(input, structuring_element, mode);
161 }
162
163 let kernel_source = self.get_morphology_kernel_source();
164 let backend = self.select_backend_for_operation(operation_name)?;
165
166 match self.execute_gpu_morphology(
167 input,
168 structuring_element,
169 &kernel_source,
170 backend,
171 mode,
172 MorphologyOperation::Erosion,
173 ) {
174 Ok(result) => Ok(result),
175 Err(e) if self.config.auto_fallback => {
176 eprintln!("GPU erosion failed: {:?}, falling back to CPU", e);
177 self.fallback_morphological_erosion(input, structuring_element, mode)
178 }
179 Err(e) => Err(e),
180 }
181 }
182
183 pub fn gpu_morphological_dilation<T>(
185 &self,
186 input: ArrayView2<T>,
187 structuring_element: ArrayView2<bool>,
188 mode: BoundaryMode,
189 ) -> NdimageResult<Array<T, Ix2>>
190 where
191 T: Float
192 + FromPrimitive
193 + Clone
194 + Send
195 + Sync
196 + PartialOrd
197 + std::ops::AddAssign
198 + std::ops::DivAssign
199 + std::fmt::Debug
200 + 'static,
201 {
202 let operation_name = "morphological_dilation";
203
204 if !self.should_use_gpu(&input, operation_name) {
205 return self.fallback_morphological_dilation(input, structuring_element, mode);
206 }
207
208 let kernel_source = self.get_morphology_kernel_source();
209 let backend = self.select_backend_for_operation(operation_name)?;
210
211 match self.execute_gpu_morphology(
212 input,
213 structuring_element,
214 &kernel_source,
215 backend,
216 mode,
217 MorphologyOperation::Dilation,
218 ) {
219 Ok(result) => Ok(result),
220 Err(e) if self.config.auto_fallback => {
221 eprintln!("GPU dilation failed: {:?}, falling back to CPU", e);
222 self.fallback_morphological_dilation(input, structuring_element, mode)
223 }
224 Err(e) => Err(e),
225 }
226 }
227
228 pub fn gpu_gaussian_filter<T>(
230 &self,
231 input: ArrayView2<T>,
232 sigma: (f64, f64),
233 mode: BoundaryMode,
234 ) -> NdimageResult<Array<T, Ix2>>
235 where
236 T: Float + FromPrimitive + Clone + Send + Sync + std::ops::DivAssign + std::ops::AddAssign,
237 {
238 let operation_name = "gaussian_filter";
239
240 if !self.should_use_gpu(&input, operation_name) {
241 return self.fallback_gaussian_filter(input, sigma, mode);
242 }
243
244 let kernel_source = self.get_gaussian_kernel_source();
245 let backend = self.select_backend_for_operation(operation_name)?;
246
247 match self.execute_gpu_gaussian(input, sigma, &kernel_source, backend, mode) {
248 Ok(result) => Ok(result),
249 Err(e) if self.config.auto_fallback => {
250 eprintln!("GPU Gaussian filter failed: {:?}, falling back to CPU", e);
251 self.fallback_gaussian_filter(input, sigma, mode)
252 }
253 Err(e) => Err(e),
254 }
255 }
256
257 pub fn gpu_distance_transform<T>(
259 &self,
260 input: ArrayView2<T>,
261 metric: DistanceMetric,
262 ) -> NdimageResult<Array<T, Ix2>>
263 where
264 T: Float + FromPrimitive + Clone + Send + Sync + PartialOrd,
265 {
266 let operation_name = "distance_transform";
267
268 if !self.should_use_gpu(&input, operation_name) {
269 return self.fallback_distance_transform(input, metric);
270 }
271
272 let kernel_source = self.get_distance_transform_kernel_source();
273 let backend = self.select_backend_for_operation(operation_name)?;
274
275 match self.execute_gpu_distance_transform(input, metric, &kernel_source, backend) {
276 Ok(result) => Ok(result),
277 Err(e) if self.config.auto_fallback => {
278 eprintln!(
279 "GPU distance transform failed: {:?}, falling back to CPU",
280 e
281 );
282 self.fallback_distance_transform(input, metric)
283 }
284 Err(e) => Err(e),
285 }
286 }
287
288 pub fn get_performance_report(&self) -> GpuPerformanceReport {
290 self.acceleration_manager.get_performance_report()
291 }
292
293 pub fn get_gpu_info(&self) -> GpuInfo {
295 let capabilities = self.device_manager.get_capabilities();
296
297 GpuInfo {
298 cuda_available: capabilities.cuda_available,
299 opencl_available: capabilities.opencl_available,
300 metal_available: capabilities.metal_available,
301 gpu_memory: capabilities.gpu_memory_mb,
302 compute_units: capabilities.compute_units,
303 preferred_backend: self.select_preferred_backend(),
304 }
305 }
306
307 fn register_builtin_operations(&mut self) -> NdimageResult<()> {
310 self.operation_registry.insert(
312 "convolution_2d".to_string(),
313 OperationInfo {
314 name: "convolution_2d".to_string(),
315 kernel_source: self.get_convolution_kernel_source(),
316 preferred_backend: {
317 #[cfg(feature = "opencl")]
318 {
319 Backend::OpenCL
320 }
321 #[cfg(not(feature = "opencl"))]
322 {
323 Backend::Cpu
324 }
325 },
326 memory_complexity: 2.0, compute_complexity: 9.0, },
329 );
330
331 self.operation_registry.insert(
333 "morphological_erosion".to_string(),
334 OperationInfo {
335 name: "morphological_erosion".to_string(),
336 kernel_source: self.get_morphology_kernel_source(),
337 preferred_backend: {
338 #[cfg(feature = "opencl")]
339 {
340 Backend::OpenCL
341 }
342 #[cfg(not(feature = "opencl"))]
343 {
344 Backend::Cpu
345 }
346 },
347 memory_complexity: 2.0,
348 compute_complexity: 9.0,
349 },
350 );
351
352 self.operation_registry.insert(
354 "gaussian_filter".to_string(),
355 OperationInfo {
356 name: "gaussian_filter".to_string(),
357 kernel_source: self.get_gaussian_kernel_source(),
358 preferred_backend: {
359 #[cfg(feature = "opencl")]
360 {
361 Backend::OpenCL
362 }
363 #[cfg(not(feature = "opencl"))]
364 {
365 Backend::Cpu
366 }
367 },
368 memory_complexity: 3.0, compute_complexity: 6.0, },
371 );
372
373 self.operation_registry.insert(
375 "distance_transform".to_string(),
376 OperationInfo {
377 name: "distance_transform".to_string(),
378 kernel_source: self.get_distance_transform_kernel_source(),
379 preferred_backend: {
380 #[cfg(feature = "opencl")]
381 {
382 Backend::OpenCL
383 }
384 #[cfg(not(feature = "opencl"))]
385 {
386 Backend::Cpu
387 }
388 },
389 memory_complexity: 2.0,
390 compute_complexity: 10.0, },
392 );
393
394 Ok(())
395 }
396
397 fn should_use_gpu<T, D>(&self, input: &ArrayView<T, D>, operation_name: &str) -> bool
398 where
399 T: Float + FromPrimitive,
400 D: Dimension,
401 {
402 if input.len() < self.config.min_gpu_size {
404 return false;
405 }
406
407 let capabilities = self.device_manager.get_capabilities();
409 if !capabilities.gpu_available {
410 return false;
411 }
412
413 if let Some(op_info) = self.operation_registry.get(operation_name) {
415 let required_memory =
416 input.len() * std::mem::size_of::<T>() * op_info.memory_complexity as usize;
417 let available_memory = capabilities.gpu_memory_mb * 1024 * 1024;
418
419 if required_memory > available_memory {
420 return false;
421 }
422 }
423
424 true
425 }
426
427 fn select_backend_for_operation(&self, operation_name: &str) -> NdimageResult<Backend> {
428 let capabilities = self.device_manager.get_capabilities();
429
430 if let Some(op_info) = self.operation_registry.get(operation_name) {
432 match op_info.preferred_backend {
433 #[cfg(feature = "cuda")]
434 Backend::Cuda if capabilities.cuda_available => return Ok(Backend::Cuda),
435 #[cfg(feature = "opencl")]
436 Backend::OpenCL if capabilities.opencl_available => return Ok(Backend::OpenCL),
437 #[cfg(all(target_os = "macos", feature = "metal"))]
438 Backend::Metal if capabilities.metal_available => return Ok(Backend::Metal),
439 _ => {}
440 }
441 }
442
443 if capabilities.cuda_available {
445 #[cfg(feature = "cuda")]
446 {
447 Ok(Backend::Cuda)
448 }
449 #[cfg(not(feature = "cuda"))]
450 {
451 Ok(Backend::Cpu)
452 }
453 } else if capabilities.opencl_available {
454 #[cfg(feature = "opencl")]
455 {
456 Ok(Backend::OpenCL)
457 }
458 #[cfg(not(feature = "opencl"))]
459 {
460 Ok(Backend::Cpu)
461 }
462 } else if capabilities.metal_available {
463 #[cfg(all(target_os = "macos", feature = "metal"))]
464 {
465 Ok(Backend::Metal)
466 }
467 #[cfg(not(all(target_os = "macos", feature = "metal")))]
468 {
469 Ok(Backend::Cpu)
470 }
471 } else {
472 Err(NdimageError::GpuNotAvailable(
473 "No GPU backend available".to_string(),
474 ))
475 }
476 }
477
478 fn select_preferred_backend(&self) -> Backend {
479 let capabilities = self.device_manager.get_capabilities();
480
481 if capabilities.cuda_available {
482 #[cfg(feature = "cuda")]
483 {
484 Backend::Cuda
485 }
486 #[cfg(not(feature = "cuda"))]
487 {
488 Backend::Cpu
489 }
490 } else if capabilities.opencl_available {
491 #[cfg(feature = "opencl")]
492 {
493 Backend::OpenCL
494 }
495 #[cfg(not(feature = "opencl"))]
496 {
497 Backend::Cpu
498 }
499 } else if capabilities.metal_available {
500 #[cfg(all(target_os = "macos", feature = "metal"))]
501 {
502 Backend::Metal
503 }
504 #[cfg(not(all(target_os = "macos", feature = "metal")))]
505 {
506 Backend::Cpu
507 }
508 } else {
509 Backend::Cpu
510 }
511 }
512
513 fn execute_gpu_convolution<T>(
516 &self,
517 input: ArrayView2<T>,
518 _kernel: ArrayView2<T>,
519 kernel_source: &str,
520 backend: Backend,
521 mode: BoundaryMode,
522 ) -> NdimageResult<Array<T, Ix2>>
523 where
524 T: Float + FromPrimitive + Clone + Send + Sync + std::ops::DivAssign + std::ops::AddAssign,
525 {
526 self.acceleration_manager
529 .execute_operation("convolution_2d", input.into_dyn(), kernel_source, backend)
530 .map(|result| {
531 result
532 .into_dimensionality::<Ix2>()
533 .expect("Operation failed")
534 })
535 }
536
537 fn execute_gpu_morphology<T>(
538 &self,
539 input: ArrayView2<T>,
540 _structuring_element: ArrayView2<bool>,
541 kernel_source: &str,
542 backend: Backend,
543 mode: BoundaryMode,
544 _operation: MorphologyOperation,
545 ) -> NdimageResult<Array<T, Ix2>>
546 where
547 T: Float + FromPrimitive + Clone + Send + Sync + PartialOrd,
548 {
549 self.acceleration_manager
551 .execute_operation(
552 "morphological_operation",
553 input.into_dyn(),
554 kernel_source,
555 backend,
556 )
557 .map(|result| {
558 result
559 .into_dimensionality::<Ix2>()
560 .expect("Operation failed")
561 })
562 }
563
564 fn execute_gpu_gaussian<T>(
565 &self,
566 input: ArrayView2<T>,
567 _sigma: (f64, f64),
568 kernel_source: &str,
569 backend: Backend,
570 mode: BoundaryMode,
571 ) -> NdimageResult<Array<T, Ix2>>
572 where
573 T: Float + FromPrimitive + Clone + Send + Sync + std::ops::DivAssign + std::ops::AddAssign,
574 {
575 self.acceleration_manager
577 .execute_operation("gaussian_filter", input.into_dyn(), kernel_source, backend)
578 .map(|result| {
579 result
580 .into_dimensionality::<Ix2>()
581 .expect("Operation failed")
582 })
583 }
584
585 fn execute_gpu_distance_transform<T>(
586 &self,
587 input: ArrayView2<T>,
588 _metric: DistanceMetric,
589 kernel_source: &str,
590 backend: Backend,
591 ) -> NdimageResult<Array<T, Ix2>>
592 where
593 T: Float + FromPrimitive + Clone + Send + Sync + PartialOrd,
594 {
595 self.acceleration_manager
597 .execute_operation(
598 "distance_transform",
599 input.into_dyn(),
600 kernel_source,
601 backend,
602 )
603 .map(|result| {
604 result
605 .into_dimensionality::<Ix2>()
606 .expect("Operation failed")
607 })
608 }
609
610 fn fallback_convolution_2d<T>(
613 &self,
614 input: ArrayView2<T>,
615 kernel: ArrayView2<T>,
616 mode: BoundaryMode,
617 ) -> NdimageResult<Array<T, Ix2>>
618 where
619 T: Float
620 + FromPrimitive
621 + Clone
622 + Send
623 + Sync
624 + std::ops::DivAssign
625 + std::ops::AddAssign
626 + std::fmt::Debug
627 + 'static,
628 {
629 let border_mode = match mode {
631 BoundaryMode::Constant => Some(crate::filters::BorderMode::Constant),
632 BoundaryMode::Reflect => Some(crate::filters::BorderMode::Reflect),
633 BoundaryMode::Mirror => Some(crate::filters::BorderMode::Mirror),
634 BoundaryMode::Wrap => Some(crate::filters::BorderMode::Wrap),
635 BoundaryMode::Nearest => Some(crate::filters::BorderMode::Constant), };
637 crate::convolve(&input.to_owned(), &kernel.to_owned(), border_mode)
638 }
639
640 fn fallback_morphological_erosion<T>(
641 &self,
642 input: ArrayView2<T>,
643 structuring_element: ArrayView2<bool>,
644 mode: BoundaryMode,
645 ) -> NdimageResult<Array<T, Ix2>>
646 where
647 T: Float
648 + FromPrimitive
649 + Clone
650 + Send
651 + Sync
652 + PartialOrd
653 + std::ops::DivAssign
654 + std::ops::AddAssign
655 + std::fmt::Debug
656 + 'static,
657 {
658 use crate::morphology::MorphBorderMode;
660 let morph_mode = match mode {
661 BoundaryMode::Constant => MorphBorderMode::Constant,
662 BoundaryMode::Reflect => MorphBorderMode::Reflect,
663 BoundaryMode::Mirror => MorphBorderMode::Mirror,
664 BoundaryMode::Wrap => MorphBorderMode::Wrap,
665 BoundaryMode::Nearest => MorphBorderMode::Nearest,
666 };
667 crate::grey_erosion(
668 &input.to_owned(),
669 None,
670 Some(&structuring_element.to_owned()),
671 Some(morph_mode),
672 None,
673 None,
674 )
675 }
676
677 fn fallback_morphological_dilation<T>(
678 &self,
679 input: ArrayView2<T>,
680 structuring_element: ArrayView2<bool>,
681 mode: BoundaryMode,
682 ) -> NdimageResult<Array<T, Ix2>>
683 where
684 T: Float
685 + FromPrimitive
686 + Clone
687 + Send
688 + Sync
689 + PartialOrd
690 + std::ops::DivAssign
691 + std::ops::AddAssign
692 + std::fmt::Debug
693 + 'static,
694 {
695 use crate::morphology::MorphBorderMode;
697 let morph_mode = match mode {
698 BoundaryMode::Constant => MorphBorderMode::Constant,
699 BoundaryMode::Reflect => MorphBorderMode::Reflect,
700 BoundaryMode::Mirror => MorphBorderMode::Mirror,
701 BoundaryMode::Wrap => MorphBorderMode::Wrap,
702 BoundaryMode::Nearest => MorphBorderMode::Nearest,
703 };
704 crate::grey_dilation(
705 &input.to_owned(),
706 None,
707 Some(&structuring_element.to_owned()),
708 Some(morph_mode),
709 None,
710 None,
711 )
712 }
713
714 fn fallback_gaussian_filter<T>(
715 &self,
716 input: ArrayView2<T>,
717 sigma: (f64, f64),
718 mode: BoundaryMode,
719 ) -> NdimageResult<Array<T, Ix2>>
720 where
721 T: Float + FromPrimitive + Clone + Send + Sync + std::ops::DivAssign + std::ops::AddAssign,
722 {
723 let (rows, cols) = (input.nrows(), input.ncols());
727 let (sigma_x, sigma_y) = sigma;
728
729 let kernel_size_x = (6.0 * sigma_x).ceil() as usize | 1; let kernel_size_y = (6.0 * sigma_y).ceil() as usize | 1;
732
733 let kernel_x = gaussian_kernel_1d(sigma_x, kernel_size_x);
734 let kernel_y = gaussian_kernel_1d(sigma_y, kernel_size_y);
735
736 let mut temp = Array::zeros((rows, cols));
738 for i in 0..rows {
739 for j in 0..cols {
740 let mut sum = T::zero();
741 let mut weight_sum = T::zero();
742
743 let half_k = (kernel_size_x / 2) as isize;
744 for k_idx in 0..kernel_size_x {
745 let j_offset = j as isize + k_idx as isize - half_k;
746 let weight = safe_f64_to_float::<T>(kernel_x[k_idx])?;
747
748 if j_offset >= 0 && j_offset < cols as isize {
749 sum = sum + input[[i, j_offset as usize]] * weight;
750 weight_sum = weight_sum + weight;
751 }
752 }
753
754 if weight_sum > T::zero() {
755 temp[[i, j]] = sum / weight_sum;
756 } else {
757 temp[[i, j]] = input[[i, j]];
758 }
759 }
760 }
761
762 let mut output = Array::zeros((rows, cols));
764 for j in 0..cols {
765 for i in 0..rows {
766 let mut sum = T::zero();
767 let mut weight_sum = T::zero();
768
769 let half_k = (kernel_size_y / 2) as isize;
770 for k_idx in 0..kernel_size_y {
771 let i_offset = i as isize + k_idx as isize - half_k;
772 let weight = safe_f64_to_float::<T>(kernel_y[k_idx])?;
773
774 if i_offset >= 0 && i_offset < rows as isize {
775 sum = sum + temp[[i_offset as usize, j]] * weight;
776 weight_sum = weight_sum + weight;
777 }
778 }
779
780 if weight_sum > T::zero() {
781 output[[i, j]] = sum / weight_sum;
782 } else {
783 output[[i, j]] = temp[[i, j]];
784 }
785 }
786 }
787
788 Ok(output)
789 }
790
791 fn fallback_distance_transform<T>(
792 &self,
793 input: ArrayView2<T>,
794 metric: DistanceMetric,
795 ) -> NdimageResult<Array<T, Ix2>>
796 where
797 T: Float + FromPrimitive + Clone + Send + Sync + PartialOrd,
798 {
799 let bool_input = input.mapv(|x| x > T::zero());
801
802 let result = crate::distance_transform_edt(&bool_input.into_dyn(), None, true, false)?;
804
805 if let Some(distances) = result.0 {
807 let converted_distances = distances.mapv(|x| T::from_f64(x).unwrap_or(T::zero()));
808 converted_distances
810 .into_dimensionality::<scirs2_core::ndarray::Ix2>()
811 .map_err(|_| {
812 NdimageError::DimensionError(
813 "Failed to convert distance transform result to 2D".into(),
814 )
815 })
816 } else {
817 Err(NdimageError::ComputationError(
818 "Distance transform did not return distances".into(),
819 ))
820 }
821 }
822
823 fn get_convolution_kernel_source(&self) -> String {
826 include_str!("backend/kernels/convolution.kernel").to_string()
827 }
828
829 fn get_morphology_kernel_source(&self) -> String {
830 include_str!("backend/kernels/morphology.kernel").to_string()
831 }
832
833 fn get_gaussian_kernel_source(&self) -> String {
834 include_str!("backend/kernels/gaussian_blur.kernel").to_string()
835 }
836
837 fn get_distance_transform_kernel_source(&self) -> String {
838 include_str!("backend/kernels/morphology.kernel").to_string()
840 }
841}
842
843#[derive(Debug, Clone, Copy)]
846pub enum MorphologyOperation {
847 Erosion,
848 Dilation,
849}
850
851#[derive(Debug, Clone, Copy)]
852pub enum DistanceMetric {
853 Euclidean,
854 Manhattan,
855 Chessboard,
856}
857
858#[derive(Debug, Clone)]
859pub struct GpuInfo {
860 pub cuda_available: bool,
862 pub opencl_available: bool,
864 pub metal_available: bool,
866 pub gpu_memory: usize,
868 pub compute_units: u32,
870 pub preferred_backend: Backend,
872}
873
874impl GpuInfo {
875 pub fn display(&self) {
877 println!("=== GPU Information ===");
878 println!("CUDA Available: {}", self.cuda_available);
879 println!("OpenCL Available: {}", self.opencl_available);
880 println!("Metal Available: {}", self.metal_available);
881 println!("GPU Memory: {} MB", self.gpu_memory);
882 println!("Compute Units: {}", self.compute_units);
883 println!("Preferred Backend: {:?}", self.preferred_backend);
884 }
885}
886
887#[allow(dead_code)]
889pub fn create_gpu_operations() -> NdimageResult<GpuOperations> {
890 GpuOperations::new(GpuOperationsConfig::default())
891}
892
893#[allow(dead_code)]
895pub fn create_gpu_operations_with_config(
896 config: GpuOperationsConfig,
897) -> NdimageResult<GpuOperations> {
898 GpuOperations::new(config)
899}
900
901fn gaussian_kernel_1d(sigma: f64, size: usize) -> Vec<f64> {
903 let center = (size / 2) as f64;
904 let coeff = 1.0 / (sigma * (2.0 * PI).sqrt());
905 let denom = 2.0 * sigma * sigma;
906
907 let mut kernel: Vec<f64> = (0..size)
908 .map(|i| {
909 let x = i as f64 - center;
910 coeff * (-x * x / denom).exp()
911 })
912 .collect();
913
914 let sum: f64 = kernel.iter().sum();
916 if sum > 0.0 {
917 for val in &mut kernel {
918 *val /= sum;
919 }
920 }
921
922 kernel
923}
924
925fn safe_f64_to_float<T: Float>(value: f64) -> NdimageResult<T> {
927 T::from(value).ok_or_else(|| {
928 NdimageError::InvalidInput(format!("Cannot convert {} to target float type", value))
929 })
930}
931
932#[cfg(test)]
933mod tests {
934 use super::*;
935 use scirs2_core::ndarray::array;
936
937 #[test]
938 fn test_gpu_operations_creation() {
939 let result = create_gpu_operations();
940 assert!(result.is_ok() || result.is_err());
942 }
943
944 #[test]
945 fn test_gpu_info_display() {
946 let gpu_info = GpuInfo {
947 cuda_available: false,
948 opencl_available: true,
949 metal_available: false,
950 gpu_memory: 8192,
951 compute_units: 16,
952 preferred_backend: {
953 #[cfg(feature = "opencl")]
954 {
955 Backend::OpenCL
956 }
957 #[cfg(not(feature = "opencl"))]
958 {
959 Backend::Cpu
960 }
961 },
962 };
963
964 gpu_info.display();
966 }
967
968 #[test]
969 fn test_gpu_convolution_fallback() {
970 let input = array![[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]];
972
973 let kernel = array![[1.0, 0.0, -1.0], [2.0, 0.0, -2.0], [1.0, 0.0, -1.0]];
974
975 if let Ok(gpu_ops) = create_gpu_operations() {
977 let result =
978 gpu_ops.gpu_convolution_2d(input.view(), kernel.view(), BoundaryMode::Constant);
979
980 assert!(result.is_ok() || result.is_err());
982 }
983 }
984}