1use scirs2_core::ndarray::{Array, ArrayView, ArrayView2, Dimension};
8use scirs2_core::numeric::{Float, FromPrimitive};
9use std::fmt::Debug;
10
11use crate::error::{NdimageError, NdimageResult};
12use crate::utils::safe_f64_to_float;
13
14#[allow(dead_code)]
16fn safe_usize_to_float<T: Float + FromPrimitive>(value: usize) -> NdimageResult<T> {
17 T::from_usize(value).ok_or_else(|| {
18 NdimageError::ComputationError(format!("Failed to convert usize {} to float type", value))
19 })
20}
21
22#[allow(dead_code)]
24fn safe_as_slice<'a, T, D: Dimension>(array: &'a ArrayView<T, D>) -> NdimageResult<&'a [T]> {
25 array.as_slice().ok_or_else(|| {
26 NdimageError::ComputationError("Failed to convert _array to contiguous slice".to_string())
27 })
28}
29
30const GAUSSIAN_BLUR_KERNEL: &str = include_str!("kernels/gaussian_blur.kernel");
32const CONVOLUTION_KERNEL: &str = include_str!("kernels/convolution.kernel");
33const MEDIAN_FILTER_KERNEL: &str = include_str!("kernels/median_filter.kernel");
34const MORPHOLOGY_KERNEL: &str = include_str!("kernels/morphology.kernel");
35
36const ADVANCED_MORPHOLOGY_KERNEL: &str = include_str!("kernels/advanced_morphology.kernel");
38const ADVANCED_EDGE_DETECTION_KERNEL: &str = include_str!("kernels/advanced_edge_detection.kernel");
39const TEXTURE_ANALYSIS_KERNEL: &str = include_str!("kernels/texture_analysis.kernel");
40const ADVANCED_SEGMENTATION_KERNEL: &str = include_str!("kernels/advanced_segmentation.kernel");
41
42const SEPARABLE_GAUSSIAN_KERNEL: &str = r#"
44__kernel void separable_gaussian_1d(
45 __global const float* input__global float* output__global const float* weights,
46 const int size,
47 const int radius,
48 const int direction, // 0 for horizontal, 1 for vertical
49 const int width,
50 const int height
51) {
52 int x = get_global_id(0);
53 int y = get_global_id(1);
54
55 if (x >= width || y >= height) return;
56
57 float sum = 0.0f;
58
59 if (direction == 0) {
60 // Horizontal pass
61 for (int i = -radius; i <= radius; i++) {
62 int px = clamp(x + i, 0, width - 1);
63 sum += input[y * width + px] * weights[i + radius];
64 }
65 } else {
66 // Vertical pass
67 for (int i = -radius; i <= radius; i++) {
68 int py = clamp(y + i, 0, height - 1);
69 sum += input[py * width + x] * weights[i + radius];
70 }
71 }
72
73 output[y * width + x] = sum;
74}
75"#;
76
77const BILATERAL_FILTER_KERNEL: &str = r#"
78__kernel void bilateral_filter_2d(
79 __global const float* input__global float* output,
80 const float sigma_spatial,
81 const float sigma_intensity,
82 const int radius,
83 const int width,
84 const int height
85) {
86 int x = get_global_id(0);
87 int y = get_global_id(1);
88
89 if (x >= width || y >= height) return;
90
91 float center_val = input[y * width + x];
92 float sum = 0.0f;
93 float weight_sum = 0.0f;
94
95 float spatial_coeff = -0.5f / (sigma_spatial * sigma_spatial);
96 float intensity_coeff = -0.5f / (sigma_intensity * sigma_intensity);
97
98 for (int dy = -radius; dy <= radius; dy++) {
99 for (int dx = -radius; dx <= radius; dx++) {
100 int px = clamp(x + dx, 0, width - 1);
101 int py = clamp(y + dy, 0, height - 1);
102
103 float pixel_val = input[py * width + px];
104
105 // Spatial weight
106 float spatial_dist = dx * dx + dy * dy;
107 float spatial_weight = exp(spatial_dist * spatial_coeff);
108
109 // Intensity weight
110 float intensity_dist = (pixel_val - center_val) * (pixel_val - center_val);
111 float intensity_weight = exp(intensity_dist * intensity_coeff);
112
113 float total_weight = spatial_weight * intensity_weight;
114
115 sum += pixel_val * total_weight;
116 weight_sum += total_weight;
117 }
118 }
119
120 output[y * width + x] = sum / weight_sum;
121}
122"#;
123
124const SOBEL_FILTER_KERNEL: &str = r#"
125__kernel void sobel_filter_2d(
126 __global const float* input__global float* output_x__global float* output_y__global float* magnitude,
127 const int width,
128 const int height
129) {
130 int x = get_global_id(0);
131 int y = get_global_id(1);
132
133 if (x >= width || y >= height) return;
134
135 // Sobel X kernel: [-1, 0, 1; -2, 0, 2; -1, 0, 1]
136 float gx = 0.0f;
137 gx += -1.0f * input[clamp(y-1, 0, height-1) * width + clamp(x-1, 0, width-1)];
138 gx += -2.0f * input[y * width + clamp(x-1, 0, width-1)];
139 gx += -1.0f * input[clamp(y+1, 0, height-1) * width + clamp(x-1, 0, width-1)];
140 gx += 1.0f * input[clamp(y-1, 0, height-1) * width + clamp(x+1, 0, width-1)];
141 gx += 2.0f * input[y * width + clamp(x+1, 0, width-1)];
142 gx += 1.0f * input[clamp(y+1, 0, height-1) * width + clamp(x+1, 0, width-1)];
143
144 // Sobel Y kernel: [-1, -2, -1; 0, 0, 0; 1, 2, 1]
145 float gy = 0.0f;
146 gy += -1.0f * input[clamp(y-1, 0, height-1) * width + clamp(x-1, 0, width-1)];
147 gy += -2.0f * input[clamp(y-1, 0, height-1) * width + x];
148 gy += -1.0f * input[clamp(y-1, 0, height-1) * width + clamp(x+1, 0, width-1)];
149 gy += 1.0f * input[clamp(y+1, 0, height-1) * width + clamp(x-1, 0, width-1)];
150 gy += 2.0f * input[clamp(y+1, 0, height-1) * width + x];
151 gy += 1.0f * input[clamp(y+1, 0, height-1) * width + clamp(x+1, 0, width-1)];
152
153 output_x[y * width + x] = gx;
154 output_y[y * width + x] = gy;
155 magnitude[y * width + x] = sqrt(gx * gx + gy * gy);
156}
157"#;
158
159const HISTOGRAM_KERNEL: &str = r#"
160__kernel void compute_histogram(
161 __global const float* input__global int* histogram,
162 const float min_val,
163 const float max_val,
164 const int num_bins,
165 const int total_pixels
166) {
167 int idx = get_global_id(0);
168
169 if (idx >= total_pixels) return;
170
171 float val = input[idx];
172 if (val >= min_val && val <= max_val) {
173 float normalized = (val - min_val) / (max_val - min_val);
174 int bin = (int)(normalized * (num_bins - 1));
175 bin = clamp(bin, 0, num_bins - 1);
176 atomic_inc(&histogram[bin]);
177 }
178}
179"#;
180
181const MORPHOLOGY_EROSION_KERNEL: &str = r#"
182__kernel void morphology_erosion_2d(
183 __global const float* input__global float* output__global const int* structure,
184 const int struct_width,
185 const int struct_height,
186 const int width,
187 const int height
188) {
189 int x = get_global_id(0);
190 int y = get_global_id(1);
191
192 if (x >= width || y >= height) return;
193
194 float min_val = INFINITY;
195 int struct_center_x = struct_width / 2;
196 int struct_center_y = struct_height / 2;
197
198 for (int sy = 0; sy < struct_height; sy++) {
199 for (int sx = 0; sx < struct_width; sx++) {
200 if (structure[sy * struct_width + sx]) {
201 int px = x + sx - struct_center_x;
202 int py = y + sy - struct_center_y;
203 px = clamp(px, 0, width - 1);
204 py = clamp(py, 0, height - 1);
205
206 float val = input[py * width + px];
207 min_val = fmin(min_val, val);
208 }
209 }
210 }
211
212 output[y * width + x] = min_val;
213}
214"#;
215
216const WAVELET_TRANSFORM_KERNEL: &str = r#"
217__kernel void wavelet_transform_2d(
218 __global const float* input__global float* ll_output__global float* lh_output__global float* hl_output__global float* hh_output__global const float* low_filter__global const float* high_filter,
219 const int filter_length,
220 const int width,
221 const int height
222) {
223 int x = get_global_id(0);
224 int y = get_global_id(1);
225
226 if (x >= width/2 || y >= height/2) return;
227
228 int filter_radius = filter_length / 2;
229
230 // Apply row filters first
231 float ll_row = 0.0f, lh_row = 0.0f;
232 for (int i = 0; i < filter_length; i++) {
233 int px = clamp(x * 2 + i - filter_radius, 0, width - 1);
234 ll_row += input[y * 2 * width + px] * low_filter[i];
235 lh_row += input[y * 2 * width + px] * high_filter[i];
236 }
237
238 // Apply column filters to row results
239 float ll = 0.0f, lh = 0.0f, hl = 0.0f, hh = 0.0f;
240 for (int i = 0; i < filter_length; i++) {
241 int py = clamp(y * 2 + i - filter_radius, 0, height - 1);
242 // This is simplified - full implementation would need temporary storage
243 ll += ll_row * low_filter[i];
244 lh += lh_row * low_filter[i];
245 hl += ll_row * high_filter[i];
246 hh += lh_row * high_filter[i];
247 }
248
249 ll_output[y * (width/2) + x] = ll;
250 lh_output[y * (width/2) + x] = lh;
251 hl_output[y * (width/2) + x] = hl;
252 hh_output[y * (width/2) + x] = hh;
253}
254"#;
255
256const TEMPLATE_MATCHING_KERNEL: &str = r#"
257__kernel void template_matching_ncc(
258 __global const float* image__global const float* template__global float* output,
259 const int image_width,
260 const int image_height,
261 const int template_width,
262 const int template_height,
263 const float template_mean,
264 const float template_norm
265) {
266 int x = get_global_id(0);
267 int y = get_global_id(1);
268
269 int output_width = image_width - template_width + 1;
270 int output_height = image_height - template_height + 1;
271
272 if (x >= output_width || y >= output_height) return;
273
274 // Compute image patch mean
275 float image_sum = 0.0f;
276 for (int ty = 0; ty < template_height; ty++) {
277 for (int tx = 0; tx < template_width; tx++) {
278 image_sum += image[(y + ty) * image_width + (x + tx)];
279 }
280 }
281 float image_mean = image_sum / (template_width * template_height);
282
283 // Compute normalized cross-correlation
284 float correlation = 0.0f;
285 float image_variance = 0.0f;
286
287 for (int ty = 0; ty < template_height; ty++) {
288 for (int tx = 0; tx < template_width; tx++) {
289 float image_val = image[(y + ty) * image_width + (x + tx)];
290 float template_val = template[ty * template_width + tx];
291
292 float image_centered = image_val - image_mean;
293 float template_centered = template_val - template_mean;
294
295 correlation += image_centered * template_centered;
296 image_variance += image_centered * image_centered;
297 }
298 }
299
300 float image_norm = sqrt(image_variance);
301 float ncc = (image_norm > 0.0f) ? correlation / (image_norm * template_norm) : 0.0f;
302
303 output[y * output_width + x] = ncc;
304}
305"#;
306
307const DISTANCE_TRANSFORM_KERNEL: &str = r#"
308__kernel void distance_transform_edt(
309 __global const int* binary_image__global float* distance,
310 const int width,
311 const int height
312) {
313 int x = get_global_id(0);
314 int y = get_global_id(1);
315
316 if (x >= width || y >= height) return;
317
318 int idx = y * width + x;
319
320 if (binary_image[idx] == 0) {
321 distance[idx] = 0.0f;
322 return;
323 }
324
325 float min_dist = INFINITY;
326
327 // Brute force approach - can be optimized with separable algorithm
328 for (int py = 0; py < height; py++) {
329 for (int px = 0; px < width; px++) {
330 if (binary_image[py * width + px] == 0) {
331 float dx = px - x;
332 float dy = py - y;
333 float dist = sqrt(dx * dx + dy * dy);
334 min_dist = fmin(min_dist, dist);
335 }
336 }
337 }
338
339 distance[idx] = min_dist;
340}
341"#;
342
343const GABOR_FILTER_KERNEL: &str = r#"
344__kernel void gabor_filter_2d(
345 __global const float* input__global float* output,
346 const float sigma_x,
347 const float sigma_y,
348 const float theta,
349 const float lambda,
350 const float gamma,
351 const float psi,
352 const int kernel_size,
353 const int width,
354 const int height
355) {
356 int x = get_global_id(0);
357 int y = get_global_id(1);
358
359 if (x >= width || y >= height) return;
360
361 float sum = 0.0f;
362 int half_size = kernel_size / 2;
363
364 float cos_theta = cos(theta);
365 float sin_theta = sin(theta);
366
367 for (int ky = -half_size; ky <= half_size; ky++) {
368 for (int kx = -half_size; kx <= half_size; kx++) {
369 // Rotate coordinates
370 float x_rot = kx * cos_theta + ky * sin_theta;
371 float y_rot = -kx * sin_theta + ky * cos_theta;
372
373 // Gabor function
374 float envelope = exp(-0.5f * (x_rot * x_rot / (sigma_x * sigma_x) +
375 gamma * gamma * y_rot * y_rot / (sigma_y * sigma_y)));
376 float wave = cos(2.0f * M_PI * x_rot / lambda + psi);
377 float gabor_val = envelope * wave;
378
379 // Apply to image
380 int px = clamp(x + kx, 0, width - 1);
381 int py = clamp(y + ky, 0, height - 1);
382 sum += input[py * width + px] * gabor_val;
383 }
384 }
385
386 output[y * width + x] = sum;
387}
388"#;
389
390const LAPLACIAN_KERNEL: &str = r#"
391__kernel void laplacian_filter_2d(
392 __global const float* input__global float* output,
393 const int width,
394 const int height,
395 const int connectivity // 4 or 8
396) {
397 int x = get_global_id(0);
398 int y = get_global_id(1);
399
400 if (x >= width || y >= height) return;
401
402 float center = input[y * width + x];
403 float sum = 0.0f;
404
405 if (connectivity == 4) {
406 // 4-connected Laplacian: [0, -1, 0; -1, 4, -1; 0, -1, 0]
407 sum += 4.0f * center;
408 sum += -1.0f * input[clamp(y-1, 0, height-1) * width + x]; // top
409 sum += -1.0f * input[clamp(y+1, 0, height-1) * width + x]; // bottom
410 sum += -1.0f * input[y * width + clamp(x-1, 0, width-1)]; // left
411 sum += -1.0f * input[y * width + clamp(x+1, 0, width-1)]; // right
412 } else {
413 // 8-connected Laplacian: [-1, -1, -1; -1, 8, -1; -1, -1, -1]
414 sum += 8.0f * center;
415 for (int dy = -1; dy <= 1; dy++) {
416 for (int dx = -1; dx <= 1; dx++) {
417 if (dx == 0 && dy == 0) continue;
418 int px = clamp(x + dx, 0, width - 1);
419 int py = clamp(y + dy, 0, height - 1);
420 sum += -1.0f * input[py * width + px];
421 }
422 }
423 }
424
425 output[y * width + x] = sum;
426}
427"#;
428
429pub struct KernelRegistry {
431 kernels: std::collections::HashMap<String, KernelInfo>,
432}
433
434pub struct KernelInfo {
436 pub name: String,
437 pub source: String,
438 pub entry_point: String,
439 pub work_dimensions: usize,
440}
441
442impl KernelRegistry {
443 pub fn new() -> Self {
444 let mut registry = Self {
445 kernels: std::collections::HashMap::new(),
446 };
447
448 registry.register_builtin_kernels();
450 registry
451 }
452
453 fn register_builtin_kernels(&mut self) {
454 self.register_kernel(
456 "gaussian_blur_2d",
457 GAUSSIAN_BLUR_KERNEL,
458 "gaussian_blur_2d",
459 2,
460 );
461
462 self.register_kernel("convolution_2d", CONVOLUTION_KERNEL, "convolution_2d", 2);
464
465 self.register_kernel(
467 "median_filter_2d",
468 MEDIAN_FILTER_KERNEL,
469 "median_filter_2d",
470 2,
471 );
472
473 self.register_kernel("morphology_erosion", MORPHOLOGY_KERNEL, "erosion_2d", 2);
475 self.register_kernel("morphology_dilation", MORPHOLOGY_KERNEL, "dilation_2d", 2);
476
477 self.register_kernel(
479 "separable_gaussian_1d",
480 SEPARABLE_GAUSSIAN_KERNEL,
481 "separable_gaussian_1d",
482 2,
483 );
484 self.register_kernel(
485 "bilateral_filter_2d",
486 BILATERAL_FILTER_KERNEL,
487 "bilateral_filter_2d",
488 2,
489 );
490 self.register_kernel("sobel_filter_2d", SOBEL_FILTER_KERNEL, "sobel_filter_2d", 2);
491 self.register_kernel(
492 "laplacian_filter_2d",
493 LAPLACIAN_KERNEL,
494 "laplacian_filter_2d",
495 2,
496 );
497
498 self.register_kernel(
500 "hit_or_miss_2d",
501 ADVANCED_MORPHOLOGY_KERNEL,
502 "hit_or_miss_2d",
503 2,
504 );
505 self.register_kernel(
506 "morphological_gradient_2d",
507 ADVANCED_MORPHOLOGY_KERNEL,
508 "morphological_gradient_2d",
509 2,
510 );
511 self.register_kernel(
512 "tophat_transform_2d",
513 ADVANCED_MORPHOLOGY_KERNEL,
514 "tophat_transform_2d",
515 2,
516 );
517 self.register_kernel("skeleton_2d", ADVANCED_MORPHOLOGY_KERNEL, "skeleton_2d", 2);
518 self.register_kernel(
519 "distance_transform_chamfer_2d",
520 ADVANCED_MORPHOLOGY_KERNEL,
521 "distance_transform_chamfer_2d",
522 2,
523 );
524
525 self.register_kernel(
527 "canny_gradient_2d",
528 ADVANCED_EDGE_DETECTION_KERNEL,
529 "canny_gradient_2d",
530 2,
531 );
532 self.register_kernel(
533 "canny_non_maximum_suppression_2d",
534 ADVANCED_EDGE_DETECTION_KERNEL,
535 "canny_non_maximum_suppression_2d",
536 2,
537 );
538 self.register_kernel(
539 "canny_double_threshold_2d",
540 ADVANCED_EDGE_DETECTION_KERNEL,
541 "canny_double_threshold_2d",
542 2,
543 );
544 self.register_kernel(
545 "canny_edge_tracking_2d",
546 ADVANCED_EDGE_DETECTION_KERNEL,
547 "canny_edge_tracking_2d",
548 2,
549 );
550 self.register_kernel(
551 "laplacian_of_gaussian_2d",
552 ADVANCED_EDGE_DETECTION_KERNEL,
553 "laplacian_of_gaussian_2d",
554 2,
555 );
556 self.register_kernel(
557 "zero_crossing_2d",
558 ADVANCED_EDGE_DETECTION_KERNEL,
559 "zero_crossing_2d",
560 2,
561 );
562 self.register_kernel(
563 "harris_corner_response_2d",
564 ADVANCED_EDGE_DETECTION_KERNEL,
565 "harris_corner_response_2d",
566 2,
567 );
568 self.register_kernel(
569 "oriented_fast_keypoints_2d",
570 ADVANCED_EDGE_DETECTION_KERNEL,
571 "oriented_fast_keypoints_2d",
572 2,
573 );
574
575 self.register_kernel(
577 "local_binary_pattern_2d",
578 TEXTURE_ANALYSIS_KERNEL,
579 "local_binary_pattern_2d",
580 2,
581 );
582 self.register_kernel(
583 "uniform_local_binary_pattern_2d",
584 TEXTURE_ANALYSIS_KERNEL,
585 "uniform_local_binary_pattern_2d",
586 2,
587 );
588 self.register_kernel(
589 "glcm_cooccurrence_2d",
590 TEXTURE_ANALYSIS_KERNEL,
591 "glcm_cooccurrence_2d",
592 2,
593 );
594 self.register_kernel(
595 "glcmfeatures_2d",
596 TEXTURE_ANALYSIS_KERNEL,
597 "glcmfeatures_2d",
598 1,
599 );
600 self.register_kernel(
601 "gabor_filter_2d",
602 TEXTURE_ANALYSIS_KERNEL,
603 "gabor_filter_2d",
604 2,
605 );
606 self.register_kernel(
607 "lawstexture_energy_2d",
608 TEXTURE_ANALYSIS_KERNEL,
609 "lawstexture_energy_2d",
610 2,
611 );
612 self.register_kernel(
613 "fractal_dimension_2d",
614 TEXTURE_ANALYSIS_KERNEL,
615 "fractal_dimension_2d",
616 2,
617 );
618
619 self.register_kernel(
621 "watershed_labels_init_2d",
622 ADVANCED_SEGMENTATION_KERNEL,
623 "watershed_labels_init_2d",
624 2,
625 );
626 self.register_kernel(
627 "watershed_propagation_2d",
628 ADVANCED_SEGMENTATION_KERNEL,
629 "watershed_propagation_2d",
630 2,
631 );
632 self.register_kernel(
633 "region_growing_2d",
634 ADVANCED_SEGMENTATION_KERNEL,
635 "region_growing_2d",
636 2,
637 );
638 self.register_kernel(
639 "mean_shift_2d",
640 ADVANCED_SEGMENTATION_KERNEL,
641 "mean_shift_2d",
642 2,
643 );
644 self.register_kernel(
645 "level_set_evolution_2d",
646 ADVANCED_SEGMENTATION_KERNEL,
647 "level_set_evolution_2d",
648 2,
649 );
650 self.register_kernel(
651 "chan_vese_energy_2d",
652 ADVANCED_SEGMENTATION_KERNEL,
653 "chan_vese_energy_2d",
654 2,
655 );
656 self.register_kernel(
657 "active_contour_evolution_2d",
658 ADVANCED_SEGMENTATION_KERNEL,
659 "active_contour_evolution_2d",
660 1,
661 );
662 self.register_kernel(
663 "superpixel_slic_2d",
664 ADVANCED_SEGMENTATION_KERNEL,
665 "superpixel_slic_2d",
666 2,
667 );
668 }
669
670 pub fn register_kernel(&mut self, name: &str, source: &str, entrypoint: &str, dims: usize) {
671 self.kernels.insert(
672 name.to_string(),
673 KernelInfo {
674 name: name.to_string(),
675 source: source.to_string(),
676 entry_point: entrypoint.to_string(),
677 work_dimensions: dims,
678 },
679 );
680 }
681
682 pub fn get_kernel(&self, name: &str) -> Option<&KernelInfo> {
683 self.kernels.get(name)
684 }
685}
686
687pub trait GpuBuffer<T>: Send + Sync {
689 fn size(&self) -> usize;
690 fn copy_from_host(&mut self, data: &[T]) -> NdimageResult<()>;
691 fn copy_to_host(&self, data: &mut [T]) -> NdimageResult<()>;
692 fn as_any(&self) -> &dyn std::any::Any;
693 fn as_any_mut(&mut self) -> &mut dyn std::any::Any;
694}
695
696pub trait GpuKernelExecutor<T>: Send + Sync {
698 fn execute_kernel(
699 &self,
700 kernel: &KernelInfo,
701 inputs: &[&dyn GpuBuffer<T>],
702 outputs: &[&mut dyn GpuBuffer<T>],
703 work_size: &[usize],
704 params: &[T],
705 ) -> NdimageResult<()>;
706}
707
708pub struct CpuFallbackBuffer<T> {
711 data: Vec<T>,
712}
713
714impl<T> CpuFallbackBuffer<T>
715where
716 T: Clone + Default,
717{
718 pub fn from_slice(data: &[T]) -> NdimageResult<Self> {
720 Ok(Self {
721 data: data.to_vec(),
722 })
723 }
724
725 pub fn empty(size: usize) -> NdimageResult<Self> {
727 Ok(Self {
728 data: vec![T::default(); size],
729 })
730 }
731}
732
733impl<T> GpuBuffer<T> for CpuFallbackBuffer<T>
734where
735 T: Clone + Default + Send + Sync + Copy + 'static,
736{
737 fn size(&self) -> usize {
738 self.data.len()
739 }
740
741 fn copy_from_host(&mut self, data: &[T]) -> NdimageResult<()> {
742 if data.len() != self.data.len() {
743 return Err(NdimageError::InvalidInput(format!(
744 "Data size mismatch: expected {}, got {}",
745 self.data.len(),
746 data.len()
747 )));
748 }
749 self.data.copy_from_slice(data);
750 Ok(())
751 }
752
753 fn copy_to_host(&self, data: &mut [T]) -> NdimageResult<()> {
754 if data.len() != self.data.len() {
755 return Err(NdimageError::InvalidInput(format!(
756 "Data size mismatch: expected {}, got {}",
757 self.data.len(),
758 data.len()
759 )));
760 }
761 data.copy_from_slice(&self.data);
762 Ok(())
763 }
764
765 fn as_any(&self) -> &dyn std::any::Any {
766 self
767 }
768
769 fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
770 self
771 }
772}
773
774pub struct CpuFallbackExecutor;
777
778impl CpuFallbackExecutor {
779 pub fn new() -> Self {
780 Self
781 }
782}
783
784impl<T> GpuKernelExecutor<T> for CpuFallbackExecutor
785where
786 T: Float + FromPrimitive + Clone + Send + Sync + 'static,
787{
788 fn execute_kernel(
789 &self,
790 kernel: &KernelInfo,
791 inputs: &[&dyn GpuBuffer<T>],
792 outputs: &[&mut dyn GpuBuffer<T>],
793 work_size: &[usize],
794 _params: &[T],
795 ) -> NdimageResult<()> {
796 Err(NdimageError::NotImplementedError(format!(
802 "CPU fallback execution for GPU kernel '{}' is not fully implemented. Work _size: {:?}, {} inputs, {} outputs",
803 kernel.name,
804 work_size,
805 inputs.len(),
806 outputs.len()
807 )))
808 }
809}
810
811#[allow(dead_code)]
813pub fn gpu_gaussian_filter_2d<T>(
814 input: &ArrayView2<T>,
815 sigma: [T; 2],
816 executor: &dyn GpuKernelExecutor<T>,
817) -> NdimageResult<Array<T, scirs2_core::ndarray::Ix2>>
818where
819 T: Float + FromPrimitive + Debug + Clone + Default + Send + Sync + 'static,
820{
821 let (h, w) = input.dim();
822
823 let input_buffer = allocate_gpu_buffer(safe_as_slice(input)?)?;
826 let mut output_buffer = allocate_gpu_buffer_empty::<T>(h * w)?;
827
828 let params = vec![
830 sigma[0],
831 sigma[1],
832 safe_usize_to_float::<T>(h)?,
833 safe_usize_to_float::<T>(w)?,
834 ];
835
836 let registry = KernelRegistry::new();
838 let kernel = registry
839 .get_kernel("gaussian_blur_2d")
840 .ok_or_else(|| NdimageError::ComputationError("Kernel not found".into()))?;
841
842 executor.execute_kernel(
844 kernel,
845 &[input_buffer.as_ref()],
846 &[output_buffer.as_mut()],
847 &[h, w],
848 ¶ms,
849 )?;
850
851 let mut output_data = vec![T::zero(); h * w];
853 output_buffer.copy_to_host(&mut output_data)?;
854
855 Ok(Array::from_shape_vec((h, w), output_data)?)
856}
857
858#[allow(dead_code)]
860pub fn gpu_convolve_2d<T>(
861 input: &ArrayView2<T>,
862 kernel: &ArrayView2<T>,
863 executor: &dyn GpuKernelExecutor<T>,
864) -> NdimageResult<Array<T, scirs2_core::ndarray::Ix2>>
865where
866 T: Float + FromPrimitive + Debug + Clone + Default + Send + Sync + 'static,
867{
868 let (ih, iw) = input.dim();
869 let (kh, kw) = kernel.dim();
870
871 let input_buffer = allocate_gpu_buffer(safe_as_slice(input)?)?;
873 let kernel_buffer = allocate_gpu_buffer(safe_as_slice(kernel)?)?;
874 let mut output_buffer = allocate_gpu_buffer_empty::<T>(ih * iw)?;
875
876 let params = vec![
878 safe_usize_to_float::<T>(ih)?,
879 safe_usize_to_float::<T>(iw)?,
880 safe_usize_to_float::<T>(kh)?,
881 safe_usize_to_float::<T>(kw)?,
882 ];
883
884 let registry = KernelRegistry::new();
886 let gpu_kernel = registry
887 .get_kernel("convolution_2d")
888 .ok_or_else(|| NdimageError::ComputationError("Kernel not found".into()))?;
889
890 executor.execute_kernel(
892 gpu_kernel,
893 &[input_buffer.as_ref(), kernel_buffer.as_ref()],
894 &[output_buffer.as_mut()],
895 &[ih, iw],
896 ¶ms,
897 )?;
898
899 let mut output_data = vec![T::zero(); ih * iw];
901 output_buffer.copy_to_host(&mut output_data)?;
902
903 Ok(Array::from_shape_vec((ih, iw), output_data)?)
904}
905
906#[allow(dead_code)]
908pub fn gpu_median_filter_2d<T>(
909 input: &ArrayView2<T>,
910 size: [usize; 2],
911 executor: &dyn GpuKernelExecutor<T>,
912) -> NdimageResult<Array<T, scirs2_core::ndarray::Ix2>>
913where
914 T: Float + FromPrimitive + Debug + Clone + Default + Send + Sync + 'static,
915{
916 let (h, w) = input.dim();
917
918 let input_buffer = allocate_gpu_buffer(safe_as_slice(input)?)?;
920 let mut output_buffer = allocate_gpu_buffer_empty::<T>(h * w)?;
921
922 let params = vec![
924 safe_usize_to_float::<T>(h)?,
925 safe_usize_to_float::<T>(w)?,
926 safe_usize_to_float::<T>(size[0])?,
927 safe_usize_to_float::<T>(size[1])?,
928 ];
929
930 let registry = KernelRegistry::new();
932 let kernel = registry
933 .get_kernel("median_filter_2d")
934 .ok_or_else(|| NdimageError::ComputationError("Kernel not found".into()))?;
935
936 executor.execute_kernel(
938 kernel,
939 &[input_buffer.as_ref()],
940 &[output_buffer.as_mut()],
941 &[h, w],
942 ¶ms,
943 )?;
944
945 let mut output_data = vec![T::zero(); h * w];
947 output_buffer.copy_to_host(&mut output_data)?;
948
949 Ok(Array::from_shape_vec((h, w), output_data)?)
950}
951
952#[allow(dead_code)]
954pub fn gpu_erosion_2d<T>(
955 input: &ArrayView2<T>,
956 structure: &ArrayView2<bool>,
957 executor: &dyn GpuKernelExecutor<T>,
958) -> NdimageResult<Array<T, scirs2_core::ndarray::Ix2>>
959where
960 T: Float + FromPrimitive + Debug + Clone + Default + Send + Sync + 'static,
961{
962 let (h, w) = input.dim();
963 let (sh, sw) = structure.dim();
964
965 let structure_t: Vec<T> = structure
967 .iter()
968 .map(|&b| if b { T::one() } else { T::zero() })
969 .collect();
970
971 let input_buffer = allocate_gpu_buffer(safe_as_slice(input)?)?;
973 let structure_buffer = allocate_gpu_buffer(&structure_t)?;
974 let mut output_buffer = allocate_gpu_buffer_empty::<T>(h * w)?;
975
976 let params = vec![
978 safe_usize_to_float::<T>(h)?,
979 safe_usize_to_float::<T>(w)?,
980 safe_usize_to_float::<T>(sh)?,
981 safe_usize_to_float::<T>(sw)?,
982 ];
983
984 let registry = KernelRegistry::new();
986 let kernel = registry
987 .get_kernel("morphology_erosion")
988 .ok_or_else(|| NdimageError::ComputationError("Kernel not found".into()))?;
989
990 executor.execute_kernel(
992 kernel,
993 &[input_buffer.as_ref(), structure_buffer.as_ref()],
994 &[output_buffer.as_mut()],
995 &[h, w],
996 ¶ms,
997 )?;
998
999 let mut output_data = vec![T::zero(); h * w];
1001 output_buffer.copy_to_host(&mut output_data)?;
1002
1003 Ok(Array::from_shape_vec((h, w), output_data)?)
1004}
1005
1006#[allow(dead_code)]
1008pub fn gpu_separable_gaussian_filter_2d<T>(
1009 input: &ArrayView2<T>,
1010 sigma: [T; 2],
1011 executor: &dyn GpuKernelExecutor<T>,
1012) -> NdimageResult<Array<T, scirs2_core::ndarray::Ix2>>
1013where
1014 T: Float + FromPrimitive + Debug + Clone + Default + Send + Sync + 'static,
1015{
1016 let (h, w) = input.dim();
1017
1018 let radius_x = (safe_f64_to_float::<T>(3.0)? * sigma[0])
1020 .to_usize()
1021 .ok_or_else(|| {
1022 NdimageError::ComputationError("Failed to convert radius_x to usize".to_string())
1023 })?;
1024 let radius_y = (safe_f64_to_float::<T>(3.0)? * sigma[1])
1025 .to_usize()
1026 .ok_or_else(|| {
1027 NdimageError::ComputationError("Failed to convert radius_y to usize".to_string())
1028 })?;
1029
1030 let max_radius = radius_x.max(radius_y);
1031 let weights_size = 2 * max_radius + 1;
1032
1033 let weights_x: Result<Vec<T>, NdimageError> = (0..weights_size)
1035 .map(|i| -> NdimageResult<T> {
1036 let offset = safe_usize_to_float::<T>(i)? - safe_usize_to_float::<T>(max_radius)?;
1037 let exp_arg = -safe_f64_to_float::<T>(0.5)? * offset * offset / (sigma[0] * sigma[0]);
1038 Ok(exp_arg.exp())
1039 })
1040 .collect();
1041 let weights_x = weights_x?;
1042
1043 let weights_y: Result<Vec<T>, NdimageError> = (0..weights_size)
1045 .map(|i| -> NdimageResult<T> {
1046 let offset = safe_usize_to_float::<T>(i)? - safe_usize_to_float::<T>(max_radius)?;
1047 let exp_arg = -safe_f64_to_float::<T>(0.5)? * offset * offset / (sigma[1] * sigma[1]);
1048 Ok(exp_arg.exp())
1049 })
1050 .collect();
1051 let weights_y = weights_y?;
1052
1053 let input_buffer = allocate_gpu_buffer(safe_as_slice(input)?)?;
1055 let weights_x_buffer = allocate_gpu_buffer(&weights_x)?;
1056 let weights_y_buffer = allocate_gpu_buffer(&weights_y)?;
1057 let mut temp_buffer = allocate_gpu_buffer_empty::<T>(h * w)?;
1058 let mut output_buffer = allocate_gpu_buffer_empty::<T>(h * w)?;
1059
1060 let registry = KernelRegistry::new();
1061 let kernel = registry
1062 .get_kernel("separable_gaussian_1d")
1063 .ok_or_else(|| NdimageError::ComputationError("Kernel not found".into()))?;
1064
1065 let params_h = vec![
1067 safe_usize_to_float::<T>(h * w)?,
1068 safe_usize_to_float::<T>(radius_x)?,
1069 T::zero(), safe_usize_to_float::<T>(w)?,
1071 safe_usize_to_float::<T>(h)?,
1072 ];
1073
1074 executor.execute_kernel(
1075 kernel,
1076 &[input_buffer.as_ref(), weights_x_buffer.as_ref()],
1077 &[temp_buffer.as_mut()],
1078 &[h, w],
1079 ¶ms_h,
1080 )?;
1081
1082 let params_v = vec![
1084 safe_usize_to_float::<T>(h * w)?,
1085 safe_usize_to_float::<T>(radius_y)?,
1086 T::one(), safe_usize_to_float::<T>(w)?,
1088 safe_usize_to_float::<T>(h)?,
1089 ];
1090
1091 executor.execute_kernel(
1092 kernel,
1093 &[temp_buffer.as_ref(), weights_y_buffer.as_ref()],
1094 &[output_buffer.as_mut()],
1095 &[h, w],
1096 ¶ms_v,
1097 )?;
1098
1099 let mut output_data = vec![T::zero(); h * w];
1101 output_buffer.copy_to_host(&mut output_data)?;
1102
1103 Ok(Array::from_shape_vec((h, w), output_data)?)
1104}
1105
1106#[allow(dead_code)]
1108pub fn gpu_bilateral_filter_2d<T>(
1109 input: &ArrayView2<T>,
1110 sigma_spatial: T,
1111 sigma_intensity: T,
1112 radius: usize,
1113 executor: &dyn GpuKernelExecutor<T>,
1114) -> NdimageResult<Array<T, scirs2_core::ndarray::Ix2>>
1115where
1116 T: Float + FromPrimitive + Debug + Clone + Default + Send + Sync + 'static,
1117{
1118 let (h, w) = input.dim();
1119
1120 let input_buffer = allocate_gpu_buffer(safe_as_slice(input)?)?;
1122 let mut output_buffer = allocate_gpu_buffer_empty::<T>(h * w)?;
1123
1124 let params = vec![
1126 sigma_spatial,
1127 sigma_intensity,
1128 safe_usize_to_float::<T>(radius)?,
1129 safe_usize_to_float::<T>(w)?,
1130 safe_usize_to_float::<T>(h)?,
1131 ];
1132
1133 let registry = KernelRegistry::new();
1135 let kernel = registry
1136 .get_kernel("bilateral_filter_2d")
1137 .ok_or_else(|| NdimageError::ComputationError("Kernel not found".into()))?;
1138
1139 executor.execute_kernel(
1141 kernel,
1142 &[input_buffer.as_ref()],
1143 &[output_buffer.as_mut()],
1144 &[h, w],
1145 ¶ms,
1146 )?;
1147
1148 let mut output_data = vec![T::zero(); h * w];
1150 output_buffer.copy_to_host(&mut output_data)?;
1151
1152 Ok(Array::from_shape_vec((h, w), output_data)?)
1153}
1154
1155#[allow(dead_code)]
1157pub fn gpu_sobel_filter_2d<T>(
1158 input: &ArrayView2<T>,
1159 executor: &dyn GpuKernelExecutor<T>,
1160) -> NdimageResult<(
1161 Array<T, scirs2_core::ndarray::Ix2>,
1162 Array<T, scirs2_core::ndarray::Ix2>,
1163 Array<T, scirs2_core::ndarray::Ix2>,
1164)>
1165where
1166 T: Float + FromPrimitive + Debug + Clone + Default + Send + Sync + 'static,
1167{
1168 let (h, w) = input.dim();
1169
1170 let input_buffer = allocate_gpu_buffer(safe_as_slice(input)?)?;
1172 let mut output_x_buffer = allocate_gpu_buffer_empty::<T>(h * w)?;
1173 let mut output_y_buffer = allocate_gpu_buffer_empty::<T>(h * w)?;
1174 let mut magnitude_buffer = allocate_gpu_buffer_empty::<T>(h * w)?;
1175
1176 let params = vec![safe_usize_to_float::<T>(w)?, safe_usize_to_float::<T>(h)?];
1178
1179 let registry = KernelRegistry::new();
1181 let kernel = registry
1182 .get_kernel("sobel_filter_2d")
1183 .ok_or_else(|| NdimageError::ComputationError("Kernel not found".into()))?;
1184
1185 executor.execute_kernel(
1187 kernel,
1188 &[input_buffer.as_ref()],
1189 &[
1190 output_x_buffer.as_mut(),
1191 output_y_buffer.as_mut(),
1192 magnitude_buffer.as_mut(),
1193 ],
1194 &[h, w],
1195 ¶ms,
1196 )?;
1197
1198 let mut output_x_data = vec![T::zero(); h * w];
1200 let mut output_y_data = vec![T::zero(); h * w];
1201 let mut magnitude_data = vec![T::zero(); h * w];
1202
1203 output_x_buffer.copy_to_host(&mut output_x_data)?;
1204 output_y_buffer.copy_to_host(&mut output_y_data)?;
1205 magnitude_buffer.copy_to_host(&mut magnitude_data)?;
1206
1207 Ok((
1208 Array::from_shape_vec((h, w), output_x_data)?,
1209 Array::from_shape_vec((h, w), output_y_data)?,
1210 Array::from_shape_vec((h, w), magnitude_data)?,
1211 ))
1212}
1213
1214#[allow(dead_code)]
1216pub fn gpu_laplacian_filter_2d<T>(
1217 input: &ArrayView2<T>,
1218 connectivity: usize,
1219 executor: &dyn GpuKernelExecutor<T>,
1220) -> NdimageResult<Array<T, scirs2_core::ndarray::Ix2>>
1221where
1222 T: Float + FromPrimitive + Debug + Clone + Default + Send + Sync + 'static,
1223{
1224 let (h, w) = input.dim();
1225
1226 let input_buffer = allocate_gpu_buffer(safe_as_slice(input)?)?;
1228 let mut output_buffer = allocate_gpu_buffer_empty::<T>(h * w)?;
1229
1230 let params = vec![
1232 safe_usize_to_float::<T>(w)?,
1233 safe_usize_to_float::<T>(h)?,
1234 safe_usize_to_float::<T>(connectivity)?,
1235 ];
1236
1237 let registry = KernelRegistry::new();
1239 let kernel = registry
1240 .get_kernel("laplacian_filter_2d")
1241 .ok_or_else(|| NdimageError::ComputationError("Kernel not found".into()))?;
1242
1243 executor.execute_kernel(
1245 kernel,
1246 &[input_buffer.as_ref()],
1247 &[output_buffer.as_mut()],
1248 &[h, w],
1249 ¶ms,
1250 )?;
1251
1252 let mut output_data = vec![T::zero(); h * w];
1254 output_buffer.copy_to_host(&mut output_data)?;
1255
1256 Ok(Array::from_shape_vec((h, w), output_data)?)
1257}
1258
1259#[allow(dead_code)]
1262fn allocate_gpu_buffer<T>(data: &[T]) -> NdimageResult<Box<dyn GpuBuffer<T>>>
1263where
1264 T: Clone + Default + Send + Sync + Copy + 'static,
1265{
1266 #[cfg(feature = "cuda")]
1267 {
1268 return crate::backend::cuda::allocate_gpu_buffer(data);
1269 }
1270
1271 #[cfg(not(feature = "cuda"))]
1272 {
1273 Ok(Box::new(CpuFallbackBuffer::from_slice(data)?))
1275 }
1276}
1277
1278#[allow(dead_code)]
1279fn allocate_gpu_buffer_empty<T>(size: usize) -> NdimageResult<Box<dyn GpuBuffer<T>>>
1280where
1281 T: Clone + Default + Send + Sync + Copy + 'static,
1282{
1283 #[cfg(feature = "cuda")]
1284 {
1285 return crate::backend::cuda::allocate_gpu_buffer_empty(size);
1286 }
1287
1288 #[cfg(not(feature = "cuda"))]
1289 {
1290 Ok(Box::new(CpuFallbackBuffer::empty(size)?))
1292 }
1293}
1294
1295