1use scirs2_core::ndarray::{s, Array1, Array2, ArrayView2};
7use scirs2_core::random::essentials::Normal as RandNormal;
8use scirs2_core::random::rngs::StdRng as RealStdRng;
9use scirs2_core::random::Rng;
10use scirs2_core::random::{thread_rng, SeedableRng};
11use serde::{Deserialize, Serialize};
12
13use sklears_core::error::Result;
14use sklears_core::traits::{Fit, Transform};
15
/// Spatial pyramid pooling feature extractor configuration.
///
/// Each input row is interpreted as a flattened square image; at pyramid
/// level `l` the image is pooled over a `2^l x 2^l` grid of cells.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SpatialPyramidFeatures {
    /// Number of pyramid levels; level `l` pools over a `2^l x 2^l` grid.
    pub levels: usize,
    /// Expected dimensionality of the input feature vectors.
    pub feature_dim: usize,
    /// Pooling operator applied inside each spatial cell.
    pub pool_method: PoolingMethod,
    /// When true, level `l` is scaled by `2^-(l+1)` (see `fit`);
    /// otherwise all levels are weighted equally.
    pub pyramid_weighting: bool,
}
29
/// Pooling operator used to reduce a spatial cell to a single scalar.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum PoolingMethod {
    /// Maximum value in the cell.
    Max,
    /// Arithmetic mean of the cell values.
    Average,
    /// Sum of the cell values.
    Sum,
    /// Euclidean (L2) norm of the cell values.
    L2Norm,
}
42
43impl SpatialPyramidFeatures {
44 pub fn new(levels: usize, feature_dim: usize) -> Self {
45 Self {
46 levels,
47 feature_dim,
48 pool_method: PoolingMethod::Max,
49 pyramid_weighting: true,
50 }
51 }
52
53 pub fn pool_method(mut self, method: PoolingMethod) -> Self {
54 self.pool_method = method;
55 self
56 }
57
58 pub fn pyramid_weighting(mut self, enable: bool) -> Self {
59 self.pyramid_weighting = enable;
60 self
61 }
62
63 fn spatial_pool(&self, features: &ArrayView2<f64>, grid_size: usize) -> Result<Array1<f64>> {
64 let (height, width) = features.dim();
65 let cell_h = height / grid_size;
66 let cell_w = width / grid_size;
67
68 let mut pooled = Vec::new();
69
70 for i in 0..grid_size {
71 for j in 0..grid_size {
72 let start_h = i * cell_h;
73 let end_h = if i == grid_size - 1 {
74 height
75 } else {
76 (i + 1) * cell_h
77 };
78 let start_w = j * cell_w;
79 let end_w = if j == grid_size - 1 {
80 width
81 } else {
82 (j + 1) * cell_w
83 };
84
85 let cell = features.slice(s![start_h..end_h, start_w..end_w]);
86
87 let pooled_value = match self.pool_method {
88 PoolingMethod::Max => cell.fold(f64::NEG_INFINITY, |acc, &x| acc.max(x)),
89 PoolingMethod::Average => cell.mean().unwrap_or(0.0),
90 PoolingMethod::Sum => cell.sum(),
91 PoolingMethod::L2Norm => cell.mapv(|x| x * x).sum().sqrt(),
92 };
93
94 pooled.push(pooled_value);
95 }
96 }
97
98 Ok(Array1::from_vec(pooled))
99 }
100}
101
/// A fitted spatial pyramid extractor, produced by fitting
/// `SpatialPyramidFeatures`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FittedSpatialPyramidFeatures {
    /// Number of pyramid levels.
    pub levels: usize,
    /// Expected dimensionality of the input feature vectors.
    pub feature_dim: usize,
    /// Pooling operator applied inside each spatial cell.
    pub pool_method: PoolingMethod,
    /// Whether per-level weighting was enabled at fit time.
    pub pyramid_weighting: bool,
    /// Multiplier applied to each level's pooled values: `2^-(l+1)` for
    /// level `l` when weighting is enabled, otherwise all ones.
    pub level_weights: Array1<f64>,
}
116
117impl Fit<Array2<f64>, ()> for SpatialPyramidFeatures {
118 type Fitted = FittedSpatialPyramidFeatures;
119
120 fn fit(self, _x: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
121 let level_weights = if self.pyramid_weighting {
122 Array1::from_vec(
123 (0..self.levels)
124 .map(|l| 2.0_f64.powi(-(l as i32 + 1)))
125 .collect(),
126 )
127 } else {
128 Array1::ones(self.levels)
129 };
130
131 Ok(FittedSpatialPyramidFeatures {
132 levels: self.levels,
133 feature_dim: self.feature_dim,
134 pool_method: self.pool_method.clone(),
135 pyramid_weighting: self.pyramid_weighting,
136 level_weights,
137 })
138 }
139}
140
141impl FittedSpatialPyramidFeatures {
142 fn spatial_pool(&self, features: &ArrayView2<f64>, grid_size: usize) -> Result<Array1<f64>> {
143 let (height, width) = features.dim();
144 let cell_h = height / grid_size;
145 let cell_w = width / grid_size;
146
147 let mut pooled = Vec::new();
148
149 for i in 0..grid_size {
150 for j in 0..grid_size {
151 let start_h = i * cell_h;
152 let end_h = if i == grid_size - 1 {
153 height
154 } else {
155 (i + 1) * cell_h
156 };
157 let start_w = j * cell_w;
158 let end_w = if j == grid_size - 1 {
159 width
160 } else {
161 (j + 1) * cell_w
162 };
163
164 let cell = features.slice(s![start_h..end_h, start_w..end_w]);
165
166 let pooled_value = match self.pool_method {
167 PoolingMethod::Max => cell.fold(f64::NEG_INFINITY, |acc, &x| acc.max(x)),
168 PoolingMethod::Average => cell.mean().unwrap_or(0.0),
169 PoolingMethod::Sum => cell.sum(),
170 PoolingMethod::L2Norm => cell.mapv(|x| x * x).sum().sqrt(),
171 };
172
173 pooled.push(pooled_value);
174 }
175 }
176
177 Ok(Array1::from_vec(pooled))
178 }
179}
180
181impl Transform<Array2<f64>, Array2<f64>> for FittedSpatialPyramidFeatures {
182 fn transform(&self, x: &Array2<f64>) -> Result<Array2<f64>> {
183 let n_samples = x.shape()[0];
184 let feature_size = x.shape()[1];
185
186 let img_size = (feature_size as f64).sqrt() as usize;
188
189 let mut all_features = Vec::new();
190
191 for i in 0..n_samples {
192 let sample = x.row(i).to_owned().into_shape((img_size, img_size))?;
193 let mut pyramid_features = Vec::new();
194
195 for level in 0..self.levels {
196 let grid_size = 2_usize.pow(level as u32);
197 let pooled = self.spatial_pool(&sample.view(), grid_size)?;
198
199 let weighted = &pooled * self.level_weights[level];
201 pyramid_features.extend(weighted.iter().cloned());
202 }
203
204 all_features.push(pyramid_features);
205 }
206
207 let n_features = all_features[0].len();
209 let mut result = Array2::zeros((n_samples, n_features));
210
211 for (i, features) in all_features.iter().enumerate() {
212 for (j, &feature) in features.iter().enumerate() {
213 result[[i, j]] = feature;
214 }
215 }
216
217 Ok(result)
218 }
219}
220
/// Texture feature extractor combining local binary pattern (LBP)
/// histograms and Gabor filter response statistics.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TextureKernelApproximation {
    /// Requested number of output components.
    /// NOTE(review): not currently used to truncate the output — the output
    /// width is determined by the enabled feature families; confirm intent.
    pub n_components: usize,
    /// Whether to include the 256-bin LBP histogram.
    pub use_lbp: bool,
    /// Whether to include Gabor response statistics.
    pub use_gabor: bool,
    /// Gabor filter frequencies (cycles per pixel).
    pub gabor_frequencies: Vec<f64>,
    /// Gabor filter orientations in radians.
    pub gabor_angles: Vec<f64>,
    /// LBP sampling radius.
    /// NOTE(review): not consulted by the current fixed 8-neighbor LBP.
    pub lbp_radius: f64,
    /// Number of LBP sampling points.
    /// NOTE(review): not consulted by the current fixed 8-neighbor LBP.
    pub lbp_n_points: usize,
}
240
241impl TextureKernelApproximation {
242 pub fn new(n_components: usize) -> Self {
243 Self {
244 n_components,
245 use_lbp: true,
246 use_gabor: true,
247 gabor_frequencies: vec![0.1, 0.2, 0.3, 0.4],
248 gabor_angles: vec![
249 0.0,
250 std::f64::consts::PI / 4.0,
251 std::f64::consts::PI / 2.0,
252 3.0 * std::f64::consts::PI / 4.0,
253 ],
254 lbp_radius: 1.0,
255 lbp_n_points: 8,
256 }
257 }
258
259 pub fn use_lbp(mut self, enable: bool) -> Self {
260 self.use_lbp = enable;
261 self
262 }
263
264 pub fn use_gabor(mut self, enable: bool) -> Self {
265 self.use_gabor = enable;
266 self
267 }
268
269 pub fn gabor_frequencies(mut self, frequencies: Vec<f64>) -> Self {
270 self.gabor_frequencies = frequencies;
271 self
272 }
273
274 pub fn gabor_angles(mut self, angles: Vec<f64>) -> Self {
275 self.gabor_angles = angles;
276 self
277 }
278
279 fn compute_lbp(&self, image: &ArrayView2<f64>) -> Result<Array1<f64>> {
280 let (height, width) = image.dim();
281 let mut lbp_histogram = Array1::zeros(256);
282
283 for i in 1..height - 1 {
284 for j in 1..width - 1 {
285 let center = image[[i, j]];
286 let mut lbp_value = 0u8;
287
288 let neighbors = [
290 image[[i - 1, j - 1]],
291 image[[i - 1, j]],
292 image[[i - 1, j + 1]],
293 image[[i, j + 1]],
294 image[[i + 1, j + 1]],
295 image[[i + 1, j]],
296 image[[i + 1, j - 1]],
297 image[[i, j - 1]],
298 ];
299
300 for (k, &neighbor) in neighbors.iter().enumerate() {
301 if neighbor >= center {
302 lbp_value |= 1 << k;
303 }
304 }
305
306 lbp_histogram[lbp_value as usize] += 1.0;
307 }
308 }
309
310 let sum = lbp_histogram.sum();
312 if sum > 0.0 {
313 lbp_histogram /= sum;
314 }
315
316 Ok(lbp_histogram)
317 }
318
319 fn compute_gabor_features(&self, image: &ArrayView2<f64>) -> Result<Array1<f64>> {
320 let mut gabor_features = Vec::new();
321
322 for &frequency in &self.gabor_frequencies {
323 for &angle in &self.gabor_angles {
324 let filtered = self.apply_gabor_filter(image, frequency, angle)?;
325
326 let mean = filtered.mean().unwrap_or(0.0);
328 let variance = filtered.var(0.0);
329 let energy = filtered.mapv(|x| x * x).sum();
330
331 gabor_features.extend_from_slice(&[mean, variance, energy]);
332 }
333 }
334
335 Ok(Array1::from_vec(gabor_features))
336 }
337
338 fn apply_gabor_filter(
339 &self,
340 image: &ArrayView2<f64>,
341 frequency: f64,
342 angle: f64,
343 ) -> Result<Array2<f64>> {
344 let (height, width) = image.dim();
345 let mut filtered = Array2::zeros((height, width));
346
347 let sigma_x = 1.0 / (2.0 * std::f64::consts::PI * frequency);
348 let sigma_y = 1.0 / (2.0 * std::f64::consts::PI * frequency);
349
350 let cos_angle = angle.cos();
351 let sin_angle = angle.sin();
352
353 for i in 0..height {
354 for j in 0..width {
355 let x = j as f64 - width as f64 / 2.0;
356 let y = i as f64 - height as f64 / 2.0;
357
358 let x_rot = x * cos_angle + y * sin_angle;
359 let y_rot = -x * sin_angle + y * cos_angle;
360
361 let gaussian = (-0.5
362 * (x_rot * x_rot / (sigma_x * sigma_x) + y_rot * y_rot / (sigma_y * sigma_y)))
363 .exp();
364 let sinusoid = (2.0 * std::f64::consts::PI * frequency * x_rot).cos();
365
366 let kernel_value = gaussian * sinusoid;
367
368 if i < height && j < width {
369 filtered[[i, j]] = kernel_value;
370 }
371 }
372 }
373
374 let mut result = Array2::zeros((height, width));
376 for i in 1..height - 1 {
377 for j in 1..width - 1 {
378 let mut sum = 0.0;
379 for di in -1..=1 {
380 for dj in -1..=1 {
381 let ni = (i as i32 + di) as usize;
382 let nj = (j as i32 + dj) as usize;
383 let fi = (1 + di) as usize;
384 let fj = (1 + dj) as usize;
385 sum += image[[ni, nj]] * filtered[[fi, fj]];
386 }
387 }
388 result[[i, j]] = sum;
389 }
390 }
391
392 Ok(result)
393 }
394}
395
/// A fitted texture extractor, produced by fitting
/// `TextureKernelApproximation`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FittedTextureKernelApproximation {
    /// Requested number of output components (see the unfitted struct's note).
    pub n_components: usize,
    /// Whether the 256-bin LBP histogram is included.
    pub use_lbp: bool,
    /// Whether Gabor response statistics are included.
    pub use_gabor: bool,
    /// Gabor filter frequencies (cycles per pixel).
    pub gabor_frequencies: Vec<f64>,
    /// Gabor filter orientations in radians.
    pub gabor_angles: Vec<f64>,
    /// LBP sampling radius (not consulted by the fixed 8-neighbor LBP).
    pub lbp_radius: f64,
    /// Number of LBP sampling points (not consulted by the fixed LBP).
    pub lbp_n_points: usize,
    /// Total output width computed at fit time: 256 LBP bins (if enabled)
    /// plus 3 statistics per (frequency, angle) Gabor pair (if enabled).
    pub feature_dim: usize,
}
416
417impl Fit<Array2<f64>, ()> for TextureKernelApproximation {
418 type Fitted = FittedTextureKernelApproximation;
419
420 fn fit(self, _x: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
421 let mut feature_dim = 0;
422
423 if self.use_lbp {
424 feature_dim += 256; }
426
427 if self.use_gabor {
428 feature_dim += self.gabor_frequencies.len() * self.gabor_angles.len() * 3;
429 }
431
432 Ok(FittedTextureKernelApproximation {
433 n_components: self.n_components,
434 use_lbp: self.use_lbp,
435 use_gabor: self.use_gabor,
436 gabor_frequencies: self.gabor_frequencies.clone(),
437 gabor_angles: self.gabor_angles.clone(),
438 lbp_radius: self.lbp_radius,
439 lbp_n_points: self.lbp_n_points,
440 feature_dim,
441 })
442 }
443}
444
445impl FittedTextureKernelApproximation {
446 fn compute_lbp(&self, image: &ArrayView2<f64>) -> Result<Array1<f64>> {
447 let (height, width) = image.dim();
448 let mut lbp_histogram = Array1::zeros(256);
449
450 for i in 1..height - 1 {
451 for j in 1..width - 1 {
452 let center = image[[i, j]];
453 let mut lbp_value = 0u8;
454
455 let neighbors = [
457 image[[i - 1, j - 1]],
458 image[[i - 1, j]],
459 image[[i - 1, j + 1]],
460 image[[i, j + 1]],
461 image[[i + 1, j + 1]],
462 image[[i + 1, j]],
463 image[[i + 1, j - 1]],
464 image[[i, j - 1]],
465 ];
466
467 for (k, &neighbor) in neighbors.iter().enumerate() {
468 if neighbor >= center {
469 lbp_value |= 1 << k;
470 }
471 }
472
473 lbp_histogram[lbp_value as usize] += 1.0;
474 }
475 }
476
477 let sum = lbp_histogram.sum();
479 if sum > 0.0 {
480 lbp_histogram /= sum;
481 }
482
483 Ok(lbp_histogram)
484 }
485
486 fn compute_gabor_features(&self, image: &ArrayView2<f64>) -> Result<Array1<f64>> {
487 let mut gabor_features = Vec::new();
488
489 for &frequency in &self.gabor_frequencies {
490 for &angle in &self.gabor_angles {
491 let filtered = self.apply_gabor_filter(image, frequency, angle)?;
492
493 let mean = filtered.mean().unwrap_or(0.0);
495 let variance = filtered.var(0.0);
496 let energy = filtered.mapv(|x| x * x).sum();
497
498 gabor_features.extend_from_slice(&[mean, variance, energy]);
499 }
500 }
501
502 Ok(Array1::from_vec(gabor_features))
503 }
504
505 fn apply_gabor_filter(
506 &self,
507 image: &ArrayView2<f64>,
508 frequency: f64,
509 angle: f64,
510 ) -> Result<Array2<f64>> {
511 let (height, width) = image.dim();
512 let mut filtered = Array2::zeros((height, width));
513
514 let sigma_x = 1.0 / (2.0 * std::f64::consts::PI * frequency);
515 let sigma_y = 1.0 / (2.0 * std::f64::consts::PI * frequency);
516
517 let cos_angle = angle.cos();
518 let sin_angle = angle.sin();
519
520 for i in 0..height {
521 for j in 0..width {
522 let x = j as f64 - width as f64 / 2.0;
523 let y = i as f64 - height as f64 / 2.0;
524
525 let x_rot = x * cos_angle + y * sin_angle;
526 let y_rot = -x * sin_angle + y * cos_angle;
527
528 let gaussian = (-0.5
529 * (x_rot * x_rot / (sigma_x * sigma_x) + y_rot * y_rot / (sigma_y * sigma_y)))
530 .exp();
531 let sinusoid = (2.0 * std::f64::consts::PI * frequency * x_rot).cos();
532
533 let kernel_value = gaussian * sinusoid;
534
535 if i < height && j < width {
536 filtered[[i, j]] = kernel_value;
537 }
538 }
539 }
540
541 let mut result = Array2::zeros((height, width));
543 for i in 1..height - 1 {
544 for j in 1..width - 1 {
545 let mut sum = 0.0;
546 for di in -1..=1 {
547 for dj in -1..=1 {
548 let ni = (i as i32 + di) as usize;
549 let nj = (j as i32 + dj) as usize;
550 let fi = (1 + di) as usize;
551 let fj = (1 + dj) as usize;
552 sum += image[[ni, nj]] * filtered[[fi, fj]];
553 }
554 }
555 result[[i, j]] = sum;
556 }
557 }
558
559 Ok(result)
560 }
561}
562
563impl Transform<Array2<f64>, Array2<f64>> for FittedTextureKernelApproximation {
564 fn transform(&self, x: &Array2<f64>) -> Result<Array2<f64>> {
565 let n_samples = x.shape()[0];
566 let feature_size = x.shape()[1];
567
568 let img_size = (feature_size as f64).sqrt() as usize;
570
571 let mut all_features = Vec::new();
572
573 for i in 0..n_samples {
574 let sample = x.row(i).to_owned().into_shape((img_size, img_size))?;
575 let mut texture_features = Vec::new();
576
577 if self.use_lbp {
578 let lbp_features = self.compute_lbp(&sample.view())?;
579 texture_features.extend(lbp_features.iter().cloned());
580 }
581
582 if self.use_gabor {
583 let gabor_features = self.compute_gabor_features(&sample.view())?;
584 texture_features.extend(gabor_features.iter().cloned());
585 }
586
587 all_features.push(texture_features);
588 }
589
590 let n_features = all_features[0].len();
592 let mut result = Array2::zeros((n_samples, n_features));
593
594 for (i, features) in all_features.iter().enumerate() {
595 for (j, &feature) in features.iter().enumerate() {
596 result[[i, j]] = feature;
597 }
598 }
599
600 Ok(result)
601 }
602}
603
/// Harris-corner-based keypoint descriptor extractor (a simplified,
/// SIFT-inspired transform).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScaleInvariantFeatures {
    /// Maximum number of keypoints whose descriptors are emitted per sample.
    pub n_components: usize,
    /// Number of scales.
    /// NOTE(review): not used by the current single-scale detector.
    pub n_scales: usize,
    /// Base Gaussian sigma.
    /// NOTE(review): not used by the current detector.
    pub sigma: f64,
    /// Minimum Harris response for a pixel to count as a keypoint.
    pub contrast_threshold: f64,
    /// Edge rejection threshold.
    /// NOTE(review): not used by the current detector.
    pub edge_threshold: f64,
}
619
620impl ScaleInvariantFeatures {
621 pub fn new(n_components: usize) -> Self {
622 Self {
623 n_components,
624 n_scales: 3,
625 sigma: 1.6,
626 contrast_threshold: 0.04,
627 edge_threshold: 10.0,
628 }
629 }
630
631 pub fn n_scales(mut self, n_scales: usize) -> Self {
632 self.n_scales = n_scales;
633 self
634 }
635
636 pub fn sigma(mut self, sigma: f64) -> Self {
637 self.sigma = sigma;
638 self
639 }
640
641 fn detect_keypoints(&self, image: &ArrayView2<f64>) -> Result<Vec<(usize, usize, f64)>> {
642 let (height, width) = image.dim();
643 let mut keypoints = Vec::new();
644
645 for i in 1..height - 1 {
647 for j in 1..width - 1 {
648 let _ix = (image[[i, j + 1]] - image[[i, j - 1]]) / 2.0;
649 let _iy = (image[[i + 1, j]] - image[[i - 1, j]]) / 2.0;
650 let ixx = image[[i, j - 1]] - 2.0 * image[[i, j]] + image[[i, j + 1]];
651 let iyy = image[[i - 1, j]] - 2.0 * image[[i, j]] + image[[i + 1, j]];
652 let ixy = (image[[i - 1, j - 1]] + image[[i + 1, j + 1]]
653 - image[[i - 1, j + 1]]
654 - image[[i + 1, j - 1]])
655 / 4.0;
656
657 let det = ixx * iyy - ixy * ixy;
658 let trace = ixx + iyy;
659
660 if trace != 0.0 {
661 let harris_response = det - 0.04 * trace * trace;
662
663 if harris_response > self.contrast_threshold {
664 keypoints.push((i, j, harris_response));
665 }
666 }
667 }
668 }
669
670 Ok(keypoints)
671 }
672
673 fn compute_descriptor(
674 &self,
675 image: &ArrayView2<f64>,
676 keypoint: (usize, usize, f64),
677 ) -> Result<Array1<f64>> {
678 let (y, x, _) = keypoint;
679 let (height, width) = image.dim();
680
681 let window_size = 16;
683 let half_window = window_size / 2;
684
685 let mut descriptor = Vec::new();
686
687 for i in 0..4 {
688 for j in 0..4 {
689 let start_y = y.saturating_sub(half_window) + i * 4;
690 let start_x = x.saturating_sub(half_window) + j * 4;
691 let end_y = (start_y + 4).min(height);
692 let end_x = (start_x + 4).min(width);
693
694 if start_y < end_y && start_x < end_x {
695 let patch = image.slice(s![start_y..end_y, start_x..end_x]);
696 let patch_mean = patch.mean().unwrap_or(0.0);
697 let patch_var = patch.var(0.0);
698
699 descriptor.push(patch_mean);
700 descriptor.push(patch_var);
701 } else {
702 descriptor.push(0.0);
703 descriptor.push(0.0);
704 }
705 }
706 }
707
708 Ok(Array1::from_vec(descriptor))
709 }
710}
711
/// A fitted keypoint descriptor extractor, produced by fitting
/// `ScaleInvariantFeatures`. Fitting learns nothing; the configuration is
/// carried over unchanged.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FittedScaleInvariantFeatures {
    /// Maximum number of keypoints whose descriptors are emitted per sample.
    pub n_components: usize,
    /// Number of scales (not used by the current single-scale detector).
    pub n_scales: usize,
    /// Base Gaussian sigma (not used by the current detector).
    pub sigma: f64,
    /// Minimum Harris response for a pixel to count as a keypoint.
    pub contrast_threshold: f64,
    /// Edge rejection threshold (not used by the current detector).
    pub edge_threshold: f64,
}
726
727impl Fit<Array2<f64>, ()> for ScaleInvariantFeatures {
728 type Fitted = FittedScaleInvariantFeatures;
729
730 fn fit(self, _x: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
731 Ok(FittedScaleInvariantFeatures {
732 n_components: self.n_components,
733 n_scales: self.n_scales,
734 sigma: self.sigma,
735 contrast_threshold: self.contrast_threshold,
736 edge_threshold: self.edge_threshold,
737 })
738 }
739}
740
741impl FittedScaleInvariantFeatures {
742 fn detect_keypoints(&self, image: &ArrayView2<f64>) -> Result<Vec<(usize, usize, f64)>> {
743 let (height, width) = image.dim();
744 let mut keypoints = Vec::new();
745
746 for i in 1..height - 1 {
748 for j in 1..width - 1 {
749 let _ix = (image[[i, j + 1]] - image[[i, j - 1]]) / 2.0;
750 let _iy = (image[[i + 1, j]] - image[[i - 1, j]]) / 2.0;
751 let ixx = image[[i, j - 1]] - 2.0 * image[[i, j]] + image[[i, j + 1]];
752 let iyy = image[[i - 1, j]] - 2.0 * image[[i, j]] + image[[i + 1, j]];
753 let ixy = (image[[i - 1, j - 1]] + image[[i + 1, j + 1]]
754 - image[[i - 1, j + 1]]
755 - image[[i + 1, j - 1]])
756 / 4.0;
757
758 let det = ixx * iyy - ixy * ixy;
759 let trace = ixx + iyy;
760
761 if trace != 0.0 {
762 let harris_response = det - 0.04 * trace * trace;
763
764 if harris_response > self.contrast_threshold {
765 keypoints.push((i, j, harris_response));
766 }
767 }
768 }
769 }
770
771 Ok(keypoints)
772 }
773
774 fn compute_descriptor(
775 &self,
776 image: &ArrayView2<f64>,
777 keypoint: (usize, usize, f64),
778 ) -> Result<Array1<f64>> {
779 let (y, x, _) = keypoint;
780 let (height, width) = image.dim();
781
782 let window_size = 16;
784 let half_window = window_size / 2;
785
786 let mut descriptor = Vec::new();
787
788 for i in 0..4 {
789 for j in 0..4 {
790 let start_y = y.saturating_sub(half_window) + i * 4;
791 let start_x = x.saturating_sub(half_window) + j * 4;
792 let end_y = (start_y + 4).min(height);
793 let end_x = (start_x + 4).min(width);
794
795 if start_y < end_y && start_x < end_x {
796 let patch = image.slice(s![start_y..end_y, start_x..end_x]);
797 let patch_mean = patch.mean().unwrap_or(0.0);
798 let patch_var = patch.var(0.0);
799
800 descriptor.push(patch_mean);
801 descriptor.push(patch_var);
802 } else {
803 descriptor.push(0.0);
804 descriptor.push(0.0);
805 }
806 }
807 }
808
809 Ok(Array1::from_vec(descriptor))
810 }
811}
812
813impl Transform<Array2<f64>, Array2<f64>> for FittedScaleInvariantFeatures {
814 fn transform(&self, x: &Array2<f64>) -> Result<Array2<f64>> {
815 let n_samples = x.shape()[0];
816 let feature_size = x.shape()[1];
817
818 let img_size = (feature_size as f64).sqrt() as usize;
820
821 let mut all_features = Vec::new();
822
823 for i in 0..n_samples {
824 let sample = x.row(i).to_owned().into_shape((img_size, img_size))?;
825
826 let keypoints = self.detect_keypoints(&sample.view())?;
827 let mut descriptors = Vec::new();
828
829 for keypoint in keypoints.iter().take(self.n_components) {
830 let descriptor = self.compute_descriptor(&sample.view(), *keypoint)?;
831 descriptors.extend(descriptor.iter().cloned());
832 }
833
834 let expected_size = self.n_components * 32; while descriptors.len() < expected_size {
837 descriptors.push(0.0);
838 }
839 descriptors.truncate(expected_size);
840
841 all_features.push(descriptors);
842 }
843
844 let n_features = all_features[0].len();
846 let mut result = Array2::zeros((n_samples, n_features));
847
848 for (i, features) in all_features.iter().enumerate() {
849 for (j, &feature) in features.iter().enumerate() {
850 result[[i, j]] = feature;
851 }
852 }
853
854 Ok(result)
855 }
856}
857
/// Random convolutional feature extractor: fixed random kernels are
/// convolved with each input image and passed through an activation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConvolutionalKernelFeatures {
    /// Number of random convolution kernels.
    pub n_components: usize,
    /// Side length of each (square) kernel.
    pub kernel_size: usize,
    /// Convolution stride.
    pub stride: usize,
    /// Zero padding added on each side of the image.
    pub padding: usize,
    /// Activation applied to each convolution output.
    pub activation: ActivationFunction,
}
873
/// Element-wise activation applied to convolution outputs.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ActivationFunction {
    /// max(x, 0).
    ReLU,
    /// Hyperbolic tangent.
    Tanh,
    /// Logistic sigmoid, 1 / (1 + e^-x).
    Sigmoid,
    /// Identity (no activation).
    Linear,
}
886
887impl ConvolutionalKernelFeatures {
888 pub fn new(n_components: usize, kernel_size: usize) -> Self {
889 Self {
890 n_components,
891 kernel_size,
892 stride: 1,
893 padding: 0,
894 activation: ActivationFunction::ReLU,
895 }
896 }
897
898 pub fn stride(mut self, stride: usize) -> Self {
899 self.stride = stride;
900 self
901 }
902
903 pub fn padding(mut self, padding: usize) -> Self {
904 self.padding = padding;
905 self
906 }
907
908 pub fn activation(mut self, activation: ActivationFunction) -> Self {
909 self.activation = activation;
910 self
911 }
912
913 fn apply_activation(&self, x: f64) -> f64 {
914 match self.activation {
915 ActivationFunction::ReLU => x.max(0.0),
916 ActivationFunction::Tanh => x.tanh(),
917 ActivationFunction::Sigmoid => 1.0 / (1.0 + (-x).exp()),
918 ActivationFunction::Linear => x,
919 }
920 }
921}
922
/// A fitted random convolutional extractor holding the sampled kernels and
/// biases, produced by fitting `ConvolutionalKernelFeatures`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FittedConvolutionalKernelFeatures {
    /// Number of random convolution kernels.
    pub n_components: usize,
    /// Side length of each (square) kernel.
    pub kernel_size: usize,
    /// Convolution stride.
    pub stride: usize,
    /// Zero padding added on each side of the image.
    pub padding: usize,
    /// Activation applied to each convolution output.
    pub activation: ActivationFunction,
    /// Random kernels, one flattened kernel_size^2 row per component,
    /// drawn from a standard normal distribution.
    pub conv_kernels: Array2<f64>,
    /// Per-kernel bias, drawn from a standard normal distribution.
    pub biases: Array1<f64>,
}
941
942impl Fit<Array2<f64>, ()> for ConvolutionalKernelFeatures {
943 type Fitted = FittedConvolutionalKernelFeatures;
944
945 fn fit(self, _x: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
946 let mut rng = RealStdRng::from_seed(thread_rng().gen());
947 let normal = RandNormal::new(0.0, 1.0).unwrap();
948
949 let kernel_elements = self.kernel_size * self.kernel_size;
951 let mut conv_kernels = Array2::zeros((self.n_components, kernel_elements));
952
953 for i in 0..self.n_components {
954 for j in 0..kernel_elements {
955 conv_kernels[[i, j]] = rng.sample(normal);
956 }
957 }
958
959 let biases = Array1::from_vec((0..self.n_components).map(|_| rng.sample(normal)).collect());
961
962 Ok(FittedConvolutionalKernelFeatures {
963 n_components: self.n_components,
964 kernel_size: self.kernel_size,
965 stride: self.stride,
966 padding: self.padding,
967 activation: self.activation,
968 conv_kernels,
969 biases,
970 })
971 }
972}
973
974impl FittedConvolutionalKernelFeatures {
975 fn apply_activation(&self, x: f64) -> f64 {
976 match self.activation {
977 ActivationFunction::ReLU => x.max(0.0),
978 ActivationFunction::Tanh => x.tanh(),
979 ActivationFunction::Sigmoid => 1.0 / (1.0 + (-x).exp()),
980 ActivationFunction::Linear => x,
981 }
982 }
983}
984
985impl Transform<Array2<f64>, Array2<f64>> for FittedConvolutionalKernelFeatures {
986 fn transform(&self, x: &Array2<f64>) -> Result<Array2<f64>> {
987 let n_samples = x.shape()[0];
988 let feature_size = x.shape()[1];
989
990 let img_size = (feature_size as f64).sqrt() as usize;
992
993 let output_size = (img_size + 2 * self.padding - self.kernel_size) / self.stride + 1;
994 let n_features = self.n_components * output_size * output_size;
995
996 let mut result = Array2::zeros((n_samples, n_features));
997
998 for sample_idx in 0..n_samples {
999 let image = x
1000 .row(sample_idx)
1001 .to_owned()
1002 .into_shape((img_size, img_size))?;
1003 let mut feature_idx = 0;
1004
1005 for kernel_idx in 0..self.n_components {
1006 let kernel = self
1007 .conv_kernels
1008 .row(kernel_idx)
1009 .to_owned()
1010 .into_shape((self.kernel_size, self.kernel_size))?;
1011
1012 for i in 0..output_size {
1013 for j in 0..output_size {
1014 let start_i = i * self.stride;
1015 let start_j = j * self.stride;
1016
1017 let mut conv_sum = 0.0;
1018 for ki in 0..self.kernel_size {
1019 for kj in 0..self.kernel_size {
1020 let img_i = start_i + ki;
1021 let img_j = start_j + kj;
1022
1023 if img_i < img_size && img_j < img_size {
1024 conv_sum += image[[img_i, img_j]] * kernel[[ki, kj]];
1025 }
1026 }
1027 }
1028
1029 conv_sum += self.biases[kernel_idx];
1030 let activated = self.apply_activation(conv_sum);
1031
1032 result[[sample_idx, feature_idx]] = activated;
1033 feature_idx += 1;
1034 }
1035 }
1036 }
1037 }
1038
1039 Ok(result)
1040 }
1041}
1042
#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::essentials::Normal;

    use scirs2_core::ndarray::{Array, Array2};
    use scirs2_core::random::thread_rng;

    /// Smoke test: pyramid features keep the sample count and emit a
    /// non-empty feature vector for 8x8 images.
    #[test]
    fn test_spatial_pyramid_features() {
        // 10 samples of 64 features (flattened 8x8 images), standard normal.
        let x: Array2<f64> = Array::from_shape_fn((10, 64), |_| {
            let mut rng = thread_rng();
            rng.sample(&Normal::new(0.0, 1.0).unwrap())
        });
        let pyramid = SpatialPyramidFeatures::new(3, 64);

        let fitted = pyramid.fit(&x, &()).unwrap();
        let transformed = fitted.transform(&x).unwrap();

        assert_eq!(transformed.shape()[0], 10);
        assert!(transformed.shape()[1] > 0);
    }

    /// Smoke test: texture features (LBP + Gabor) keep the sample count.
    #[test]
    fn test_texture_kernel_approximation() {
        let x: Array2<f64> = Array::from_shape_fn((5, 64), |_| {
            let mut rng = thread_rng();
            rng.sample(&Normal::new(0.0, 1.0).unwrap())
        });
        let texture = TextureKernelApproximation::new(50);

        let fitted = texture.fit(&x, &()).unwrap();
        let transformed = fitted.transform(&x).unwrap();

        assert_eq!(transformed.shape()[0], 5);
        assert!(transformed.shape()[1] > 0);
    }

    /// Output width must be exactly n_components * 32 (32-dim descriptor
    /// per keypoint, zero-padded when fewer keypoints are found).
    #[test]
    fn test_scale_invariant_features() {
        let x: Array2<f64> = Array::from_shape_fn((8, 64), |_| {
            let mut rng = thread_rng();
            rng.sample(&Normal::new(0.0, 1.0).unwrap())
        });
        let sift = ScaleInvariantFeatures::new(10);

        let fitted = sift.fit(&x, &()).unwrap();
        let transformed = fitted.transform(&x).unwrap();

        assert_eq!(transformed.shape()[0], 8);
        assert_eq!(transformed.shape()[1], 10 * 32);
    }

    /// Smoke test: random convolutional features keep the sample count.
    #[test]
    fn test_convolutional_kernel_features() {
        let x: Array2<f64> = Array::from_shape_fn((6, 64), |_| {
            let mut rng = thread_rng();
            rng.sample(&Normal::new(0.0, 1.0).unwrap())
        });
        let conv = ConvolutionalKernelFeatures::new(16, 3);

        let fitted = conv.fit(&x, &()).unwrap();
        let transformed = fitted.transform(&x).unwrap();

        assert_eq!(transformed.shape()[0], 6);
        assert!(transformed.shape()[1] > 0);
    }
}