1use scirs2_core::ndarray::{s, Array1, Array2, ArrayView2};
7use scirs2_core::random::essentials::Normal as RandNormal;
8use scirs2_core::random::rngs::StdRng as RealStdRng;
9use scirs2_core::random::Rng;
10use scirs2_core::random::{thread_rng, SeedableRng};
11use serde::{Deserialize, Serialize};
12
13use sklears_core::error::Result;
14use sklears_core::traits::{Fit, Transform};
15
/// Spatial pyramid pooling feature extractor configuration.
///
/// Each input row is interpreted as a flattened square image; at pyramid
/// level `l` the image is pooled over a `2^l x 2^l` grid of cells.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SpatialPyramidFeatures {
    /// Number of pyramid levels; level `l` pools over a `2^l x 2^l` grid.
    pub levels: usize,
    /// Expected dimensionality of the input feature vectors.
    pub feature_dim: usize,
    /// Pooling operator applied inside each spatial cell.
    pub pool_method: PoolingMethod,
    /// When true, level `l` is scaled by `2^-(l+1)` (see `fit`);
    /// otherwise all levels are weighted equally.
    pub pyramid_weighting: bool,
}
29
/// Pooling operator used to reduce a spatial cell to a single scalar.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum PoolingMethod {
    /// Maximum value in the cell.
    Max,
    /// Arithmetic mean of the cell values.
    Average,
    /// Sum of the cell values.
    Sum,
    /// Euclidean (L2) norm of the cell values.
    L2Norm,
}
42
43impl SpatialPyramidFeatures {
44 pub fn new(levels: usize, feature_dim: usize) -> Self {
45 Self {
46 levels,
47 feature_dim,
48 pool_method: PoolingMethod::Max,
49 pyramid_weighting: true,
50 }
51 }
52
53 pub fn pool_method(mut self, method: PoolingMethod) -> Self {
54 self.pool_method = method;
55 self
56 }
57
58 pub fn pyramid_weighting(mut self, enable: bool) -> Self {
59 self.pyramid_weighting = enable;
60 self
61 }
62
63 fn spatial_pool(&self, features: &ArrayView2<f64>, grid_size: usize) -> Result<Array1<f64>> {
64 let (height, width) = features.dim();
65 let cell_h = height / grid_size;
66 let cell_w = width / grid_size;
67
68 let mut pooled = Vec::new();
69
70 for i in 0..grid_size {
71 for j in 0..grid_size {
72 let start_h = i * cell_h;
73 let end_h = if i == grid_size - 1 {
74 height
75 } else {
76 (i + 1) * cell_h
77 };
78 let start_w = j * cell_w;
79 let end_w = if j == grid_size - 1 {
80 width
81 } else {
82 (j + 1) * cell_w
83 };
84
85 let cell = features.slice(s![start_h..end_h, start_w..end_w]);
86
87 let pooled_value = match self.pool_method {
88 PoolingMethod::Max => cell.fold(f64::NEG_INFINITY, |acc, &x| acc.max(x)),
89 PoolingMethod::Average => cell.mean().unwrap_or(0.0),
90 PoolingMethod::Sum => cell.sum(),
91 PoolingMethod::L2Norm => cell.mapv(|x| x * x).sum().sqrt(),
92 };
93
94 pooled.push(pooled_value);
95 }
96 }
97
98 Ok(Array1::from_vec(pooled))
99 }
100}
101
/// A fitted spatial pyramid extractor, produced by fitting
/// `SpatialPyramidFeatures`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FittedSpatialPyramidFeatures {
    /// Number of pyramid levels.
    pub levels: usize,
    /// Expected dimensionality of the input feature vectors.
    pub feature_dim: usize,
    /// Pooling operator applied inside each spatial cell.
    pub pool_method: PoolingMethod,
    /// Whether per-level weighting was enabled at fit time.
    pub pyramid_weighting: bool,
    /// Multiplier applied to each level's pooled values: `2^-(l+1)` for
    /// level `l` when weighting is enabled, otherwise all ones.
    pub level_weights: Array1<f64>,
}
116
117impl Fit<Array2<f64>, ()> for SpatialPyramidFeatures {
118 type Fitted = FittedSpatialPyramidFeatures;
119
120 fn fit(self, _x: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
121 let level_weights = if self.pyramid_weighting {
122 Array1::from_vec(
123 (0..self.levels)
124 .map(|l| 2.0_f64.powi(-(l as i32 + 1)))
125 .collect(),
126 )
127 } else {
128 Array1::ones(self.levels)
129 };
130
131 Ok(FittedSpatialPyramidFeatures {
132 levels: self.levels,
133 feature_dim: self.feature_dim,
134 pool_method: self.pool_method.clone(),
135 pyramid_weighting: self.pyramid_weighting,
136 level_weights,
137 })
138 }
139}
140
141impl FittedSpatialPyramidFeatures {
142 fn spatial_pool(&self, features: &ArrayView2<f64>, grid_size: usize) -> Result<Array1<f64>> {
143 let (height, width) = features.dim();
144 let cell_h = height / grid_size;
145 let cell_w = width / grid_size;
146
147 let mut pooled = Vec::new();
148
149 for i in 0..grid_size {
150 for j in 0..grid_size {
151 let start_h = i * cell_h;
152 let end_h = if i == grid_size - 1 {
153 height
154 } else {
155 (i + 1) * cell_h
156 };
157 let start_w = j * cell_w;
158 let end_w = if j == grid_size - 1 {
159 width
160 } else {
161 (j + 1) * cell_w
162 };
163
164 let cell = features.slice(s![start_h..end_h, start_w..end_w]);
165
166 let pooled_value = match self.pool_method {
167 PoolingMethod::Max => cell.fold(f64::NEG_INFINITY, |acc, &x| acc.max(x)),
168 PoolingMethod::Average => cell.mean().unwrap_or(0.0),
169 PoolingMethod::Sum => cell.sum(),
170 PoolingMethod::L2Norm => cell.mapv(|x| x * x).sum().sqrt(),
171 };
172
173 pooled.push(pooled_value);
174 }
175 }
176
177 Ok(Array1::from_vec(pooled))
178 }
179}
180
181impl Transform<Array2<f64>, Array2<f64>> for FittedSpatialPyramidFeatures {
182 fn transform(&self, x: &Array2<f64>) -> Result<Array2<f64>> {
183 let n_samples = x.shape()[0];
184 let feature_size = x.shape()[1];
185
186 let img_size = (feature_size as f64).sqrt() as usize;
188
189 let mut all_features = Vec::new();
190
191 for i in 0..n_samples {
192 let sample = x.row(i).to_owned().into_shape((img_size, img_size))?;
193 let mut pyramid_features = Vec::new();
194
195 for level in 0..self.levels {
196 let grid_size = 2_usize.pow(level as u32);
197 let pooled = self.spatial_pool(&sample.view(), grid_size)?;
198
199 let weighted = &pooled * self.level_weights[level];
201 pyramid_features.extend(weighted.iter().cloned());
202 }
203
204 all_features.push(pyramid_features);
205 }
206
207 let n_features = all_features[0].len();
209 let mut result = Array2::zeros((n_samples, n_features));
210
211 for (i, features) in all_features.iter().enumerate() {
212 for (j, &feature) in features.iter().enumerate() {
213 result[[i, j]] = feature;
214 }
215 }
216
217 Ok(result)
218 }
219}
220
/// Texture feature extractor combining local binary pattern (LBP)
/// histograms and Gabor filter response statistics.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TextureKernelApproximation {
    /// Requested number of output components.
    /// NOTE(review): not currently used to truncate the output — the output
    /// width is determined by the enabled feature families; confirm intent.
    pub n_components: usize,
    /// Whether to include the 256-bin LBP histogram.
    pub use_lbp: bool,
    /// Whether to include Gabor response statistics.
    pub use_gabor: bool,
    /// Gabor filter frequencies (cycles per pixel).
    pub gabor_frequencies: Vec<f64>,
    /// Gabor filter orientations in radians.
    pub gabor_angles: Vec<f64>,
    /// LBP sampling radius.
    /// NOTE(review): not consulted by the current fixed 8-neighbor LBP.
    pub lbp_radius: f64,
    /// Number of LBP sampling points.
    /// NOTE(review): not consulted by the current fixed 8-neighbor LBP.
    pub lbp_n_points: usize,
}
240
241impl TextureKernelApproximation {
242 pub fn new(n_components: usize) -> Self {
243 Self {
244 n_components,
245 use_lbp: true,
246 use_gabor: true,
247 gabor_frequencies: vec![0.1, 0.2, 0.3, 0.4],
248 gabor_angles: vec![
249 0.0,
250 std::f64::consts::PI / 4.0,
251 std::f64::consts::PI / 2.0,
252 3.0 * std::f64::consts::PI / 4.0,
253 ],
254 lbp_radius: 1.0,
255 lbp_n_points: 8,
256 }
257 }
258
259 pub fn use_lbp(mut self, enable: bool) -> Self {
260 self.use_lbp = enable;
261 self
262 }
263
264 pub fn use_gabor(mut self, enable: bool) -> Self {
265 self.use_gabor = enable;
266 self
267 }
268
269 pub fn gabor_frequencies(mut self, frequencies: Vec<f64>) -> Self {
270 self.gabor_frequencies = frequencies;
271 self
272 }
273
274 pub fn gabor_angles(mut self, angles: Vec<f64>) -> Self {
275 self.gabor_angles = angles;
276 self
277 }
278
279 fn compute_lbp(&self, image: &ArrayView2<f64>) -> Result<Array1<f64>> {
280 let (height, width) = image.dim();
281 let mut lbp_histogram = Array1::zeros(256);
282
283 for i in 1..height - 1 {
284 for j in 1..width - 1 {
285 let center = image[[i, j]];
286 let mut lbp_value = 0u8;
287
288 let neighbors = [
290 image[[i - 1, j - 1]],
291 image[[i - 1, j]],
292 image[[i - 1, j + 1]],
293 image[[i, j + 1]],
294 image[[i + 1, j + 1]],
295 image[[i + 1, j]],
296 image[[i + 1, j - 1]],
297 image[[i, j - 1]],
298 ];
299
300 for (k, &neighbor) in neighbors.iter().enumerate() {
301 if neighbor >= center {
302 lbp_value |= 1 << k;
303 }
304 }
305
306 lbp_histogram[lbp_value as usize] += 1.0;
307 }
308 }
309
310 let sum = lbp_histogram.sum();
312 if sum > 0.0 {
313 lbp_histogram /= sum;
314 }
315
316 Ok(lbp_histogram)
317 }
318
319 fn compute_gabor_features(&self, image: &ArrayView2<f64>) -> Result<Array1<f64>> {
320 let mut gabor_features = Vec::new();
321
322 for &frequency in &self.gabor_frequencies {
323 for &angle in &self.gabor_angles {
324 let filtered = self.apply_gabor_filter(image, frequency, angle)?;
325
326 let mean = filtered.mean().unwrap_or(0.0);
328 let variance = filtered.var(0.0);
329 let energy = filtered.mapv(|x| x * x).sum();
330
331 gabor_features.extend_from_slice(&[mean, variance, energy]);
332 }
333 }
334
335 Ok(Array1::from_vec(gabor_features))
336 }
337
338 fn apply_gabor_filter(
339 &self,
340 image: &ArrayView2<f64>,
341 frequency: f64,
342 angle: f64,
343 ) -> Result<Array2<f64>> {
344 let (height, width) = image.dim();
345 let mut filtered = Array2::zeros((height, width));
346
347 let sigma_x = 1.0 / (2.0 * std::f64::consts::PI * frequency);
348 let sigma_y = 1.0 / (2.0 * std::f64::consts::PI * frequency);
349
350 let cos_angle = angle.cos();
351 let sin_angle = angle.sin();
352
353 for i in 0..height {
354 for j in 0..width {
355 let x = j as f64 - width as f64 / 2.0;
356 let y = i as f64 - height as f64 / 2.0;
357
358 let x_rot = x * cos_angle + y * sin_angle;
359 let y_rot = -x * sin_angle + y * cos_angle;
360
361 let gaussian = (-0.5
362 * (x_rot * x_rot / (sigma_x * sigma_x) + y_rot * y_rot / (sigma_y * sigma_y)))
363 .exp();
364 let sinusoid = (2.0 * std::f64::consts::PI * frequency * x_rot).cos();
365
366 let kernel_value = gaussian * sinusoid;
367
368 if i < height && j < width {
369 filtered[[i, j]] = kernel_value;
370 }
371 }
372 }
373
374 let mut result = Array2::zeros((height, width));
376 for i in 1..height - 1 {
377 for j in 1..width - 1 {
378 let mut sum = 0.0;
379 for di in -1..=1 {
380 for dj in -1..=1 {
381 let ni = (i as i32 + di) as usize;
382 let nj = (j as i32 + dj) as usize;
383 let fi = (1 + di) as usize;
384 let fj = (1 + dj) as usize;
385 sum += image[[ni, nj]] * filtered[[fi, fj]];
386 }
387 }
388 result[[i, j]] = sum;
389 }
390 }
391
392 Ok(result)
393 }
394}
395
/// A fitted texture extractor, produced by fitting
/// `TextureKernelApproximation`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FittedTextureKernelApproximation {
    /// Requested number of output components (see the unfitted struct's note).
    pub n_components: usize,
    /// Whether the 256-bin LBP histogram is included.
    pub use_lbp: bool,
    /// Whether Gabor response statistics are included.
    pub use_gabor: bool,
    /// Gabor filter frequencies (cycles per pixel).
    pub gabor_frequencies: Vec<f64>,
    /// Gabor filter orientations in radians.
    pub gabor_angles: Vec<f64>,
    /// LBP sampling radius (not consulted by the fixed 8-neighbor LBP).
    pub lbp_radius: f64,
    /// Number of LBP sampling points (not consulted by the fixed LBP).
    pub lbp_n_points: usize,
    /// Total output width computed at fit time: 256 LBP bins (if enabled)
    /// plus 3 statistics per (frequency, angle) Gabor pair (if enabled).
    pub feature_dim: usize,
}
416
417impl Fit<Array2<f64>, ()> for TextureKernelApproximation {
418 type Fitted = FittedTextureKernelApproximation;
419
420 fn fit(self, _x: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
421 let mut feature_dim = 0;
422
423 if self.use_lbp {
424 feature_dim += 256; }
426
427 if self.use_gabor {
428 feature_dim += self.gabor_frequencies.len() * self.gabor_angles.len() * 3;
429 }
431
432 Ok(FittedTextureKernelApproximation {
433 n_components: self.n_components,
434 use_lbp: self.use_lbp,
435 use_gabor: self.use_gabor,
436 gabor_frequencies: self.gabor_frequencies.clone(),
437 gabor_angles: self.gabor_angles.clone(),
438 lbp_radius: self.lbp_radius,
439 lbp_n_points: self.lbp_n_points,
440 feature_dim,
441 })
442 }
443}
444
445impl FittedTextureKernelApproximation {
446 fn compute_lbp(&self, image: &ArrayView2<f64>) -> Result<Array1<f64>> {
447 let (height, width) = image.dim();
448 let mut lbp_histogram = Array1::zeros(256);
449
450 for i in 1..height - 1 {
451 for j in 1..width - 1 {
452 let center = image[[i, j]];
453 let mut lbp_value = 0u8;
454
455 let neighbors = [
457 image[[i - 1, j - 1]],
458 image[[i - 1, j]],
459 image[[i - 1, j + 1]],
460 image[[i, j + 1]],
461 image[[i + 1, j + 1]],
462 image[[i + 1, j]],
463 image[[i + 1, j - 1]],
464 image[[i, j - 1]],
465 ];
466
467 for (k, &neighbor) in neighbors.iter().enumerate() {
468 if neighbor >= center {
469 lbp_value |= 1 << k;
470 }
471 }
472
473 lbp_histogram[lbp_value as usize] += 1.0;
474 }
475 }
476
477 let sum = lbp_histogram.sum();
479 if sum > 0.0 {
480 lbp_histogram /= sum;
481 }
482
483 Ok(lbp_histogram)
484 }
485
486 fn compute_gabor_features(&self, image: &ArrayView2<f64>) -> Result<Array1<f64>> {
487 let mut gabor_features = Vec::new();
488
489 for &frequency in &self.gabor_frequencies {
490 for &angle in &self.gabor_angles {
491 let filtered = self.apply_gabor_filter(image, frequency, angle)?;
492
493 let mean = filtered.mean().unwrap_or(0.0);
495 let variance = filtered.var(0.0);
496 let energy = filtered.mapv(|x| x * x).sum();
497
498 gabor_features.extend_from_slice(&[mean, variance, energy]);
499 }
500 }
501
502 Ok(Array1::from_vec(gabor_features))
503 }
504
505 fn apply_gabor_filter(
506 &self,
507 image: &ArrayView2<f64>,
508 frequency: f64,
509 angle: f64,
510 ) -> Result<Array2<f64>> {
511 let (height, width) = image.dim();
512 let mut filtered = Array2::zeros((height, width));
513
514 let sigma_x = 1.0 / (2.0 * std::f64::consts::PI * frequency);
515 let sigma_y = 1.0 / (2.0 * std::f64::consts::PI * frequency);
516
517 let cos_angle = angle.cos();
518 let sin_angle = angle.sin();
519
520 for i in 0..height {
521 for j in 0..width {
522 let x = j as f64 - width as f64 / 2.0;
523 let y = i as f64 - height as f64 / 2.0;
524
525 let x_rot = x * cos_angle + y * sin_angle;
526 let y_rot = -x * sin_angle + y * cos_angle;
527
528 let gaussian = (-0.5
529 * (x_rot * x_rot / (sigma_x * sigma_x) + y_rot * y_rot / (sigma_y * sigma_y)))
530 .exp();
531 let sinusoid = (2.0 * std::f64::consts::PI * frequency * x_rot).cos();
532
533 let kernel_value = gaussian * sinusoid;
534
535 if i < height && j < width {
536 filtered[[i, j]] = kernel_value;
537 }
538 }
539 }
540
541 let mut result = Array2::zeros((height, width));
543 for i in 1..height - 1 {
544 for j in 1..width - 1 {
545 let mut sum = 0.0;
546 for di in -1..=1 {
547 for dj in -1..=1 {
548 let ni = (i as i32 + di) as usize;
549 let nj = (j as i32 + dj) as usize;
550 let fi = (1 + di) as usize;
551 let fj = (1 + dj) as usize;
552 sum += image[[ni, nj]] * filtered[[fi, fj]];
553 }
554 }
555 result[[i, j]] = sum;
556 }
557 }
558
559 Ok(result)
560 }
561}
562
563impl Transform<Array2<f64>, Array2<f64>> for FittedTextureKernelApproximation {
564 fn transform(&self, x: &Array2<f64>) -> Result<Array2<f64>> {
565 let n_samples = x.shape()[0];
566 let feature_size = x.shape()[1];
567
568 let img_size = (feature_size as f64).sqrt() as usize;
570
571 let mut all_features = Vec::new();
572
573 for i in 0..n_samples {
574 let sample = x.row(i).to_owned().into_shape((img_size, img_size))?;
575 let mut texture_features = Vec::new();
576
577 if self.use_lbp {
578 let lbp_features = self.compute_lbp(&sample.view())?;
579 texture_features.extend(lbp_features.iter().cloned());
580 }
581
582 if self.use_gabor {
583 let gabor_features = self.compute_gabor_features(&sample.view())?;
584 texture_features.extend(gabor_features.iter().cloned());
585 }
586
587 all_features.push(texture_features);
588 }
589
590 let n_features = all_features[0].len();
592 let mut result = Array2::zeros((n_samples, n_features));
593
594 for (i, features) in all_features.iter().enumerate() {
595 for (j, &feature) in features.iter().enumerate() {
596 result[[i, j]] = feature;
597 }
598 }
599
600 Ok(result)
601 }
602}
603
/// Harris-corner-based keypoint descriptor extractor (a simplified,
/// SIFT-inspired transform).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScaleInvariantFeatures {
    /// Maximum number of keypoints whose descriptors are emitted per sample.
    pub n_components: usize,
    /// Number of scales.
    /// NOTE(review): not used by the current single-scale detector.
    pub n_scales: usize,
    /// Base Gaussian sigma.
    /// NOTE(review): not used by the current detector.
    pub sigma: f64,
    /// Minimum Harris response for a pixel to count as a keypoint.
    pub contrast_threshold: f64,
    /// Edge rejection threshold.
    /// NOTE(review): not used by the current detector.
    pub edge_threshold: f64,
}
619
620impl ScaleInvariantFeatures {
621 pub fn new(n_components: usize) -> Self {
622 Self {
623 n_components,
624 n_scales: 3,
625 sigma: 1.6,
626 contrast_threshold: 0.04,
627 edge_threshold: 10.0,
628 }
629 }
630
631 pub fn n_scales(mut self, n_scales: usize) -> Self {
632 self.n_scales = n_scales;
633 self
634 }
635
636 pub fn sigma(mut self, sigma: f64) -> Self {
637 self.sigma = sigma;
638 self
639 }
640
641 fn detect_keypoints(&self, image: &ArrayView2<f64>) -> Result<Vec<(usize, usize, f64)>> {
642 let (height, width) = image.dim();
643 let mut keypoints = Vec::new();
644
645 for i in 1..height - 1 {
647 for j in 1..width - 1 {
648 let _ix = (image[[i, j + 1]] - image[[i, j - 1]]) / 2.0;
649 let _iy = (image[[i + 1, j]] - image[[i - 1, j]]) / 2.0;
650 let ixx = image[[i, j - 1]] - 2.0 * image[[i, j]] + image[[i, j + 1]];
651 let iyy = image[[i - 1, j]] - 2.0 * image[[i, j]] + image[[i + 1, j]];
652 let ixy = (image[[i - 1, j - 1]] + image[[i + 1, j + 1]]
653 - image[[i - 1, j + 1]]
654 - image[[i + 1, j - 1]])
655 / 4.0;
656
657 let det = ixx * iyy - ixy * ixy;
658 let trace = ixx + iyy;
659
660 if trace != 0.0 {
661 let harris_response = det - 0.04 * trace * trace;
662
663 if harris_response > self.contrast_threshold {
664 keypoints.push((i, j, harris_response));
665 }
666 }
667 }
668 }
669
670 Ok(keypoints)
671 }
672
673 fn compute_descriptor(
674 &self,
675 image: &ArrayView2<f64>,
676 keypoint: (usize, usize, f64),
677 ) -> Result<Array1<f64>> {
678 let (y, x, _) = keypoint;
679 let (height, width) = image.dim();
680
681 let window_size = 16;
683 let half_window = window_size / 2;
684
685 let mut descriptor = Vec::new();
686
687 for i in 0..4 {
688 for j in 0..4 {
689 let start_y = y.saturating_sub(half_window) + i * 4;
690 let start_x = x.saturating_sub(half_window) + j * 4;
691 let end_y = (start_y + 4).min(height);
692 let end_x = (start_x + 4).min(width);
693
694 if start_y < end_y && start_x < end_x {
695 let patch = image.slice(s![start_y..end_y, start_x..end_x]);
696 let patch_mean = patch.mean().unwrap_or(0.0);
697 let patch_var = patch.var(0.0);
698
699 descriptor.push(patch_mean);
700 descriptor.push(patch_var);
701 } else {
702 descriptor.push(0.0);
703 descriptor.push(0.0);
704 }
705 }
706 }
707
708 Ok(Array1::from_vec(descriptor))
709 }
710}
711
/// A fitted keypoint descriptor extractor, produced by fitting
/// `ScaleInvariantFeatures`. Fitting learns nothing; the configuration is
/// carried over unchanged.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FittedScaleInvariantFeatures {
    /// Maximum number of keypoints whose descriptors are emitted per sample.
    pub n_components: usize,
    /// Number of scales (not used by the current single-scale detector).
    pub n_scales: usize,
    /// Base Gaussian sigma (not used by the current detector).
    pub sigma: f64,
    /// Minimum Harris response for a pixel to count as a keypoint.
    pub contrast_threshold: f64,
    /// Edge rejection threshold (not used by the current detector).
    pub edge_threshold: f64,
}
726
727impl Fit<Array2<f64>, ()> for ScaleInvariantFeatures {
728 type Fitted = FittedScaleInvariantFeatures;
729
730 fn fit(self, _x: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
731 Ok(FittedScaleInvariantFeatures {
732 n_components: self.n_components,
733 n_scales: self.n_scales,
734 sigma: self.sigma,
735 contrast_threshold: self.contrast_threshold,
736 edge_threshold: self.edge_threshold,
737 })
738 }
739}
740
741impl FittedScaleInvariantFeatures {
742 fn detect_keypoints(&self, image: &ArrayView2<f64>) -> Result<Vec<(usize, usize, f64)>> {
743 let (height, width) = image.dim();
744 let mut keypoints = Vec::new();
745
746 for i in 1..height - 1 {
748 for j in 1..width - 1 {
749 let _ix = (image[[i, j + 1]] - image[[i, j - 1]]) / 2.0;
750 let _iy = (image[[i + 1, j]] - image[[i - 1, j]]) / 2.0;
751 let ixx = image[[i, j - 1]] - 2.0 * image[[i, j]] + image[[i, j + 1]];
752 let iyy = image[[i - 1, j]] - 2.0 * image[[i, j]] + image[[i + 1, j]];
753 let ixy = (image[[i - 1, j - 1]] + image[[i + 1, j + 1]]
754 - image[[i - 1, j + 1]]
755 - image[[i + 1, j - 1]])
756 / 4.0;
757
758 let det = ixx * iyy - ixy * ixy;
759 let trace = ixx + iyy;
760
761 if trace != 0.0 {
762 let harris_response = det - 0.04 * trace * trace;
763
764 if harris_response > self.contrast_threshold {
765 keypoints.push((i, j, harris_response));
766 }
767 }
768 }
769 }
770
771 Ok(keypoints)
772 }
773
774 fn compute_descriptor(
775 &self,
776 image: &ArrayView2<f64>,
777 keypoint: (usize, usize, f64),
778 ) -> Result<Array1<f64>> {
779 let (y, x, _) = keypoint;
780 let (height, width) = image.dim();
781
782 let window_size = 16;
784 let half_window = window_size / 2;
785
786 let mut descriptor = Vec::new();
787
788 for i in 0..4 {
789 for j in 0..4 {
790 let start_y = y.saturating_sub(half_window) + i * 4;
791 let start_x = x.saturating_sub(half_window) + j * 4;
792 let end_y = (start_y + 4).min(height);
793 let end_x = (start_x + 4).min(width);
794
795 if start_y < end_y && start_x < end_x {
796 let patch = image.slice(s![start_y..end_y, start_x..end_x]);
797 let patch_mean = patch.mean().unwrap_or(0.0);
798 let patch_var = patch.var(0.0);
799
800 descriptor.push(patch_mean);
801 descriptor.push(patch_var);
802 } else {
803 descriptor.push(0.0);
804 descriptor.push(0.0);
805 }
806 }
807 }
808
809 Ok(Array1::from_vec(descriptor))
810 }
811}
812
813impl Transform<Array2<f64>, Array2<f64>> for FittedScaleInvariantFeatures {
814 fn transform(&self, x: &Array2<f64>) -> Result<Array2<f64>> {
815 let n_samples = x.shape()[0];
816 let feature_size = x.shape()[1];
817
818 let img_size = (feature_size as f64).sqrt() as usize;
820
821 let mut all_features = Vec::new();
822
823 for i in 0..n_samples {
824 let sample = x.row(i).to_owned().into_shape((img_size, img_size))?;
825
826 let keypoints = self.detect_keypoints(&sample.view())?;
827 let mut descriptors = Vec::new();
828
829 for keypoint in keypoints.iter().take(self.n_components) {
830 let descriptor = self.compute_descriptor(&sample.view(), *keypoint)?;
831 descriptors.extend(descriptor.iter().cloned());
832 }
833
834 let expected_size = self.n_components * 32; while descriptors.len() < expected_size {
837 descriptors.push(0.0);
838 }
839 descriptors.truncate(expected_size);
840
841 all_features.push(descriptors);
842 }
843
844 let n_features = all_features[0].len();
846 let mut result = Array2::zeros((n_samples, n_features));
847
848 for (i, features) in all_features.iter().enumerate() {
849 for (j, &feature) in features.iter().enumerate() {
850 result[[i, j]] = feature;
851 }
852 }
853
854 Ok(result)
855 }
856}
857
/// Random convolutional feature extractor: fixed random kernels are
/// convolved with each input image and passed through an activation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConvolutionalKernelFeatures {
    /// Number of random convolution kernels.
    pub n_components: usize,
    /// Side length of each (square) kernel.
    pub kernel_size: usize,
    /// Convolution stride.
    pub stride: usize,
    /// Zero padding added on each side of the image.
    pub padding: usize,
    /// Activation applied to each convolution output.
    pub activation: ActivationFunction,
}
873
/// Element-wise activation applied to convolution outputs.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ActivationFunction {
    /// max(x, 0).
    ReLU,
    /// Hyperbolic tangent.
    Tanh,
    /// Logistic sigmoid, 1 / (1 + e^-x).
    Sigmoid,
    /// Identity (no activation).
    Linear,
}
886
887impl ConvolutionalKernelFeatures {
888 pub fn new(n_components: usize, kernel_size: usize) -> Self {
889 Self {
890 n_components,
891 kernel_size,
892 stride: 1,
893 padding: 0,
894 activation: ActivationFunction::ReLU,
895 }
896 }
897
898 pub fn stride(mut self, stride: usize) -> Self {
899 self.stride = stride;
900 self
901 }
902
903 pub fn padding(mut self, padding: usize) -> Self {
904 self.padding = padding;
905 self
906 }
907
908 pub fn activation(mut self, activation: ActivationFunction) -> Self {
909 self.activation = activation;
910 self
911 }
912
913 fn apply_activation(&self, x: f64) -> f64 {
914 match self.activation {
915 ActivationFunction::ReLU => x.max(0.0),
916 ActivationFunction::Tanh => x.tanh(),
917 ActivationFunction::Sigmoid => 1.0 / (1.0 + (-x).exp()),
918 ActivationFunction::Linear => x,
919 }
920 }
921}
922
/// A fitted random convolutional extractor holding the sampled kernels and
/// biases, produced by fitting `ConvolutionalKernelFeatures`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FittedConvolutionalKernelFeatures {
    /// Number of random convolution kernels.
    pub n_components: usize,
    /// Side length of each (square) kernel.
    pub kernel_size: usize,
    /// Convolution stride.
    pub stride: usize,
    /// Zero padding added on each side of the image.
    pub padding: usize,
    /// Activation applied to each convolution output.
    pub activation: ActivationFunction,
    /// Random kernels, one flattened kernel_size^2 row per component,
    /// drawn from a standard normal distribution.
    pub conv_kernels: Array2<f64>,
    /// Per-kernel bias, drawn from a standard normal distribution.
    pub biases: Array1<f64>,
}
941
942impl Fit<Array2<f64>, ()> for ConvolutionalKernelFeatures {
943 type Fitted = FittedConvolutionalKernelFeatures;
944
945 fn fit(self, _x: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
946 let mut rng = RealStdRng::from_seed(thread_rng().gen());
947 let normal = RandNormal::new(0.0, 1.0).unwrap();
948
949 let kernel_elements = self.kernel_size * self.kernel_size;
951 let mut conv_kernels = Array2::zeros((self.n_components, kernel_elements));
952
953 for i in 0..self.n_components {
954 for j in 0..kernel_elements {
955 conv_kernels[[i, j]] = rng.sample(normal);
956 }
957 }
958
959 let biases = Array1::from_vec((0..self.n_components).map(|_| rng.sample(normal)).collect());
961
962 Ok(FittedConvolutionalKernelFeatures {
963 n_components: self.n_components,
964 kernel_size: self.kernel_size,
965 stride: self.stride,
966 padding: self.padding,
967 activation: self.activation,
968 conv_kernels,
969 biases,
970 })
971 }
972}
973
974impl FittedConvolutionalKernelFeatures {
975 fn apply_activation(&self, x: f64) -> f64 {
976 match self.activation {
977 ActivationFunction::ReLU => x.max(0.0),
978 ActivationFunction::Tanh => x.tanh(),
979 ActivationFunction::Sigmoid => 1.0 / (1.0 + (-x).exp()),
980 ActivationFunction::Linear => x,
981 }
982 }
983}
984
985impl Transform<Array2<f64>, Array2<f64>> for FittedConvolutionalKernelFeatures {
986 fn transform(&self, x: &Array2<f64>) -> Result<Array2<f64>> {
987 let n_samples = x.shape()[0];
988 let feature_size = x.shape()[1];
989
990 let img_size = (feature_size as f64).sqrt() as usize;
992
993 let output_size = (img_size + 2 * self.padding - self.kernel_size) / self.stride + 1;
994 let n_features = self.n_components * output_size * output_size;
995
996 let mut result = Array2::zeros((n_samples, n_features));
997
998 for sample_idx in 0..n_samples {
999 let image = x
1000 .row(sample_idx)
1001 .to_owned()
1002 .into_shape((img_size, img_size))?;
1003 let mut feature_idx = 0;
1004
1005 for kernel_idx in 0..self.n_components {
1006 let kernel = self
1007 .conv_kernels
1008 .row(kernel_idx)
1009 .to_owned()
1010 .into_shape((self.kernel_size, self.kernel_size))?;
1011
1012 for i in 0..output_size {
1013 for j in 0..output_size {
1014 let start_i = i * self.stride;
1015 let start_j = j * self.stride;
1016
1017 let mut conv_sum = 0.0;
1018 for ki in 0..self.kernel_size {
1019 for kj in 0..self.kernel_size {
1020 let img_i = start_i + ki;
1021 let img_j = start_j + kj;
1022
1023 if img_i < img_size && img_j < img_size {
1024 conv_sum += image[[img_i, img_j]] * kernel[[ki, kj]];
1025 }
1026 }
1027 }
1028
1029 conv_sum += self.biases[kernel_idx];
1030 let activated = self.apply_activation(conv_sum);
1031
1032 result[[sample_idx, feature_idx]] = activated;
1033 feature_idx += 1;
1034 }
1035 }
1036 }
1037 }
1038
1039 Ok(result)
1040 }
1041}
1042
#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::essentials::Normal;

    use scirs2_core::ndarray::{Array, Array2};
    use scirs2_core::random::thread_rng;

    /// Smoke test: pyramid features keep the sample count and emit a
    /// non-empty feature vector for 8x8 images.
    #[test]
    fn test_spatial_pyramid_features() {
        // 10 samples of 64 features (flattened 8x8 images), standard normal.
        let x: Array2<f64> = Array::from_shape_fn((10, 64), |_| {
            let mut rng = thread_rng();
            rng.sample(&Normal::new(0.0, 1.0).unwrap())
        });
        let pyramid = SpatialPyramidFeatures::new(3, 64);

        let fitted = pyramid.fit(&x, &()).unwrap();
        let transformed = fitted.transform(&x).unwrap();

        assert_eq!(transformed.shape()[0], 10);
        assert!(transformed.shape()[1] > 0);
    }

    /// Smoke test: texture features (LBP + Gabor) keep the sample count.
    #[test]
    fn test_texture_kernel_approximation() {
        let x: Array2<f64> = Array::from_shape_fn((5, 64), |_| {
            let mut rng = thread_rng();
            rng.sample(&Normal::new(0.0, 1.0).unwrap())
        });
        let texture = TextureKernelApproximation::new(50);

        let fitted = texture.fit(&x, &()).unwrap();
        let transformed = fitted.transform(&x).unwrap();

        assert_eq!(transformed.shape()[0], 5);
        assert!(transformed.shape()[1] > 0);
    }

    /// Output width must be exactly n_components * 32 (32-dim descriptor
    /// per keypoint, zero-padded when fewer keypoints are found).
    #[test]
    fn test_scale_invariant_features() {
        let x: Array2<f64> = Array::from_shape_fn((8, 64), |_| {
            let mut rng = thread_rng();
            rng.sample(&Normal::new(0.0, 1.0).unwrap())
        });
        let sift = ScaleInvariantFeatures::new(10);

        let fitted = sift.fit(&x, &()).unwrap();
        let transformed = fitted.transform(&x).unwrap();

        assert_eq!(transformed.shape()[0], 8);
        assert_eq!(transformed.shape()[1], 10 * 32);
    }

    /// Smoke test: random convolutional features keep the sample count.
    #[test]
    fn test_convolutional_kernel_features() {
        let x: Array2<f64> = Array::from_shape_fn((6, 64), |_| {
            let mut rng = thread_rng();
            rng.sample(&Normal::new(0.0, 1.0).unwrap())
        });
        let conv = ConvolutionalKernelFeatures::new(16, 3);

        let fitted = conv.fit(&x, &()).unwrap();
        let transformed = fitted.transform(&x).unwrap();

        assert_eq!(transformed.shape()[0], 6);
        assert!(transformed.shape()[1] > 0);
    }
}