1use std::cmp::min;
24use std::f32::consts::PI as PI32;
25
26use image::buffer::ConvertBuffer;
27use image::imageops::{resize, FilterType};
28use image::{GrayImage, ImageBuffer, Luma};
29use imageproc::filter::gaussian_blur_f32;
30use itertools::{izip, Itertools};
31use ndarray::{prelude::*, s, Array2, Array3, Axis};
32use nshare::AsNdarray2;
33
34#[cfg(test)]
35mod opencv_processing;
36#[cfg(test)]
37pub use opencv_processing::*;
38
39#[derive(Debug, Clone, PartialEq)]
40#[cfg_attr(test, derive(serde::Serialize))]
41pub struct SiftResult {
42 pub keypoints: Vec<KeyPoint>,
43 pub descriptors: Array2<u8>,
46}
47
48#[derive(Debug, Clone, PartialEq, PartialOrd)]
49#[cfg_attr(test, derive(serde::Serialize))]
50pub struct KeyPoint {
51 pub x: f32,
52 pub y: f32,
53 pub size: f32,
54 pub angle: f32,
55 pub response: f32,
56}
57
58#[doc(hidden)]
59#[derive(Debug, Clone, PartialEq, PartialOrd)]
60pub struct SiftKeyPoint {
61 pub x: f32,
62 pub y: f32,
63 pub size: f32,
64 pub angle: f32,
65 pub response: f32,
66 pub octave: usize,
67 pub scale: usize,
68}
69
70pub fn sift(img: &GrayImage, features_limit: Option<usize>) -> SiftResult {
72 sift_with_processing::<ImageprocProcessing>(img, features_limit)
73}
74
75pub fn sift_with_processing<P: Processing>(
77 img: &GrayImage,
78 features_limit: Option<usize>,
79) -> SiftResult {
80 sift_with_precomputed(&precompute_images::<P>(img), features_limit)
81}
82
83pub trait Processing {
87 fn gaussian_blur(img: &LumaFImage, sigma: f64) -> LumaFImage;
88 fn resize_linear(img: &LumaFImage, width: u32, height: u32) -> LumaFImage;
89 fn resize_nearest(img: &LumaFImage, width: u32, height: u32) -> LumaFImage;
90}
91
92const SCALES_PER_OCTAVE: usize = 3;
93const CONTRAST_THRESHOLD: f32 = 0.04;
94const EDGE_THRESHOLD: f32 = 10.0;
95
96const ORIENTATION_HISTOGRAM_RADIUS: f32 = 1.5;
98const IMAGE_BORDER: i32 = 5;
101
102const ORIENTATION_HISTOGRAM_BINS: usize = 36;
103const LAMBDA_ORI: f32 = 1.5;
105const LAMBDA_DESCR: f32 = 3.0;
106
107const DESCRIPTOR_N_HISTOGRAMS: usize = 4;
109const DESCRIPTOR_N_BINS: usize = 8;
111const DESCRIPTOR_SIZE: usize =
112 DESCRIPTOR_N_HISTOGRAMS * DESCRIPTOR_N_HISTOGRAMS * DESCRIPTOR_N_BINS;
113
114type LumaFImage = ImageBuffer<Luma<f32>, Vec<f32>>;
115
116#[derive(Debug, Copy, Clone, PartialEq, Eq)]
117struct ScaleSpacePoint {
118 pub scale: usize,
119 pub x: usize,
120 pub y: usize,
121}
122
123#[doc(hidden)]
124pub struct PrecomputedImages {
125 pub scale_space: Vec<Array3<f32>>,
126 pub dog: Vec<Array3<f32>>,
127 pub n_octaves: usize,
128}
129
130#[doc(hidden)]
131pub fn precompute_images<P: Processing>(img: &GrayImage) -> PrecomputedImages {
132 let seed = create_seed_image::<P>(img);
133 let min_axis = min(seed.width(), seed.height());
134 let n_octaves: usize = ((min_axis as f32).log2() - 2.0).round() as usize + 1;
135
136 let scale_space = build_gaussian_scale_space::<P>(seed, n_octaves);
137 let dog = build_dog(n_octaves, &scale_space);
138 PrecomputedImages {
139 n_octaves,
140 scale_space,
141 dog,
142 }
143}
144
145#[doc(hidden)]
147pub fn sift_with_precomputed(
148 PrecomputedImages {
149 scale_space,
150 dog,
151 n_octaves,
152 }: &PrecomputedImages,
153 features_limit: Option<usize>,
154) -> SiftResult {
155 let mut keypoints: Vec<SiftKeyPoint> = find_keypoints(*n_octaves, scale_space, dog).collect();
156 if let Some(limit) = features_limit {
157 if limit < keypoints.len() {
158 keypoints.sort_unstable_by(|kp1, kp2| kp2.response.total_cmp(&kp1.response));
159 keypoints.truncate(limit);
160 }
161 }
162 let desc = compute_descriptors(scale_space, &keypoints);
163 SiftResult {
164 keypoints: keypoints
165 .into_iter()
166 .map(|kp| KeyPoint {
167 x: kp.x * DELTA_MIN,
169 y: kp.y * DELTA_MIN,
170 size: kp.size * DELTA_MIN,
171 angle: kp.angle,
172 response: kp.response,
173 })
174 .collect(),
175 descriptors: desc,
176 }
177}
178
179const SIGMA_IN: f64 = 0.5;
182
183const SIGMA_MIN: f64 = 0.8;
186
187const INV_DELTA_MIN: u32 = 2;
190
191const DELTA_MIN: f32 = 0.5;
194
195fn create_seed_image<P: Processing>(img: &GrayImage) -> ImageBuffer<Luma<f32>, Vec<f32>> {
197 let img_f32: LumaFImage = img.convert();
199 let img_2x = P::resize_linear(
202 &img_f32,
203 img_f32.width() * INV_DELTA_MIN,
204 img_f32.height() * INV_DELTA_MIN,
205 );
206
207 let sigma = (SIGMA_MIN * SIGMA_MIN - SIGMA_IN * SIGMA_IN).sqrt() * INV_DELTA_MIN as f64;
208
209 P::gaussian_blur(&img_2x, sigma)
210}
211
212fn build_gaussian_scale_space<P: Processing>(
214 seed_img: LumaFImage,
215 n_octaves: usize,
216) -> Vec<Array3<f32>> {
217 let m: f64 = 2_f64.powf(2.0 / SCALES_PER_OCTAVE as f64);
221 let sigmas: Vec<f64> = (0..(SCALES_PER_OCTAVE as i32 + 3))
222 .map(|s| {
223 let a = m.powi(s - 1);
225 let b = a * m;
227 (b - a).sqrt() * SIGMA_MIN * INV_DELTA_MIN as f64
228 })
229 .collect();
230 let create_octave = |initial: LumaFImage| {
231 let mut imgs = Vec::with_capacity(SCALES_PER_OCTAVE + 3);
232 imgs.push(initial);
233 sigmas.iter().skip(1).for_each(|sigma| {
234 let prev = imgs.last().unwrap();
235 imgs.push(P::gaussian_blur(prev, *sigma));
236 });
237 imgs
238 };
239 let mut scale_space: Vec<Vec<LumaFImage>> = Vec::with_capacity(n_octaves);
240 scale_space.push(create_octave(seed_img));
241 for _ in 1..n_octaves {
242 let last_octave = &scale_space.last().unwrap();
246 let initial = &last_octave[last_octave.len() - 3];
247 let scaled_half = P::resize_nearest(initial, initial.width() / 2, initial.height() / 2);
248 scale_space.push(create_octave(scaled_half));
249 }
250 assert_eq!(scale_space.len(), n_octaves);
251 scale_space
252 .iter()
253 .map(|octave| {
254 assert!(octave
255 .windows(2)
256 .all(|w| w[0].width() == w[1].width() && w[0].height() == w[1].height()));
257 assert_eq!(octave.len(), SCALES_PER_OCTAVE + 3);
258 let width = octave[0].width() as usize;
259 let height = octave[0].height() as usize;
260 let mut mat: Array3<f32> = Array3::zeros((SCALES_PER_OCTAVE + 3, height, width));
261 octave.iter().enumerate().for_each(|(i, img)| {
262 mat.slice_mut(s![i, .., ..]).assign(&img.as_ndarray2());
263 });
264 mat
265 })
266 .collect()
267}
268
269fn build_dog(n_octaves: usize, scale_space: &[Array3<f32>]) -> Vec<Array3<f32>> {
272 assert!(scale_space.len() == n_octaves);
273 let dog: Vec<Array3<f32>> = scale_space
274 .iter()
275 .map(|octave| &octave.slice(s![1.., .., ..]) - &octave.slice(s![..-1, .., ..]))
276 .collect();
277 assert!(dog.iter().all(|d| d.shape()[0] == SCALES_PER_OCTAVE + 2));
278 dog
279}
280
281fn find_keypoints<'a>(
282 n_octaves: usize,
283 scale_space: &'a [Array3<f32>],
284 dogs: &'a [Array3<f32>],
285) -> impl Iterator<Item = SiftKeyPoint> + 'a {
286 assert!(scale_space.len() == n_octaves);
287 (0..n_octaves).flat_map(move |octave| {
288 let dog = &dogs[octave];
289 assert!(dog.shape()[0] == SCALES_PER_OCTAVE + 2);
290 (1..=SCALES_PER_OCTAVE).flat_map(move |scale_in_octave| {
291 find_extrema_in_dog_img(dog, scale_space, octave, scale_in_octave)
292 })
293 })
294}
295
296const ORIENTATION_HISTOGRAM_LOCALMAX_RATIO: f32 = 0.8;
298
299fn find_extrema_in_dog_img<'a>(
300 dog: &'a Array3<f32>,
301 scale_space: &'a [Array3<f32>],
302 octave: usize,
303 scale_in_octave: usize,
304) -> Box<dyn Iterator<Item = SiftKeyPoint> + 'a> {
305 assert!(dog.shape()[0] == SCALES_PER_OCTAVE + 2);
306 assert!(scale_in_octave > 0);
307 assert!(scale_in_octave < dog.shape()[0] - 1);
308 let dogslice = dog.slice(s![scale_in_octave - 1..scale_in_octave + 2, .., ..]);
309 assert!(dogslice.shape()[0] == 3);
310 let curr = dogslice.index_axis(Axis(0), 1);
311
312 let height = curr.nrows() as i32;
313 let width = curr.ncols() as i32;
314
315 if height < 2 * IMAGE_BORDER || width < 2 * IMAGE_BORDER {
316 return Box::new(std::iter::empty());
317 }
318 let y_end = height - IMAGE_BORDER;
319 let y_begin = IMAGE_BORDER;
320
321 let x_end = width - IMAGE_BORDER;
322 let x_begin = IMAGE_BORDER;
323 assert!(x_end >= IMAGE_BORDER);
324 let extrema: Vec<_> = (y_begin..y_end)
325 .flat_map(move |y_initial| {
326 (x_begin..x_end)
327 .filter(move |x_initial| {
328 point_is_local_extremum(dogslice, *x_initial as usize, y_initial as usize)
329 })
330 .map(move |x_initial| (x_initial, y_initial))
331 })
332 .collect();
333
334 let result_iter = extrema.into_iter().flat_map(move |(x_initial, y_initial)| {
335 let dogslice = dog.slice(s![scale_in_octave - 1..scale_in_octave + 2, .., ..]);
336 assert!(dogslice.shape()[0] == 3);
337 let InterpolateResult {
338 offset_scale,
339 offset_x,
340 offset_y,
341 point,
342 } = match interpolate_extremum(
343 dog.into(),
344 ScaleSpacePoint {
345 scale: scale_in_octave,
346 x: x_initial as usize,
347 y: y_initial as usize,
348 },
349 ) {
350 Some(r) => r,
351 None => return Box::new(std::iter::empty()) as Box<dyn Iterator<Item = _>>,
352 };
353
354 let dogslice = dog.slice(s![(point.scale - 1)..(point.scale + 2), .., ..]);
355 let curr = dogslice.index_axis(Axis(0), 1);
356 let contrast =
358 extremum_contrast(dogslice, point.x, point.y, offset_scale, offset_x, offset_y).abs();
359
360 if contrast * SCALES_PER_OCTAVE as f32 <= CONTRAST_THRESHOLD {
361 return Box::new(std::iter::empty()) as Box<dyn Iterator<Item = _>>;
362 }
363
364 if extremum_is_on_edge(curr, point) {
366 return Box::new(std::iter::empty()) as Box<dyn Iterator<Item = _>>;
367 }
368
369 let octave_scale_factor = 2_f32.powi(octave as i32);
370
371 let kp_scale = SIGMA_MIN as f32
373 * 2_f32.powf((point.scale as f32 + offset_scale) / SCALES_PER_OCTAVE as f32)
374 * 2.;
375
376 let kp_x = (point.x as f32 + offset_x) * octave_scale_factor;
377 let kp_y = (point.y as f32 + offset_y) * octave_scale_factor;
378 let radius: i32 = (3. * ORIENTATION_HISTOGRAM_RADIUS * kp_scale).round() as i32;
381 let hist = gradient_direction_histogram(
382 scale_space[octave].slice(s![point.scale, .., ..]),
383 point.x as u32,
384 point.y as u32,
385 radius,
386 LAMBDA_ORI * kp_scale,
387 ORIENTATION_HISTOGRAM_BINS,
388 );
389 let histogram_max = hist
390 .iter()
391 .copied()
392 .max_by(f32::total_cmp)
393 .expect("vec is not empty");
394 let localmax_threshold = histogram_max * ORIENTATION_HISTOGRAM_LOCALMAX_RATIO;
395
396 let kps_with_ref_orientation = (0..hist.len()).filter_map(move |k| {
398 let k_minus = if k > 0 { k - 1 } else { hist.len() - 1 };
401 let k_plus = if k < hist.len() - 1 { k + 1 } else { 0 };
402 let is_local_max = hist[k] > hist[k_minus] && hist[k] > hist[k_plus];
403 let is_close_to_global_max = hist[k] >= localmax_threshold;
404 if is_local_max && is_close_to_global_max {
405 let interp =
408 (hist[k_minus] - hist[k_plus]) / (hist[k_minus] - 2.0 * hist[k] + hist[k_plus]);
409 let bin: f32 = k as f32 + 0.5 * interp;
410 let bin = if bin < 0.0 {
411 hist.len() as f32 + bin
412 } else if bin >= hist.len() as f32 {
413 bin - hist.len() as f32
414 } else {
415 bin
416 };
417 let kp_angle: f32 = 360.0 - (360.0 / hist.len() as f32) * bin;
419 Some(SiftKeyPoint {
420 x: kp_x,
421 y: kp_y,
422 size: kp_scale * octave_scale_factor,
423 response: contrast,
424 octave,
425 scale: point.scale,
426 angle: kp_angle,
427 })
428 } else {
429 None
430 }
431 });
432 Box::new(kps_with_ref_orientation)
433 });
434 Box::new(result_iter)
435}
436
437fn point_is_local_extremum(dogslice: ArrayView3<f32>, x: usize, y: usize) -> bool {
438 #[inline(always)]
439 fn values_around(arr: &ArrayView2<f32>, y: usize, x: usize) -> impl Iterator<Item = f32> {
440 [
441 arr[(y - 1, x - 1)],
442 arr[(y - 1, x)],
443 arr[(y - 1, x + 1)],
444 arr[(y, x - 1)],
445 arr[(y, x + 1)],
446 arr[(y + 1, x - 1)],
447 arr[(y + 1, x)],
448 arr[(y + 1, x + 1)],
449 ]
450 .into_iter()
451 }
452
453 assert!(dogslice.shape()[0] == 3);
454 let prev = dogslice.index_axis(Axis(0), 0);
455 let curr = dogslice.index_axis(Axis(0), 1);
456 let next = dogslice.index_axis(Axis(0), 2);
457
458 let threshold: f32 = (0.5 * CONTRAST_THRESHOLD / SCALES_PER_OCTAVE as f32).floor();
461
462 assert!(x > 0 && y > 0 && x < curr.shape()[1] && y < curr.shape()[0]);
463
464 let val = curr[(y, x)];
465 if val.abs() <= threshold {
466 return false;
467 } else if val > 0.0 {
468 if val
469 >= values_around(&curr, y, x)
470 .max_by(f32::total_cmp)
471 .expect("sequence not empty")
472 && val
473 >= values_around(&prev, y, x)
474 .max_by(f32::total_cmp)
475 .expect("sequence not empty")
476 && val
477 >= values_around(&next, y, x)
478 .max_by(f32::total_cmp)
479 .expect("sequence not empty")
480 {
481 let _11p = prev[(y, x)];
482 let _11n = next[(y, x)];
483 return val >= _11p.max(_11n);
484 }
485 } else {
486 debug_assert!(val < 0.0);
487 if val
488 <= values_around(&curr, y, x)
489 .min_by(f32::total_cmp)
490 .expect("sequence not empty")
491 && val
492 <= values_around(&prev, y, x)
493 .min_by(f32::total_cmp)
494 .expect("sequence not empty")
495 && val
496 <= values_around(&next, y, x)
497 .min_by(f32::total_cmp)
498 .expect("sequence not empty")
499 {
500 let _11p = prev[(y, x)];
501 let _11n = next[(y, x)];
502 return val <= _11p.min(_11n);
503 }
504 }
505 false
506}
507
508#[derive(Copy, Clone)]
509struct InterpolateResult {
510 pub point: ScaleSpacePoint,
511 pub offset_scale: f32,
512 pub offset_x: f32,
513 pub offset_y: f32,
514}
515
516const MAX_INTERPOLATION_STEPS: usize = 5;
517
518fn interpolate_extremum(
526 dog: ArrayView3<f32>,
527 ScaleSpacePoint {
528 mut scale,
529 mut x,
530 mut y,
531 }: ScaleSpacePoint,
532) -> Option<InterpolateResult> {
533 assert!(dog.shape()[0] == SCALES_PER_OCTAVE + 2);
534 for _ in 0..MAX_INTERPOLATION_STEPS {
535 let prev = &dog.slice(s![scale - 1, .., ..]);
536 let curr = &dog.slice(s![scale, .., ..]);
537 let next = &dog.slice(s![scale + 1, .., ..]);
538
539 let g1 = (next[(y, x)] - prev[(y, x)]) / 2.;
541 let g2 = (curr[(y + 1, x)] - curr[(y - 1, x)]) / 2.;
542 let g3 = (curr[(y, x + 1)] - curr[(y, x - 1)]) / 2.;
543
544 let value2x = curr[(y, x)] * 2.;
546 let h11 = next[(y, x)] + prev[(y, x)] - value2x;
547 let h12 = (next[(y + 1, x)] - next[(y - 1, x)] - prev[(y + 1, x)] + prev[(y - 1, x)]) / 4.;
548 let h13 = (next[(y, x + 1)] - next[(y, x - 1)] - prev[(y, x + 1)] + prev[(y, x - 1)]) / 4.;
549 let h22 = curr[(y + 1, x)] + curr[(y - 1, x)] - value2x;
550 let h33 = curr[(y, x + 1)] + curr[(y, x - 1)] - value2x;
551 let h23 = (curr[(y + 1, x + 1)] - curr[(y + 1, x - 1)] - curr[(y - 1, x + 1)]
552 + curr[(y - 1, x - 1)])
553 / 4.;
554
555 let det = h11 * h22 * h33 - h11 * h23 * h23 - h12 * h12 * h33 + 2. * h12 * h13 * h23
557 - h13 * h13 * h22;
558 let hinv11 = (h22 * h33 - h23 * h23) / det;
559 let hinv12 = (h13 * h23 - h12 * h33) / det;
560 let hinv13 = (h12 * h23 - h13 * h22) / det;
561 let hinv22 = (h11 * h33 - h13 * h13) / det;
562 let hinv23 = (h12 * h13 - h11 * h23) / det;
563 let hinv33 = (h11 * h22 - h12 * h12) / det;
564
565 let offset_scale = -(hinv11 * g1 + hinv12 * g2 + hinv13 * g3);
568 let offset_x = -(hinv13 * g1 + hinv23 * g2 + hinv33 * g3);
569 let offset_y = -(hinv12 * g1 + hinv22 * g2 + hinv23 * g3);
570
571 let valid_interval = 0.5;
574 if offset_scale.abs() < valid_interval
575 && offset_x.abs() < valid_interval
576 && offset_y.abs() < valid_interval
577 {
578 return Some(InterpolateResult {
580 offset_scale,
581 offset_y,
582 offset_x,
583 point: ScaleSpacePoint { scale, y, x },
584 });
585 }
586 x = (x as isize + offset_x.round() as isize) as usize;
589 y = (y as isize + offset_y.round() as isize) as usize;
590 scale = (scale as isize + offset_scale.round() as isize) as usize;
591
592 if !(1..=SCALES_PER_OCTAVE).contains(&scale)
593 || x < IMAGE_BORDER as usize
594 || x >= curr.shape()[1] - IMAGE_BORDER as usize
595 || y < IMAGE_BORDER as usize
596 || y >= curr.shape()[0] - IMAGE_BORDER as usize
597 {
598 return None;
599 }
600 }
601 None
603}
604
605fn extremum_contrast(
607 dogslice: ArrayView3<f32>,
608 x: usize,
609 y: usize,
610 interp_offset_scale: f32,
611 interp_offset_x: f32,
612 interp_offset_y: f32,
613) -> f32 {
614 assert!(dogslice.shape()[0] == 3);
615 let prev = dogslice.index_axis(Axis(0), 0);
616 let curr = dogslice.index_axis(Axis(0), 1);
617 let next = dogslice.index_axis(Axis(0), 2);
618 let g1 = (next[(y, x)] - prev[(y, x)]) / 2.;
620 let g2 = (curr[(y + 1, x)] - curr[(y - 1, x)]) / 2.;
621 let g3 = (curr[(y, x + 1)] - curr[(y, x - 1)]) / 2.;
622 let interp = interp_offset_scale * g1 + interp_offset_y * g2 + interp_offset_x * g3;
624 curr[(y, x)] + interp / 2.
625 }
627
628fn extremum_is_on_edge(
631 dog_curr: ArrayView2<f32>,
632 ScaleSpacePoint { scale: _, y, x }: ScaleSpacePoint,
633) -> bool {
634 assert!(x > 0 && x < dog_curr.shape()[1] - 1);
635 assert!(y > 0 && y < dog_curr.shape()[0] - 1);
636 let val2x = dog_curr[(y, x)] * 2.0;
637 let h11 = dog_curr[(y + 1, x)] + dog_curr[(y - 1, x)] - val2x;
638 let d22 = dog_curr[(y, x + 1)] + dog_curr[(y, x - 1)] - val2x;
639
640 let h12 = (dog_curr[(y + 1, x + 1)] - dog_curr[(y + 1, x - 1)] - dog_curr[(y - 1, x + 1)]
641 + dog_curr[(y - 1, x - 1)])
642 / 4.;
643
644 let tr = d22 + h11;
645 let det = d22 * h11 - h12 * h12;
646 if det <= 0. {
647 return true;
648 }
649 (tr * tr * EDGE_THRESHOLD) > (EDGE_THRESHOLD + 1.0).powi(2) * det
653}
654
655fn gradient_direction_histogram(
658 img: ArrayView2<f32>,
659 x: u32,
660 y: u32,
661 radius: i32,
662 sigma: f32,
663 n_bins: usize,
664) -> Vec<f32> {
665 assert!(n_bins >= 2);
666 let grad_weight_scale = -1.0 / (2.0 * sigma * sigma);
668
669 let (grads_x, grads_y, grad_weights): (Vec<f32>, Vec<f32>, Vec<f32>) = (-radius..=radius)
672 .filter_map(|y_patch| {
673 if y_patch <= -(y as i32) {
674 return None;
675 }
676 let y: i64 = i64::from(y) + i64::from(y_patch);
677 if y <= 0 || y as usize >= img.shape()[0] - 1 {
678 return None;
679 }
680 Some((y as usize, y_patch))
681 })
682 .flat_map(|(y_img, y_patch)| {
683 (-radius..=radius)
684 .filter_map(|x_patch| {
685 if x_patch <= -(x as i32) {
686 return None;
687 }
688 let x = x as isize + x_patch as isize;
689 if x <= 0 || x as usize >= img.shape()[1] - 1 {
690 return None;
691 }
692 Some((x as usize, x_patch))
693 })
694 .map(move |(x_img, x_patch)| {
695 let dx = img[(y_img, x_img + 1)] - img[(y_img, x_img - 1)];
696 let dy = img[(y_img - 1, x_img)] - img[(y_img + 1, x_img)];
697 let w = (y_patch * y_patch + x_patch * x_patch) as f32 * grad_weight_scale;
699 (dx, dy, w)
700 })
701 })
702 .multiunzip();
703
704 let grad_weights = grad_weights.into_iter().map(|x| x.exp());
706 let magnitudes = grads_x
708 .iter()
709 .zip(&grads_y)
710 .map(|(x, y)| (x * x + y * y).sqrt());
711 let orientations = grads_x
712 .iter()
713 .copied()
714 .zip(&grads_y)
715 .map(|(x, y)| f64::from(*y).atan2(x.into()) as f32);
716
717 let bin_angle_step = n_bins as f32 / (PI32 * 2.);
719 let hist_bin = orientations.into_iter().map(|ori| {
721 assert!((-PI32..=PI32).contains(&ori));
722 let raw_bin = bin_angle_step * ori;
723 assert!(-(n_bins as f32) / 2. <= raw_bin && raw_bin <= n_bins as f32 / 2.);
726 let bin: i32 = raw_bin.round() as i32;
727 if bin >= n_bins as i32 {
728 (bin - n_bins as i32) as usize
729 } else if bin < 0 {
730 assert!(bin + n_bins as i32 >= 0);
731 (bin + n_bins as i32) as usize
732 } else {
733 bin as usize
734 }
735 });
736
737 let mut raw_hist = vec![0.0; n_bins + 4];
743 izip!(hist_bin, magnitudes, grad_weights).for_each(|(bin, mag, weight)| {
744 raw_hist[bin + 2] += weight * mag;
745 });
746 raw_hist[1] = raw_hist[n_bins + 1];
747 raw_hist[0] = raw_hist[n_bins];
748 raw_hist[n_bins + 2] = raw_hist[2];
749 raw_hist[n_bins + 3] = raw_hist[3];
750 let mut hist = vec![0.; n_bins];
751 for i in 2..n_bins + 2 {
752 hist[i - 2] = (raw_hist[i - 2] + raw_hist[i + 2]) * (1. / 16.)
753 + (raw_hist[i - 1] + raw_hist[i + 1]) * (4. / 16.)
754 + raw_hist[i] * 6. / 16.;
755 }
756 hist
757}
758
759fn compute_descriptors(scale_space: &[Array3<f32>], keypoints: &[SiftKeyPoint]) -> Array2<u8> {
760 let mut desc = Array2::zeros((keypoints.len(), DESCRIPTOR_SIZE));
761 desc.rows_mut()
762 .into_iter()
763 .zip(keypoints)
764 .for_each(|(row, kp)| {
765 let img = &scale_space[kp.octave].index_axis(Axis(0), kp.scale);
766 let angle = 360.0 - kp.angle;
767 let octave_scale_factor = 2_f32.powi(-(kp.octave as i32));
769 let kp_size = kp.size * octave_scale_factor;
770 let kpdesc = compute_descriptor(
771 img,
772 kp.x * octave_scale_factor,
773 kp.y * octave_scale_factor,
774 kp_size,
775 angle,
776 );
777 row.into_iter()
778 .zip(kpdesc)
779 .for_each(|(el, descriptor_component)| *el = descriptor_component);
780 });
781 desc
782}
783
784#[doc(hidden)]
785pub fn compute_descriptor(
786 img: &ArrayView2<f32>,
787 x: f32,
788 y: f32,
789 scale: f32,
790 orientation: f32,
791) -> impl IntoIterator<Item = u8> {
792 let n_hist = DESCRIPTOR_N_HISTOGRAMS;
793 let n_bins = DESCRIPTOR_N_BINS;
794 let height = img.shape()[0];
795 let width = img.shape()[1];
796 let x = x.round() as usize;
797 let y = y.round() as usize;
798 const BIN_ANGLE_STEP: f32 = DESCRIPTOR_N_BINS as f32 / 360.0;
799 let hist_width = LAMBDA_DESCR * scale;
800 let radius = (LAMBDA_DESCR * scale * 2_f32.sqrt() * (n_hist + 1) as f32 * 0.5).round() as i32;
801 let (sin_ori, cos_ori) = orientation.to_radians().sin_cos();
802 let (sin_ori_scaled, cos_ori_scaled) = (sin_ori / hist_width, cos_ori / hist_width);
803
804 let mut hist: Array3<f32> = Array3::zeros((n_hist + 2, n_hist + 2, DESCRIPTOR_N_BINS));
808
809 let (gradients_x, gradients_y, row_bins, col_bins, weights): (
810 Vec<_>,
811 Vec<_>,
812 Vec<_>,
813 Vec<_>,
814 Vec<_>,
815 ) = (-radius..=radius)
816 .flat_map(|y_in_window| {
817 (-radius..=radius).filter_map(move |x_in_window| {
818 let col_rotated: f32 =
820 x_in_window as f32 * cos_ori_scaled - y_in_window as f32 * sin_ori_scaled;
821 let row_rotated: f32 =
822 x_in_window as f32 * sin_ori_scaled + y_in_window as f32 * cos_ori_scaled;
823 let row_bin = row_rotated + (n_hist / 2) as f32;
826 let col_bin = col_rotated + (n_hist / 2) as f32;
827
828 let abs_y = y as i32 + y_in_window;
830 let abs_x = x as i32 + x_in_window;
831
832 if row_bin > -0.5
835 && row_bin < n_hist as f32 + 0.5
836 && col_bin > -0.5
837 && col_bin < n_hist as f32 + 0.5
838 && abs_y > 0
839 && abs_y < (height - 1) as i32
840 && abs_x > 0
841 && abs_x < (width - 1) as i32
842 {
843 let abs_y = abs_y as usize;
844 let abs_x = abs_x as usize;
845 let dx = img[(abs_y, abs_x + 1)] - img[(abs_y, abs_x - 1)];
846 let dy = img[(abs_y - 1, abs_x)] - img[(abs_y + 1, abs_x)];
847
848 let weight = col_rotated.powi(2) + row_rotated.powi(2);
851 Some((dx, dy, row_bin, col_bin, weight))
852 } else {
853 None
854 }
855 })
856 })
857 .multiunzip();
858 let weight_scale = -2. / (n_hist.pow(2) as f32);
860 let weights = weights
861 .into_iter()
862 .map(|x| (x * weight_scale).exp())
863 .collect_vec();
864 let normalized_orienations = gradients_x
866 .iter()
867 .zip(&gradients_y)
868 .map(|(x, y)| {
869 let x: f64 = *x as f64;
870 let y: f64 = *y as f64;
871 ((y.atan2(x).to_degrees() + 360.0) % 360.0) as f32 - orientation
872 })
873 .collect_vec();
874 let magnitude = gradients_x
876 .into_iter()
877 .zip(&gradients_y)
878 .map(|(x, y)| (x * x + y * y).sqrt())
879 .collect_vec();
880
881 izip!(
884 row_bins,
885 col_bins,
886 normalized_orienations,
887 magnitude,
888 weights
889 )
890 .for_each(|(row_bin, col_bin, orientation, mag, weight)| {
891 let row_bin = row_bin - 0.5;
894 let col_bin = col_bin - 0.5;
895 let mag = mag * weight;
896 let obin = orientation * BIN_ANGLE_STEP;
897 let row_floor = row_bin.floor();
898 let col_floor = col_bin.floor();
899 let ori_floor = obin.floor();
900 let row_frac = row_bin - row_floor;
901 let col_frac = col_bin - col_floor;
902 let ori_frac = obin - ori_floor;
903
904 let c1 = mag * row_frac;
907 let c0 = mag - c1;
908 let c11 = c1 * col_frac;
909 let c10 = c1 - c11;
910 let c01 = c0 * col_frac;
911 let c00 = c0 - c01;
912 let c111 = c11 * ori_frac;
913 let c110 = c11 - c111;
914 let c101 = c10 * ori_frac;
915 let c100 = c10 - c101;
916 let c011 = c01 * ori_frac;
917 let c010 = c01 - c011;
918 let c001 = c00 * ori_frac;
919 let c000 = c00 - c001;
920
921 let row_floor_p1 = (row_floor + 1.) as usize;
922 let col_floor_p1 = (col_floor + 1.) as usize;
923 let row_floor_p2 = (row_floor + 2.) as usize;
924 let col_floor_p2 = (col_floor + 2.) as usize;
925 let ori_floor = if ori_floor < 0. {
927 ori_floor + n_bins as f32
928 } else if ori_floor >= n_bins as f32 {
929 ori_floor - n_bins as f32
930 } else {
931 ori_floor
932 } as usize;
933 let ori_floor_p1 = if ori_floor + 1 >= n_bins {
934 0
936 } else {
937 ori_floor + 1
938 };
939
940 hist[(row_floor_p1, col_floor_p1, ori_floor)] += c000;
941 hist[(row_floor_p1, col_floor_p1, ori_floor_p1)] += c001;
942 hist[(row_floor_p1, col_floor_p2, ori_floor)] += c010;
943 hist[(row_floor_p1, col_floor_p2, ori_floor_p1)] += c011;
944 hist[(row_floor_p2, col_floor_p1, ori_floor)] += c100;
945 hist[(row_floor_p2, col_floor_p1, ori_floor_p1)] += c101;
946 hist[(row_floor_p2, col_floor_p2, ori_floor)] += c110;
947 hist[(row_floor_p2, col_floor_p2, ori_floor_p1)] += c111;
948 });
949
950 #[allow(clippy::reversed_empty_ranges)]
951 let mut hist_flat = hist.slice_move(s![1..-1, 1..-1, ..]).into_flat();
952 let hist_sl = hist_flat.as_slice().expect("array is flat");
953
954 const DESCRIPTOR_MAGNITUDE_CAP: f32 = 0.2;
955 let mut l2_sq = 0.0;
956 hist_flat.iter().for_each(|x| l2_sq += x.powi(2));
957 let l2_uncapped = hist_sl
958 .chunks(4)
959 .map(|xs| xs.iter().map(|x| x.powi(2)).sum())
960 .reduce(|acc: f32, xs| acc + xs)
961 .expect("array is not empty")
962 .sqrt();
963 let component_cap = l2_uncapped * DESCRIPTOR_MAGNITUDE_CAP;
964
965 hist_flat.mapv_inplace(|v| v.min(component_cap));
967
968 let l2_capped = hist_flat
969 .iter()
970 .copied()
971 .chunks(4)
972 .into_iter()
973 .map(|xs| xs.into_iter().map(|x| x.powi(2)).sum())
974 .reduce(|acc: f32, xs| acc + xs)
975 .expect("array is not empty")
976 .sqrt();
977
978 const DESCRIPTOR_L2_NORM: f32 = 512.0;
979 let l2_normalizer = DESCRIPTOR_L2_NORM / l2_capped.max(f32::EPSILON);
980
981 hist_flat.into_iter().map(move |x| {
983 let x = (x * l2_normalizer).round() as i32;
984 if x > (u8::MAX as i32) {
985 u8::MAX
986 } else {
987 x as u8
988 }
989 })
990}
991
992pub struct ImageprocProcessing;
994
995impl Processing for ImageprocProcessing {
996 fn gaussian_blur(img: &LumaFImage, sigma: f64) -> LumaFImage {
997 gaussian_blur_f32(img, sigma as f32)
998 }
999
1000 fn resize_linear(img: &LumaFImage, width: u32, height: u32) -> LumaFImage {
1001 resize(img, width, height, FilterType::Triangle)
1002 }
1003
1004 fn resize_nearest(img: &LumaFImage, width: u32, height: u32) -> LumaFImage {
1005 resize(img, width, height, FilterType::Nearest)
1006 }
1007}
1008
1009#[test]
1010fn sift_end2end() {
1011 fn do_test(img_bytes: &[u8]) -> (Vec<KeyPoint>, Vec<Vec<u8>>) {
1012 let img = match image::load_from_memory(img_bytes).unwrap().grayscale() {
1013 image::DynamicImage::ImageLuma8(img) => img,
1014 _ => panic!("wrong image type"),
1015 };
1016 let SiftResult {
1017 keypoints,
1018 descriptors,
1019 } = sift_with_processing::<OpenCVProcessing>(&img, None);
1020 let argsort = {
1021 let mut idxs = keypoints.iter().enumerate().collect_vec();
1022 idxs.sort_by(|(_, kp1), (_, kp2)| match kp1.x.total_cmp(&kp2.x) {
1023 std::cmp::Ordering::Equal => match kp1.y.total_cmp(&kp2.y) {
1024 std::cmp::Ordering::Equal => kp1.size.total_cmp(&kp2.size),
1025 o => o,
1026 },
1027 o => o,
1028 });
1029 idxs.into_iter().map(|(i, _)| i).collect_vec()
1030 };
1031 let keypoints = argsort.iter().map(|i| keypoints[*i].clone()).collect_vec();
1032 let descriptors = argsort
1033 .iter()
1034 .map(|i| descriptors.row(*i).to_vec())
1035 .collect_vec();
1036 (keypoints, descriptors)
1037 }
1038 let tree_bytes = include_bytes!("../images/tree_small.jpg");
1039 let (keypoints, descriptors) = do_test(tree_bytes);
1040 insta::assert_yaml_snapshot!(
1041 keypoints,
1042 {
1043 ".*" => insta::rounded_redaction(4),
1044 }
1045 );
1046 insta::assert_yaml_snapshot!(descriptors,);
1047 let bird_bytes = include_bytes!("../images/bird_small.jpg");
1048 let (keypoints, descriptors) = do_test(bird_bytes);
1049 insta::assert_yaml_snapshot!(
1050 keypoints,
1051 {
1052 ".*" => insta::rounded_redaction(4),
1053 }
1054 );
1055 insta::assert_yaml_snapshot!(descriptors,);
1056}