sift_features/
lib.rs

1// This implementation of SIFT is derived from works by Rob Hess and Willow Garage Inc.
2// It is made available under the terms of the MIT license included in the root of this repository.
3//
4// Copyright 2006-2010 Rob Hess
5// Copyright 2009 Willow Garage Inc.
6// Copyright 2024 Thomas Nibler
7
8//! This crate contains an implemenation of the SIFT image descriptor.
9//! It aims to be compatible with the implementation found in OpenCV's `feature2d` module
10//! and you should be able to match features extracted with OpenCV and this crate.
11//!
12//! Useful resources:
13//! - [1]: [Lowe 1999](https://www.cs.ubc.ca/~lowe/papers/iccv99.pdf)
14//! - [2]: [Lowe 2004](https://www.cs.ubc.ca/~lowe/papers/ijcv04.pdf)
15//! - [3]: [Mikolajczyk 2004](https://robots.ox.ac.uk/~vgg/research/affine/det_eval_files/mikolajczyk_ijcv2004.pdf)
16//! - [4]: [Rey-Otero 2014](https://www.ipol.im/pub/art/2014/82/article.pdf)
17//!
18//! The code tries to follow [4] (Anatomy of the SIFT Method) in particular.
19//! It deviates in a few places to be compatible with the SIFT implementation OpenCV,
20//! namely how histograms are smoothed, angle computations and some details in how the final
21//! descriptor vector is calculated.
22
23use std::cmp::min;
24use std::f32::consts::PI as PI32;
25
26use image::buffer::ConvertBuffer;
27use image::imageops::{resize, FilterType};
28use image::{GrayImage, ImageBuffer, Luma};
29use imageproc::filter::gaussian_blur_f32;
30use itertools::{izip, Itertools};
31use ndarray::{prelude::*, s, Array2, Array3, Axis};
32use nshare::AsNdarray2;
33
34#[cfg(test)]
35mod opencv_processing;
36#[cfg(test)]
37pub use opencv_processing::*;
38
39#[derive(Debug, Clone, PartialEq)]
40#[cfg_attr(test, derive(serde::Serialize))]
41pub struct SiftResult {
42    pub keypoints: Vec<KeyPoint>,
43    /// Array of shape `(keypoints.len(), 128)` containing the SIFT feature vectors in the same
44    /// order as `keypoints`.
45    pub descriptors: Array2<u8>,
46}
47
48#[derive(Debug, Clone, PartialEq, PartialOrd)]
49#[cfg_attr(test, derive(serde::Serialize))]
50pub struct KeyPoint {
51    pub x: f32,
52    pub y: f32,
53    pub size: f32,
54    pub angle: f32,
55    pub response: f32,
56}
57
58#[doc(hidden)]
59#[derive(Debug, Clone, PartialEq, PartialOrd)]
60pub struct SiftKeyPoint {
61    pub x: f32,
62    pub y: f32,
63    pub size: f32,
64    pub angle: f32,
65    pub response: f32,
66    pub octave: usize,
67    pub scale: usize,
68}
69
70/// Extract SIFT features using default blur and resize implementations.
71pub fn sift(img: &GrayImage, features_limit: Option<usize>) -> SiftResult {
72    sift_with_processing::<ImageprocProcessing>(img, features_limit)
73}
74
75/// Extract SIFT features using provided blur and resize implementations.
76pub fn sift_with_processing<P: Processing>(
77    img: &GrayImage,
78    features_limit: Option<usize>,
79) -> SiftResult {
80    sift_with_precomputed(&precompute_images::<P>(img), features_limit)
81}
82
83/// Basic image operations used by SIFT.
84/// For testing or benchmarking, it's useful to use exactly the same blur and interpolation
85/// procedures to obtain identical results and performance.
86pub trait Processing {
87    fn gaussian_blur(img: &LumaFImage, sigma: f64) -> LumaFImage;
88    fn resize_linear(img: &LumaFImage, width: u32, height: u32) -> LumaFImage;
89    fn resize_nearest(img: &LumaFImage, width: u32, height: u32) -> LumaFImage;
90}
91
92const SCALES_PER_OCTAVE: usize = 3;
93const CONTRAST_THRESHOLD: f32 = 0.04;
94const EDGE_THRESHOLD: f32 = 10.0;
95
96/// λori in [4], radius around a keypoint considered for the gradient orientation histogram.
97const ORIENTATION_HISTOGRAM_RADIUS: f32 = 1.5;
98/// 3λori rounded up. For points closer to the image bounds than this, no gradient orientation
99/// histogram can be compuited.
100const IMAGE_BORDER: i32 = 5;
101
102const ORIENTATION_HISTOGRAM_BINS: usize = 36;
103/// λ_ori in Eq. (19) in [4]
104const LAMBDA_ORI: f32 = 1.5;
105const LAMBDA_DESCR: f32 = 3.0;
106
107// See Section 4.2 in [4]
108const DESCRIPTOR_N_HISTOGRAMS: usize = 4;
109// See Section 4.2 in [4]
110const DESCRIPTOR_N_BINS: usize = 8;
111const DESCRIPTOR_SIZE: usize =
112    DESCRIPTOR_N_HISTOGRAMS * DESCRIPTOR_N_HISTOGRAMS * DESCRIPTOR_N_BINS;
113
114type LumaFImage = ImageBuffer<Luma<f32>, Vec<f32>>;
115
116#[derive(Debug, Copy, Clone, PartialEq, Eq)]
117struct ScaleSpacePoint {
118    pub scale: usize,
119    pub x: usize,
120    pub y: usize,
121}
122
123#[doc(hidden)]
124pub struct PrecomputedImages {
125    pub scale_space: Vec<Array3<f32>>,
126    pub dog: Vec<Array3<f32>>,
127    pub n_octaves: usize,
128}
129
130#[doc(hidden)]
131pub fn precompute_images<P: Processing>(img: &GrayImage) -> PrecomputedImages {
132    let seed = create_seed_image::<P>(img);
133    let min_axis = min(seed.width(), seed.height());
134    let n_octaves: usize = ((min_axis as f32).log2() - 2.0).round() as usize + 1;
135
136    let scale_space = build_gaussian_scale_space::<P>(seed, n_octaves);
137    let dog = build_dog(n_octaves, &scale_space);
138    PrecomputedImages {
139        n_octaves,
140        scale_space,
141        dog,
142    }
143}
144
145// Image preprocessing pulled out for easier benchmarking
146#[doc(hidden)]
147pub fn sift_with_precomputed(
148    PrecomputedImages {
149        scale_space,
150        dog,
151        n_octaves,
152    }: &PrecomputedImages,
153    features_limit: Option<usize>,
154) -> SiftResult {
155    let mut keypoints: Vec<SiftKeyPoint> = find_keypoints(*n_octaves, scale_space, dog).collect();
156    if let Some(limit) = features_limit {
157        if limit < keypoints.len() {
158            keypoints.sort_unstable_by(|kp1, kp2| kp2.response.total_cmp(&kp1.response));
159            keypoints.truncate(limit);
160        }
161    }
162    let desc = compute_descriptors(scale_space, &keypoints);
163    SiftResult {
164        keypoints: keypoints
165            .into_iter()
166            .map(|kp| KeyPoint {
167                // Undo initial upsampling by 2x for seed image
168                x: kp.x * DELTA_MIN,
169                y: kp.y * DELTA_MIN,
170                size: kp.size * DELTA_MIN,
171                angle: kp.angle,
172                response: kp.response,
173            })
174            .collect(),
175        descriptors: desc,
176    }
177}
178
179/// Assumed blur level of input image
180/// See Section 2.2 in [4].
181const SIGMA_IN: f64 = 0.5;
182
183/// Blur level of the seed image.
184/// See Section 2.2 in [4].
185const SIGMA_MIN: f64 = 0.8;
186
187/// Inverse of the subsampling factor of first image in the gaussian scale space.
188/// See Section 2.2 in [4].
189const INV_DELTA_MIN: u32 = 2;
190
191/// Subsampling factor of first image in the gaussian scale space.
192/// See Section 2.2 in [4].
193const DELTA_MIN: f32 = 0.5;
194
195/// Compute upsampled and blurred seed image as described in [4], Eq. (6).
196fn create_seed_image<P: Processing>(img: &GrayImage) -> ImageBuffer<Luma<f32>, Vec<f32>> {
197    // float image with pixel values [0; 1];
198    let img_f32: LumaFImage = img.convert();
199    // Initial upsampling step by DELTA_MIN
200    // OpenCV doesn't use their normal resize function but instead `warpAffine` to upscale 2x. Why?
201    let img_2x = P::resize_linear(
202        &img_f32,
203        img_f32.width() * INV_DELTA_MIN,
204        img_f32.height() * INV_DELTA_MIN,
205    );
206
207    let sigma = (SIGMA_MIN * SIGMA_MIN - SIGMA_IN * SIGMA_IN).sqrt() * INV_DELTA_MIN as f64;
208
209    P::gaussian_blur(&img_2x, sigma)
210}
211
212/// See Section 2.2 in [4]
213fn build_gaussian_scale_space<P: Processing>(
214    seed_img: LumaFImage,
215    n_octaves: usize,
216) -> Vec<Array3<f32>> {
217    // Geometric series of blur sigmas within an octave as given in Eq. (7).
218    // Each octave contains 3 additional images 0, SCALES_PER_OCTAVE+1, SCALES_PER_OCTAVE+2,
219    // hence the +3 here and everywhere else.
220    let m: f64 = 2_f64.powf(2.0 / SCALES_PER_OCTAVE as f64);
221    let sigmas: Vec<f64> = (0..(SCALES_PER_OCTAVE as i32 + 3))
222        .map(|s| {
223            // right term under square root
224            let a = m.powi(s - 1);
225            // left term under square root
226            let b = a * m;
227            (b - a).sqrt() * SIGMA_MIN * INV_DELTA_MIN as f64
228        })
229        .collect();
230    let create_octave = |initial: LumaFImage| {
231        let mut imgs = Vec::with_capacity(SCALES_PER_OCTAVE + 3);
232        imgs.push(initial);
233        sigmas.iter().skip(1).for_each(|sigma| {
234            let prev = imgs.last().unwrap();
235            imgs.push(P::gaussian_blur(prev, *sigma));
236        });
237        imgs
238    };
239    let mut scale_space: Vec<Vec<LumaFImage>> = Vec::with_capacity(n_octaves);
240    scale_space.push(create_octave(seed_img));
241    for _ in 1..n_octaves {
242        // The first image of each octave is the last (ignoring the two additional posterior ones)
243        // image of the previous octave subsampled by a factor of 2.
244        // See Eq. (8) in [4].
245        let last_octave = &scale_space.last().unwrap();
246        let initial = &last_octave[last_octave.len() - 3];
247        let scaled_half = P::resize_nearest(initial, initial.width() / 2, initial.height() / 2);
248        scale_space.push(create_octave(scaled_half));
249    }
250    assert_eq!(scale_space.len(), n_octaves);
251    scale_space
252        .iter()
253        .map(|octave| {
254            assert!(octave
255                .windows(2)
256                .all(|w| w[0].width() == w[1].width() && w[0].height() == w[1].height()));
257            assert_eq!(octave.len(), SCALES_PER_OCTAVE + 3);
258            let width = octave[0].width() as usize;
259            let height = octave[0].height() as usize;
260            let mut mat: Array3<f32> = Array3::zeros((SCALES_PER_OCTAVE + 3, height, width));
261            octave.iter().enumerate().for_each(|(i, img)| {
262                mat.slice_mut(s![i, .., ..]).assign(&img.as_ndarray2());
263            });
264            mat
265        })
266        .collect()
267}
268
269/// Difference of Gaussians, woof.
270/// See Section 3.1 in [4]
271fn build_dog(n_octaves: usize, scale_space: &[Array3<f32>]) -> Vec<Array3<f32>> {
272    assert!(scale_space.len() == n_octaves);
273    let dog: Vec<Array3<f32>> = scale_space
274        .iter()
275        .map(|octave| &octave.slice(s![1.., .., ..]) - &octave.slice(s![..-1, .., ..]))
276        .collect();
277    assert!(dog.iter().all(|d| d.shape()[0] == SCALES_PER_OCTAVE + 2));
278    dog
279}
280
281fn find_keypoints<'a>(
282    n_octaves: usize,
283    scale_space: &'a [Array3<f32>],
284    dogs: &'a [Array3<f32>],
285) -> impl Iterator<Item = SiftKeyPoint> + 'a {
286    assert!(scale_space.len() == n_octaves);
287    (0..n_octaves).flat_map(move |octave| {
288        let dog = &dogs[octave];
289        assert!(dog.shape()[0] == SCALES_PER_OCTAVE + 2);
290        (1..=SCALES_PER_OCTAVE).flat_map(move |scale_in_octave| {
291            find_extrema_in_dog_img(dog, scale_space, octave, scale_in_octave)
292        })
293    })
294}
295
296/// t in Section 4.1.C
297const ORIENTATION_HISTOGRAM_LOCALMAX_RATIO: f32 = 0.8;
298
299fn find_extrema_in_dog_img<'a>(
300    dog: &'a Array3<f32>,
301    scale_space: &'a [Array3<f32>],
302    octave: usize,
303    scale_in_octave: usize,
304) -> Box<dyn Iterator<Item = SiftKeyPoint> + 'a> {
305    assert!(dog.shape()[0] == SCALES_PER_OCTAVE + 2);
306    assert!(scale_in_octave > 0);
307    assert!(scale_in_octave < dog.shape()[0] - 1);
308    let dogslice = dog.slice(s![scale_in_octave - 1..scale_in_octave + 2, .., ..]);
309    assert!(dogslice.shape()[0] == 3);
310    let curr = dogslice.index_axis(Axis(0), 1);
311
312    let height = curr.nrows() as i32;
313    let width = curr.ncols() as i32;
314
315    if height < 2 * IMAGE_BORDER || width < 2 * IMAGE_BORDER {
316        return Box::new(std::iter::empty());
317    }
318    let y_end = height - IMAGE_BORDER;
319    let y_begin = IMAGE_BORDER;
320
321    let x_end = width - IMAGE_BORDER;
322    let x_begin = IMAGE_BORDER;
323    assert!(x_end >= IMAGE_BORDER);
324    let extrema: Vec<_> = (y_begin..y_end)
325        .flat_map(move |y_initial| {
326            (x_begin..x_end)
327                .filter(move |x_initial| {
328                    point_is_local_extremum(dogslice, *x_initial as usize, y_initial as usize)
329                })
330                .map(move |x_initial| (x_initial, y_initial))
331        })
332        .collect();
333
334    let result_iter = extrema.into_iter().flat_map(move |(x_initial, y_initial)| {
335        let dogslice = dog.slice(s![scale_in_octave - 1..scale_in_octave + 2, .., ..]);
336        assert!(dogslice.shape()[0] == 3);
337        let InterpolateResult {
338            offset_scale,
339            offset_x,
340            offset_y,
341            point,
342        } = match interpolate_extremum(
343            dog.into(),
344            ScaleSpacePoint {
345                scale: scale_in_octave,
346                x: x_initial as usize,
347                y: y_initial as usize,
348            },
349        ) {
350            Some(r) => r,
351            None => return Box::new(std::iter::empty()) as Box<dyn Iterator<Item = _>>,
352        };
353
354        let dogslice = dog.slice(s![(point.scale - 1)..(point.scale + 2), .., ..]);
355        let curr = dogslice.index_axis(Axis(0), 1);
356        // discard low contrast extrema
357        let contrast =
358            extremum_contrast(dogslice, point.x, point.y, offset_scale, offset_x, offset_y).abs();
359
360        if contrast * SCALES_PER_OCTAVE as f32 <= CONTRAST_THRESHOLD {
361            return Box::new(std::iter::empty()) as Box<dyn Iterator<Item = _>>;
362        }
363
364        // Discard extrema located on edges
365        if extremum_is_on_edge(curr, point) {
366            return Box::new(std::iter::empty()) as Box<dyn Iterator<Item = _>>;
367        }
368
369        let octave_scale_factor = 2_f32.powi(octave as i32);
370
371        // Called sigma in [4]
372        let kp_scale = SIGMA_MIN as f32
373            * 2_f32.powf((point.scale as f32 + offset_scale) / SCALES_PER_OCTAVE as f32)
374            * 2.;
375
376        let kp_x = (point.x as f32 + offset_x) * octave_scale_factor;
377        let kp_y = (point.y as f32 + offset_y) * octave_scale_factor;
378        // Side length of patch over which gradient orientation histogram is computed.
379        // See Eq. (19) in [4]
380        let radius: i32 = (3. * ORIENTATION_HISTOGRAM_RADIUS * kp_scale).round() as i32;
381        let hist = gradient_direction_histogram(
382            scale_space[octave].slice(s![point.scale, .., ..]),
383            point.x as u32,
384            point.y as u32,
385            radius,
386            LAMBDA_ORI * kp_scale,
387            ORIENTATION_HISTOGRAM_BINS,
388        );
389        let histogram_max = hist
390            .iter()
391            .copied()
392            .max_by(f32::total_cmp)
393            .expect("vec is not empty");
394        let localmax_threshold = histogram_max * ORIENTATION_HISTOGRAM_LOCALMAX_RATIO;
395
396        // Extract keypoint reference orientations, Section 4.1.C in [4]
397        let kps_with_ref_orientation = (0..hist.len()).filter_map(move |k| {
398            // h_k- and h_k+ in [4].
399            // Histogram indices wrap around since bins correspond to angles.
400            let k_minus = if k > 0 { k - 1 } else { hist.len() - 1 };
401            let k_plus = if k < hist.len() - 1 { k + 1 } else { 0 };
402            let is_local_max = hist[k] > hist[k_minus] && hist[k] > hist[k_plus];
403            let is_close_to_global_max = hist[k] >= localmax_threshold;
404            if is_local_max && is_close_to_global_max {
405                // argmax of the quadratic function interpolating h_k-, h_k, h_k+
406                // See Eq. (23) in [4]
407                let interp =
408                    (hist[k_minus] - hist[k_plus]) / (hist[k_minus] - 2.0 * hist[k] + hist[k_plus]);
409                let bin: f32 = k as f32 + 0.5 * interp;
410                let bin = if bin < 0.0 {
411                    hist.len() as f32 + bin
412                } else if bin >= hist.len() as f32 {
413                    bin - hist.len() as f32
414                } else {
415                    bin
416                };
417                // The angles are shuffled around to match OpenCV
418                let kp_angle: f32 = 360.0 - (360.0 / hist.len() as f32) * bin;
419                Some(SiftKeyPoint {
420                    x: kp_x,
421                    y: kp_y,
422                    size: kp_scale * octave_scale_factor,
423                    response: contrast,
424                    octave,
425                    scale: point.scale,
426                    angle: kp_angle,
427                })
428            } else {
429                None
430            }
431        });
432        Box::new(kps_with_ref_orientation)
433    });
434    Box::new(result_iter)
435}
436
437fn point_is_local_extremum(dogslice: ArrayView3<f32>, x: usize, y: usize) -> bool {
438    #[inline(always)]
439    fn values_around(arr: &ArrayView2<f32>, y: usize, x: usize) -> impl Iterator<Item = f32> {
440        [
441            arr[(y - 1, x - 1)],
442            arr[(y - 1, x)],
443            arr[(y - 1, x + 1)],
444            arr[(y, x - 1)],
445            arr[(y, x + 1)],
446            arr[(y + 1, x - 1)],
447            arr[(y + 1, x)],
448            arr[(y + 1, x + 1)],
449        ]
450        .into_iter()
451    }
452
453    assert!(dogslice.shape()[0] == 3);
454    let prev = dogslice.index_axis(Axis(0), 0);
455    let curr = dogslice.index_axis(Axis(0), 1);
456    let next = dogslice.index_axis(Axis(0), 2);
457
458    // Discard extrema with values below this threshold.
459    // This is taken from OpenCV, but Section 3.3 in [4] uses different values.
460    let threshold: f32 = (0.5 * CONTRAST_THRESHOLD / SCALES_PER_OCTAVE as f32).floor();
461
462    assert!(x > 0 && y > 0 && x < curr.shape()[1] && y < curr.shape()[0]);
463
464    let val = curr[(y, x)];
465    if val.abs() <= threshold {
466        return false;
467    } else if val > 0.0 {
468        if val
469            >= values_around(&curr, y, x)
470                .max_by(f32::total_cmp)
471                .expect("sequence not empty")
472            && val
473                >= values_around(&prev, y, x)
474                    .max_by(f32::total_cmp)
475                    .expect("sequence not empty")
476            && val
477                >= values_around(&next, y, x)
478                    .max_by(f32::total_cmp)
479                    .expect("sequence not empty")
480        {
481            let _11p = prev[(y, x)];
482            let _11n = next[(y, x)];
483            return val >= _11p.max(_11n);
484        }
485    } else {
486        debug_assert!(val < 0.0);
487        if val
488            <= values_around(&curr, y, x)
489                .min_by(f32::total_cmp)
490                .expect("sequence not empty")
491            && val
492                <= values_around(&prev, y, x)
493                    .min_by(f32::total_cmp)
494                    .expect("sequence not empty")
495            && val
496                <= values_around(&next, y, x)
497                    .min_by(f32::total_cmp)
498                    .expect("sequence not empty")
499        {
500            let _11p = prev[(y, x)];
501            let _11n = next[(y, x)];
502            return val <= _11p.min(_11n);
503        }
504    }
505    false
506}
507
508#[derive(Copy, Clone)]
509struct InterpolateResult {
510    pub point: ScaleSpacePoint,
511    pub offset_scale: f32,
512    pub offset_x: f32,
513    pub offset_y: f32,
514}
515
516const MAX_INTERPOLATION_STEPS: usize = 5;
517
518/// Scale space extrema are initially identified on the grid of discrete pixels in a particular
519/// image in the scale space. The real DoG function approximated by the stack of DoG images is continous
520/// though, and the actual extremum may not fall exactly on a sampling point (scale, row, column).
521/// To get a better approximation of the extremum's location, the second order Taylor expansion is
522/// used to fit the DoG function around a point and the local extremum of this quadratic is used as
523/// a keypoint.
524/// See P18-19 in [4].
525fn interpolate_extremum(
526    dog: ArrayView3<f32>,
527    ScaleSpacePoint {
528        mut scale,
529        mut x,
530        mut y,
531    }: ScaleSpacePoint,
532) -> Option<InterpolateResult> {
533    assert!(dog.shape()[0] == SCALES_PER_OCTAVE + 2);
534    for _ in 0..MAX_INTERPOLATION_STEPS {
535        let prev = &dog.slice(s![scale - 1, .., ..]);
536        let curr = &dog.slice(s![scale, .., ..]);
537        let next = &dog.slice(s![scale + 1, .., ..]);
538
539        // 3D Gradient
540        let g1 = (next[(y, x)] - prev[(y, x)]) / 2.;
541        let g2 = (curr[(y + 1, x)] - curr[(y - 1, x)]) / 2.;
542        let g3 = (curr[(y, x + 1)] - curr[(y, x - 1)]) / 2.;
543
544        // Hessian matrix
545        let value2x = curr[(y, x)] * 2.;
546        let h11 = next[(y, x)] + prev[(y, x)] - value2x;
547        let h12 = (next[(y + 1, x)] - next[(y - 1, x)] - prev[(y + 1, x)] + prev[(y - 1, x)]) / 4.;
548        let h13 = (next[(y, x + 1)] - next[(y, x - 1)] - prev[(y, x + 1)] + prev[(y, x - 1)]) / 4.;
549        let h22 = curr[(y + 1, x)] + curr[(y - 1, x)] - value2x;
550        let h33 = curr[(y, x + 1)] + curr[(y, x - 1)] - value2x;
551        let h23 = (curr[(y + 1, x + 1)] - curr[(y + 1, x - 1)] - curr[(y - 1, x + 1)]
552            + curr[(y - 1, x - 1)])
553            / 4.;
554
555        // Solve for α* as shown in Eq. (14) by inverting the hessian
556        let det = h11 * h22 * h33 - h11 * h23 * h23 - h12 * h12 * h33 + 2. * h12 * h13 * h23
557            - h13 * h13 * h22;
558        let hinv11 = (h22 * h33 - h23 * h23) / det;
559        let hinv12 = (h13 * h23 - h12 * h33) / det;
560        let hinv13 = (h12 * h23 - h13 * h22) / det;
561        let hinv22 = (h11 * h33 - h13 * h13) / det;
562        let hinv23 = (h12 * h13 - h11 * h23) / det;
563        let hinv33 = (h11 * h22 - h12 * h12) / det;
564
565        // dot product of gradient vector with hessian inverse.
566        // Solution vector α* is (offset_scale, offset_row, offset_col)
567        let offset_scale = -(hinv11 * g1 + hinv12 * g2 + hinv13 * g3);
568        let offset_x = -(hinv13 * g1 + hinv23 * g2 + hinv33 * g3);
569        let offset_y = -(hinv12 * g1 + hinv22 * g2 + hinv23 * g3);
570
571        // If offsets are outside the interval [-0.5; 0.5] the extremum belongs
572        // to a different pixel or scale and should be rejected here.
573        let valid_interval = 0.5;
574        if offset_scale.abs() < valid_interval
575            && offset_x.abs() < valid_interval
576            && offset_y.abs() < valid_interval
577        {
578            // extremum of quadratic function is valid
579            return Some(InterpolateResult {
580                offset_scale,
581                offset_y,
582                offset_x,
583                point: ScaleSpacePoint { scale, y, x },
584            });
585        }
586        // Interpolation step rejected, update discrete extremum coordinates
587        // and retry interpolation.
588        x = (x as isize + offset_x.round() as isize) as usize;
589        y = (y as isize + offset_y.round() as isize) as usize;
590        scale = (scale as isize + offset_scale.round() as isize) as usize;
591
592        if !(1..=SCALES_PER_OCTAVE).contains(&scale)
593            || x < IMAGE_BORDER as usize
594            || x >= curr.shape()[1] - IMAGE_BORDER as usize
595            || y < IMAGE_BORDER as usize
596            || y >= curr.shape()[0] - IMAGE_BORDER as usize
597        {
598            return None;
599        }
600    }
601    // did not converge with in iteration limit
602    None
603}
604
605/// Based on P11, Eq. (2) and (3) in [2]. This step is not mentioned in [4].
606fn extremum_contrast(
607    dogslice: ArrayView3<f32>,
608    x: usize,
609    y: usize,
610    interp_offset_scale: f32,
611    interp_offset_x: f32,
612    interp_offset_y: f32,
613) -> f32 {
614    assert!(dogslice.shape()[0] == 3);
615    let prev = dogslice.index_axis(Axis(0), 0);
616    let curr = dogslice.index_axis(Axis(0), 1);
617    let next = dogslice.index_axis(Axis(0), 2);
618    // 3D Gradient
619    let g1 = (next[(y, x)] - prev[(y, x)]) / 2.;
620    let g2 = (curr[(y + 1, x)] - curr[(y - 1, x)]) / 2.;
621    let g3 = (curr[(y, x + 1)] - curr[(y, x - 1)]) / 2.;
622    // Value of the interpolating function at x̂ in [2], or α* in [4].
623    let interp = interp_offset_scale * g1 + interp_offset_y * g2 + interp_offset_x * g3;
624    curr[(y, x)] + interp / 2.
625    // extremum_contrast.abs() * SCALES_PER_OCTAVE as f32 > CONTRAST_THRESHOLD
626}
627
628/// Measures "edgeness" of a point the ratio between eigenvalues of the Hessian matrix.
629/// P382, Eq. (17) and Eq. (18) in [4]
630fn extremum_is_on_edge(
631    dog_curr: ArrayView2<f32>,
632    ScaleSpacePoint { scale: _, y, x }: ScaleSpacePoint,
633) -> bool {
634    assert!(x > 0 && x < dog_curr.shape()[1] - 1);
635    assert!(y > 0 && y < dog_curr.shape()[0] - 1);
636    let val2x = dog_curr[(y, x)] * 2.0;
637    let h11 = dog_curr[(y + 1, x)] + dog_curr[(y - 1, x)] - val2x;
638    let d22 = dog_curr[(y, x + 1)] + dog_curr[(y, x - 1)] - val2x;
639
640    let h12 = (dog_curr[(y + 1, x + 1)] - dog_curr[(y + 1, x - 1)] - dog_curr[(y - 1, x + 1)]
641        + dog_curr[(y - 1, x - 1)])
642        / 4.;
643
644    let tr = d22 + h11;
645    let det = d22 * h11 - h12 * h12;
646    if det <= 0. {
647        return true;
648    }
649    // edgeness = tr^2 / det
650    //     edgeness > (C_edge + 1)^2 / C_edge
651    // <=> tr^2 * C_edge > (C_edge + 1)^2 * det
652    (tr * tr * EDGE_THRESHOLD) > (EDGE_THRESHOLD + 1.0).powi(2) * det
653}
654
655/// Histogram of gradient directions in square patch of side length 2*radius around (row, col).
656/// See Section 4.1 in [4].
657fn gradient_direction_histogram(
658    img: ArrayView2<f32>,
659    x: u32,
660    y: u32,
661    radius: i32,
662    sigma: f32,
663    n_bins: usize,
664) -> Vec<f32> {
665    assert!(n_bins >= 2);
666    // Denominator of exponent in Eq. (20) in [4], used to compute weights
667    let grad_weight_scale = -1.0 / (2.0 * sigma * sigma);
668
669    // x/y gradients are weighted by their distance from the point at (row, col).
670    // weights holds the exponent of the weighting factor in Eq. (20) in [4]
671    let (grads_x, grads_y, grad_weights): (Vec<f32>, Vec<f32>, Vec<f32>) = (-radius..=radius)
672        .filter_map(|y_patch| {
673            if y_patch <= -(y as i32) {
674                return None;
675            }
676            let y: i64 = i64::from(y) + i64::from(y_patch);
677            if y <= 0 || y as usize >= img.shape()[0] - 1 {
678                return None;
679            }
680            Some((y as usize, y_patch))
681        })
682        .flat_map(|(y_img, y_patch)| {
683            (-radius..=radius)
684                .filter_map(|x_patch| {
685                    if x_patch <= -(x as i32) {
686                        return None;
687                    }
688                    let x = x as isize + x_patch as isize;
689                    if x <= 0 || x as usize >= img.shape()[1] - 1 {
690                        return None;
691                    }
692                    Some((x as usize, x_patch))
693                })
694                .map(move |(x_img, x_patch)| {
695                    let dx = img[(y_img, x_img + 1)] - img[(y_img, x_img - 1)];
696                    let dy = img[(y_img - 1, x_img)] - img[(y_img + 1, x_img)];
697                    // squared euclidian distance from (row, col) * weighting factor
698                    let w = (y_patch * y_patch + x_patch * x_patch) as f32 * grad_weight_scale;
699                    (dx, dy, w)
700                })
701        })
702        .multiunzip();
703
704    // Finalizing the term in Eq. (20) in [4]
705    let grad_weights = grad_weights.into_iter().map(|x| x.exp());
706    // gradient magnitudes
707    let magnitudes = grads_x
708        .iter()
709        .zip(&grads_y)
710        .map(|(x, y)| (x * x + y * y).sqrt());
711    let orientations = grads_x
712        .iter()
713        .copied()
714        .zip(&grads_y)
715        .map(|(x, y)| f64::from(*y).atan2(x.into()) as f32);
716
717    // Range of angles (radians) assigned to one histogram bin
718    let bin_angle_step = n_bins as f32 / (PI32 * 2.);
719    // Histogram bin index as given by Eq. (21) in [4]
720    let hist_bin = orientations.into_iter().map(|ori| {
721        assert!((-PI32..=PI32).contains(&ori));
722        let raw_bin = bin_angle_step * ori;
723        // raw_bin is in range [-PI * (n_bins / 2PI); PI * (n_bins / 2PI)]
724        //                    =[-n_bins / 2; n_bins / 2];
725        assert!(-(n_bins as f32) / 2. <= raw_bin && raw_bin <= n_bins as f32 / 2.);
726        let bin: i32 = raw_bin.round() as i32;
727        if bin >= n_bins as i32 {
728            (bin - n_bins as i32) as usize
729        } else if bin < 0 {
730            assert!(bin + n_bins as i32 >= 0);
731            (bin + n_bins as i32) as usize
732        } else {
733            bin as usize
734        }
735    });
736
737    // The gradient orientation histogram undergoes a final smoothing step.
738    // In [4], smoothing is done by convolving 6 times with a kernel of [1/3, 1/3, 1/3].
739    // In OpenCV, the kernel [1/16, 4/16, 6/16, 4/16, 1/16] is instead used one time only.
740    // raw_hist has length n_bins + 4 because the convolution is circular/cyclic and  wraps around,
741    // so we copy the first and last 2 values to the other end of the histogram to get this wrapping.
742    let mut raw_hist = vec![0.0; n_bins + 4];
743    izip!(hist_bin, magnitudes, grad_weights).for_each(|(bin, mag, weight)| {
744        raw_hist[bin + 2] += weight * mag;
745    });
746    raw_hist[1] = raw_hist[n_bins + 1];
747    raw_hist[0] = raw_hist[n_bins];
748    raw_hist[n_bins + 2] = raw_hist[2];
749    raw_hist[n_bins + 3] = raw_hist[3];
750    let mut hist = vec![0.; n_bins];
751    for i in 2..n_bins + 2 {
752        hist[i - 2] = (raw_hist[i - 2] + raw_hist[i + 2]) * (1. / 16.)
753            + (raw_hist[i - 1] + raw_hist[i + 1]) * (4. / 16.)
754            + raw_hist[i] * 6. / 16.;
755    }
756    hist
757}
758
759fn compute_descriptors(scale_space: &[Array3<f32>], keypoints: &[SiftKeyPoint]) -> Array2<u8> {
760    let mut desc = Array2::zeros((keypoints.len(), DESCRIPTOR_SIZE));
761    desc.rows_mut()
762        .into_iter()
763        .zip(keypoints)
764        .for_each(|(row, kp)| {
765            let img = &scale_space[kp.octave].index_axis(Axis(0), kp.scale);
766            let angle = 360.0 - kp.angle;
767            // δ_o in Eq. (9) in [4] with δ_min = 2
768            let octave_scale_factor = 2_f32.powi(-(kp.octave as i32));
769            let kp_size = kp.size * octave_scale_factor;
770            let kpdesc = compute_descriptor(
771                img,
772                kp.x * octave_scale_factor,
773                kp.y * octave_scale_factor,
774                kp_size,
775                angle,
776            );
777            row.into_iter()
778                .zip(kpdesc)
779                .for_each(|(el, descriptor_component)| *el = descriptor_component);
780        });
781    desc
782}
783
784#[doc(hidden)]
785pub fn compute_descriptor(
786    img: &ArrayView2<f32>,
787    x: f32,
788    y: f32,
789    scale: f32,
790    orientation: f32,
791) -> impl IntoIterator<Item = u8> {
792    let n_hist = DESCRIPTOR_N_HISTOGRAMS;
793    let n_bins = DESCRIPTOR_N_BINS;
794    let height = img.shape()[0];
795    let width = img.shape()[1];
796    let x = x.round() as usize;
797    let y = y.round() as usize;
798    const BIN_ANGLE_STEP: f32 = DESCRIPTOR_N_BINS as f32 / 360.0;
799    let hist_width = LAMBDA_DESCR * scale;
800    let radius = (LAMBDA_DESCR * scale * 2_f32.sqrt() * (n_hist + 1) as f32 * 0.5).round() as i32;
801    let (sin_ori, cos_ori) = orientation.to_radians().sin_cos();
802    let (sin_ori_scaled, cos_ori_scaled) = (sin_ori / hist_width, cos_ori / hist_width);
803
804    // Instead of 4*4 histograms, we work with 5*5 here so that the interpolation works out simpler
805    // at the borders (surely possible to do differently as well).
806    // The outermost histograms will be discarded.
807    let mut hist: Array3<f32> = Array3::zeros((n_hist + 2, n_hist + 2, DESCRIPTOR_N_BINS));
808
809    let (gradients_x, gradients_y, row_bins, col_bins, weights): (
810        Vec<_>,
811        Vec<_>,
812        Vec<_>,
813        Vec<_>,
814        Vec<_>,
815    ) = (-radius..=radius)
816        .flat_map(|y_in_window| {
817            (-radius..=radius).filter_map(move |x_in_window| {
818                // row and col in the keypoint's coordinates wrt its reference orientation
819                let col_rotated: f32 =
820                    x_in_window as f32 * cos_ori_scaled - y_in_window as f32 * sin_ori_scaled;
821                let row_rotated: f32 =
822                    x_in_window as f32 * sin_ori_scaled + y_in_window as f32 * cos_ori_scaled;
823                // Bin here means which of the 4*4 histograms the gradient at this point will
824                // contribute to. It is not a bin within a histogram.
825                let row_bin = row_rotated + (n_hist / 2) as f32;
826                let col_bin = col_rotated + (n_hist / 2) as f32;
827
828                // coordinates to read pixels from. No resampling here
829                let abs_y = y as i32 + y_in_window;
830                let abs_x = x as i32 + x_in_window;
831
832                // +/- 0.5 to check if the sample would contribute anything to the 4*4 histograms
833                // of interest with interpolation.
834                if row_bin > -0.5
835                    && row_bin < n_hist as f32 + 0.5
836                    && col_bin > -0.5
837                    && col_bin < n_hist as f32 + 0.5
838                    && abs_y > 0
839                    && abs_y < (height - 1) as i32
840                    && abs_x > 0
841                    && abs_x < (width - 1) as i32
842                {
843                    let abs_y = abs_y as usize;
844                    let abs_x = abs_x as usize;
845                    let dx = img[(abs_y, abs_x + 1)] - img[(abs_y, abs_x - 1)];
846                    let dy = img[(abs_y - 1, abs_x)] - img[(abs_y + 1, abs_x)];
847
848                    // Samples contribute less to histogram as they get further away.
849                    // Exponents in Eq. (27) in [4]
850                    let weight = col_rotated.powi(2) + row_rotated.powi(2);
851                    Some((dx, dy, row_bin, col_bin, weight))
852                } else {
853                    None
854                }
855            })
856        })
857        .multiunzip();
858    // Different weighting than in [4]
859    let weight_scale = -2. / (n_hist.pow(2) as f32);
860    let weights = weights
861        .into_iter()
862        .map(|x| (x * weight_scale).exp())
863        .collect_vec();
864    // Gradient orientations in patch normalized wrt to the keypoint's reference orientation.
865    let normalized_orienations = gradients_x
866        .iter()
867        .zip(&gradients_y)
868        .map(|(x, y)| {
869            let x: f64 = *x as f64;
870            let y: f64 = *y as f64;
871            ((y.atan2(x).to_degrees() + 360.0) % 360.0) as f32 - orientation
872        })
873        .collect_vec();
874    // Gradient magnitudes
875    let magnitude = gradients_x
876        .into_iter()
877        .zip(&gradients_y)
878        .map(|(x, y)| (x * x + y * y).sqrt())
879        .collect_vec();
880
881    // Spread each sample point's contribution to its 8 neighbouring histograms based on its distance
882    // from the histogram window's center and weighted by the sample's gradient magnitude.
883    izip!(
884        row_bins,
885        col_bins,
886        normalized_orienations,
887        magnitude,
888        weights
889    )
890    .for_each(|(row_bin, col_bin, orientation, mag, weight)| {
891        // Subtracting 0.5 here because the trilinear interpolation (the reverse actually)
892        // below works on the {-0.5, 0.5}^3 cube, but our histograms are located in {0, 1}^ cubes.
893        let row_bin = row_bin - 0.5;
894        let col_bin = col_bin - 0.5;
895        let mag = mag * weight;
896        let obin = orientation * BIN_ANGLE_STEP;
897        let row_floor = row_bin.floor();
898        let col_floor = col_bin.floor();
899        let ori_floor = obin.floor();
900        let row_frac = row_bin - row_floor;
901        let col_frac = col_bin - col_floor;
902        let ori_frac = obin - ori_floor;
903
904        // The numbers are to be seen as coordinates on a cube.
905        // Notation taken from https://en.wikipedia.org/wiki/Trilinear_interpolation.
906        let c1 = mag * row_frac;
907        let c0 = mag - c1;
908        let c11 = c1 * col_frac;
909        let c10 = c1 - c11;
910        let c01 = c0 * col_frac;
911        let c00 = c0 - c01;
912        let c111 = c11 * ori_frac;
913        let c110 = c11 - c111;
914        let c101 = c10 * ori_frac;
915        let c100 = c10 - c101;
916        let c011 = c01 * ori_frac;
917        let c010 = c01 - c011;
918        let c001 = c00 * ori_frac;
919        let c000 = c00 - c001;
920
921        let row_floor_p1 = (row_floor + 1.) as usize;
922        let col_floor_p1 = (col_floor + 1.) as usize;
923        let row_floor_p2 = (row_floor + 2.) as usize;
924        let col_floor_p2 = (col_floor + 2.) as usize;
925        // Histogram bin indices wrap around because angles
926        let ori_floor = if ori_floor < 0. {
927            ori_floor + n_bins as f32
928        } else if ori_floor >= n_bins as f32 {
929            ori_floor - n_bins as f32
930        } else {
931            ori_floor
932        } as usize;
933        let ori_floor_p1 = if ori_floor + 1 >= n_bins {
934            // wrap around to ori_floor + 1 - n_bins, can only be 0
935            0
936        } else {
937            ori_floor + 1
938        };
939
940        hist[(row_floor_p1, col_floor_p1, ori_floor)] += c000;
941        hist[(row_floor_p1, col_floor_p1, ori_floor_p1)] += c001;
942        hist[(row_floor_p1, col_floor_p2, ori_floor)] += c010;
943        hist[(row_floor_p1, col_floor_p2, ori_floor_p1)] += c011;
944        hist[(row_floor_p2, col_floor_p1, ori_floor)] += c100;
945        hist[(row_floor_p2, col_floor_p1, ori_floor_p1)] += c101;
946        hist[(row_floor_p2, col_floor_p2, ori_floor)] += c110;
947        hist[(row_floor_p2, col_floor_p2, ori_floor_p1)] += c111;
948    });
949
950    #[allow(clippy::reversed_empty_ranges)]
951    let mut hist_flat = hist.slice_move(s![1..-1, 1..-1, ..]).into_flat();
952    let hist_sl = hist_flat.as_slice().expect("array is flat");
953
954    const DESCRIPTOR_MAGNITUDE_CAP: f32 = 0.2;
955    let mut l2_sq = 0.0;
956    hist_flat.iter().for_each(|x| l2_sq += x.powi(2));
957    let l2_uncapped = hist_sl
958        .chunks(4)
959        .map(|xs| xs.iter().map(|x| x.powi(2)).sum())
960        .reduce(|acc: f32, xs| acc + xs)
961        .expect("array is not empty")
962        .sqrt();
963    let component_cap = l2_uncapped * DESCRIPTOR_MAGNITUDE_CAP;
964
965    // Components of the vector can not be larger than 0.2 * l2_norm
966    hist_flat.mapv_inplace(|v| v.min(component_cap));
967
968    let l2_capped = hist_flat
969        .iter()
970        .copied()
971        .chunks(4)
972        .into_iter()
973        .map(|xs| xs.into_iter().map(|x| x.powi(2)).sum())
974        .reduce(|acc: f32, xs| acc + xs)
975        .expect("array is not empty")
976        .sqrt();
977
978    const DESCRIPTOR_L2_NORM: f32 = 512.0;
979    let l2_normalizer = DESCRIPTOR_L2_NORM / l2_capped.max(f32::EPSILON);
980
981    // Saturating cast to u8
982    hist_flat.into_iter().map(move |x| {
983        let x = (x * l2_normalizer).round() as i32;
984        if x > (u8::MAX as i32) {
985            u8::MAX
986        } else {
987            x as u8
988        }
989    })
990}
991
992/// Uses `imageproc` implementations of gaussian blur and resizing.
993pub struct ImageprocProcessing;
994
995impl Processing for ImageprocProcessing {
996    fn gaussian_blur(img: &LumaFImage, sigma: f64) -> LumaFImage {
997        gaussian_blur_f32(img, sigma as f32)
998    }
999
1000    fn resize_linear(img: &LumaFImage, width: u32, height: u32) -> LumaFImage {
1001        resize(img, width, height, FilterType::Triangle)
1002    }
1003
1004    fn resize_nearest(img: &LumaFImage, width: u32, height: u32) -> LumaFImage {
1005        resize(img, width, height, FilterType::Nearest)
1006    }
1007}
1008
1009#[test]
1010fn sift_end2end() {
1011    fn do_test(img_bytes: &[u8]) -> (Vec<KeyPoint>, Vec<Vec<u8>>) {
1012        let img = match image::load_from_memory(img_bytes).unwrap().grayscale() {
1013            image::DynamicImage::ImageLuma8(img) => img,
1014            _ => panic!("wrong image type"),
1015        };
1016        let SiftResult {
1017            keypoints,
1018            descriptors,
1019        } = sift_with_processing::<OpenCVProcessing>(&img, None);
1020        let argsort = {
1021            let mut idxs = keypoints.iter().enumerate().collect_vec();
1022            idxs.sort_by(|(_, kp1), (_, kp2)| match kp1.x.total_cmp(&kp2.x) {
1023                std::cmp::Ordering::Equal => match kp1.y.total_cmp(&kp2.y) {
1024                    std::cmp::Ordering::Equal => kp1.size.total_cmp(&kp2.size),
1025                    o => o,
1026                },
1027                o => o,
1028            });
1029            idxs.into_iter().map(|(i, _)| i).collect_vec()
1030        };
1031        let keypoints = argsort.iter().map(|i| keypoints[*i].clone()).collect_vec();
1032        let descriptors = argsort
1033            .iter()
1034            .map(|i| descriptors.row(*i).to_vec())
1035            .collect_vec();
1036        (keypoints, descriptors)
1037    }
1038    let tree_bytes = include_bytes!("../images/tree_small.jpg");
1039    let (keypoints, descriptors) = do_test(tree_bytes);
1040    insta::assert_yaml_snapshot!(
1041        keypoints,
1042        {
1043            ".*" => insta::rounded_redaction(4),
1044        }
1045    );
1046    insta::assert_yaml_snapshot!(descriptors,);
1047    let bird_bytes = include_bytes!("../images/bird_small.jpg");
1048    let (keypoints, descriptors) = do_test(bird_bytes);
1049    insta::assert_yaml_snapshot!(
1050        keypoints,
1051        {
1052            ".*" => insta::rounded_redaction(4),
1053        }
1054    );
1055    insta::assert_yaml_snapshot!(descriptors,);
1056}