find_subimage/
lib.rs

//! This crate provides basic functionality to find likely positions of a subimage within a larger
//! image by calculating an image distance. It has a naive scalar implementation in Rust, and a SIMD
//! implementation, also in Rust, that selects the best implementation based on CPU features at
//! runtime using the [simdeez](https://github.com/jackmott/simdeez) crate. It also provides an implementation which uses
//! [OpenCV](https://opencv.org/)'s (a C++ library) matchTemplate function through the
//! [opencv-rust](https://github.com/twistedfall/opencv-rust) crate, behind an optional off-by-default
//! feature. It can also optionally convert images to grayscale before applying the algorithms.
//!
//! Here's a simple example showing how to use the API:
//!
//! ```
//! # // If you modify this example, please also update its copy in README.md
//! use find_subimage::{Image, SubImageFinderState};
//! // Make a dummy 128x128 black image with a red dot at (50, 0)
//! let (w, h) = (128, 128);
//! let mut rgb_image = vec![0u8; w * h * 3];
//! rgb_image[50 * 3] = 250;
//! // Make a dummy 32x32 black image
//! // with a red dot at (0, 0)
//! let (sub_w, sub_h) = (32, 32);
//! let mut rgb_subimage = vec![0u8; sub_w * sub_h * 3];
//! rgb_subimage[0] = 250;
//!
//! let mut finder = SubImageFinderState::new();
//! // These are (x, y, distance) tuples, where x and y are the position within the larger image
//! // and a smaller distance means a closer match
//! let positions: &[(usize, usize, f32)] =
//!     finder.find_subimage_positions((&rgb_image, w, h), (&rgb_subimage, sub_w, sub_h), 3);
//! let best: Option<&(usize, usize, f32)> = positions
//!     .iter()
//!     .min_by(|(_, _, dist), (_, _, dist2)| dist.partial_cmp(dist2).unwrap());
//! println!("The subimage was found at position {:?}", &best);
//! assert_eq!(Some((50, 0)), best.map(|best| (best.0, best.1)));
//! // find_subimage_positions actually returns the results sorted by distance already,
//! // so we can skip finding the minimum
//! assert_eq!(Some((50, 0)), positions.get(0).map(|best| (best.0, best.1)));
//! ```
//!
//! The most important functions provided are [find_subimage_positions] and
//! [find_subimage_positions_as_grayscale].
//!
//! You may find their "_with_backend" versions useful.
//!
//! By default, this library prunes results that are close together. You can disable this (set the
//! scales to 0) or tweak it using [with_pruning].
//!
//! See the [Backend] enum to learn about the available backends.
//!
//! There are some examples in the /examples folder of the repository.
//!
//! [with_pruning]: SubImageFinderState::with_pruning
//! [find_subimage_positions]: SubImageFinderState::find_subimage_positions
//! [find_subimage_positions_as_grayscale]: SubImageFinderState::find_subimage_positions_as_grayscale

/// A simple struct to group (bytes, width, height) arguments
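///
/// A minimal sketch of the tuple conversion that the finder methods accept:
/// ```
/// use find_subimage::Image;
/// let bytes = vec![0u8; 8 * 8 * 3];
/// let image: Image = (&bytes, 8usize, 8usize).into();
/// assert_eq!((image.width, image.height), (8, 8));
/// ```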
pub struct Image<'a> {
    pub bytes: &'a [u8],
    pub width: usize,
    pub height: usize,
}
impl<'a> Image<'a> {
    fn new(bytes: &'a [u8], width: usize, height: usize) -> Self {
        Self {
            bytes,
            width,
            height,
        }
    }
}
impl<'a, T: AsRef<[u8]>, A: Into<usize>, B: Into<usize>> From<(&'a T, A, B)> for Image<'a> {
    fn from((bytes, width, height): (&'a T, A, B)) -> Self {
        Image::new(bytes.as_ref(), width.into(), height.into())
    }
}

/// The main context struct. This stores the buffers needed for the search, as well as the
/// configured backend and pruning parameters.
///
/// u8 buffers are used if conversion to grayscale is necessary, and f32 buffers for the backends
/// that require them.
///
/// There is also a Vec<(usize, usize, f32)> used to store results.
pub struct SubImageFinderState {
    positions_buffer: Vec<(usize, usize, f32)>,
    backend: Backend,

    prune_width_scale: f32,
    prune_height_scale: f32,

    f32buf_search_image: Vec<f32>,
    f32buf_subimage: Vec<f32>,

    u8buf_search_image: Vec<u8>,
    u8buf_subimage: Vec<u8>,
}

/// The backend/algorithm to use.
///
/// There is an optional OpenCV backend, which uses the opencv-rust crate and therefore depends on
/// the OpenCV C++ library. It requires enabling the `opencv` feature of find-subimage.
///
/// There is also an optional simdeez backend, which uses the simdeez crate for a Rust SIMD
/// implementation. It is enabled by default.
///
/// The only implementation which cannot be disabled at present is the scalar one.
#[derive(Clone, Copy)]
pub enum Backend {
    /// OpenCV SQDIFF_NORMED MatchTemplate
    ///
    /// Note that the threshold values for this backend use a different scale than the others.
    #[cfg(feature = "opencv")]
    OpenCV { threshold: f32 },
    /// This should detect CPU features at runtime and use the best available Rust SIMD
    /// implementation of SQDIFF_NORMED (square difference).
    ///
    /// `step_x` and `step_y` let you skip every Nth x or y coordinate when less accurate results
    /// are acceptable, potentially giving large speedups.
    #[cfg(feature = "simdeez")]
    RuntimeDetectedSimd {
        threshold: f32,
        step_x: usize,
        step_y: usize,
    },
    /// Scalar SQDIFF_NORMED (square difference) implementation.
    ///
    /// Slowest, but should work anywhere and be reliable.
    ///
    /// Smallest in terms of generated code size.
    ///
    /// `step_x` and `step_y` let you skip every Nth x or y coordinate when less accurate results
    /// are acceptable, potentially giving large speedups.
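    ///
    /// A minimal sketch of trading accuracy for speed by skipping every other column and row:
    /// ```
    /// let backend = find_subimage::Backend::Scalar {
    ///     threshold: 0.2,
    ///     step_x: 2,
    ///     step_y: 2,
    /// };
    /// ```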
    Scalar {
        threshold: f32,
        step_x: usize,
        step_y: usize,
    },
}
/// The default threshold used by [fn@SubImageFinderState::new_opencv]
pub const OPENCV_DEFAULT_THRESHOLD: f32 = 0.05;
/// The default threshold used by [SubImageFinderState::new] and [SubImageFinderState::default]
pub const NONOPENCV_DEFAULT_THRESHOLD: f32 = 0.1;

impl SubImageFinderState {
    /// Create a SubImageFinderState
    ///
    /// This uses the [Backend::Scalar] backend by default, unless the "simdeez-default-new" feature
    /// is enabled (it currently is by default), in which case the runtime-detected SIMD backend is
    /// used instead.
    ///
    /// See [Self::set_backend] and [Self::with_backend] to change the backend.
    pub fn new() -> Self {
        #[cfg(feature = "simdeez-default-new")]
        let backend = Backend::RuntimeDetectedSimd {
            threshold: NONOPENCV_DEFAULT_THRESHOLD,
            step_x: 1,
            step_y: 1,
        };
        #[cfg(not(feature = "simdeez-default-new"))]
        let backend = Backend::Scalar {
            threshold: NONOPENCV_DEFAULT_THRESHOLD,
            step_x: 1,
            step_y: 1,
        };
        Self {
            positions_buffer: vec![],
            f32buf_search_image: vec![],
            f32buf_subimage: vec![],
            u8buf_search_image: vec![],
            u8buf_subimage: vec![],
            prune_width_scale: 0.5f32,
            prune_height_scale: 0.5f32,
            backend,
        }
    }

    /// Like [Self::new] but uses [Backend::OpenCV]
    #[cfg(feature = "opencv")]
    pub fn new_opencv(threshold: Option<f32>) -> Self {
        let mut ret = Self::new();
        ret.backend = Backend::OpenCV {
            threshold: threshold.unwrap_or(OPENCV_DEFAULT_THRESHOLD),
        };
        ret
    }

    /// Returns a reference to the currently configured backend.
    pub fn backend(&self) -> &Backend {
        &self.backend
    }

    /// Returns a mutable reference to the currently configured backend.
    pub fn backend_mut(&mut self) -> &mut Backend {
        &mut self.backend
    }

    /// Set the currently configured backend.
    ///
    /// See also [Self::with_backend]
    pub fn set_backend(&mut self, new_backend: Backend) {
        self.backend = new_backend;
    }

    /// Set the currently configured prune width/height scaling.
    ///
    /// For more information see [Self::with_pruning]
    pub fn set_pruning(&mut self, prune_width_scale: f32, prune_height_scale: f32) {
        self.prune_height_scale = prune_height_scale;
        self.prune_width_scale = prune_width_scale;
    }

    /// Return a new state with the given backend
    /// ```
    /// use find_subimage::{Backend, SubImageFinderState};
    /// let state = SubImageFinderState::new().with_backend(Backend::Scalar {
    ///     threshold: 0.5,
    ///     step_x: 2,
    ///     step_y: 1,
    /// });
    /// ```
    #[must_use]
    pub fn with_backend(mut self, new_backend: Backend) -> Self {
        self.set_backend(new_backend);
        self
    }

    /// Return a new state with the given pruning width/height scaling parameters.
    ///
    /// These default to 0.5
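    ///
    /// A minimal sketch of disabling pruning entirely by setting both scales to 0:
    /// ```
    /// use find_subimage::SubImageFinderState;
    /// let finder = SubImageFinderState::new().with_pruning(0.0, 0.0);
    /// ```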
    #[must_use]
    pub fn with_pruning(mut self, prune_width_scale: f32, prune_height_scale: f32) -> Self {
        self.set_pruning(prune_width_scale, prune_height_scale);
        self
    }

    /// Finds positions where the subimage is found within the search image. Each position is the
    /// top-left corner of the subimage within the search image.
    ///
    /// You can tweak how many positions are reported with the backend's threshold. Note that the
    /// threshold scale is backend-dependent.
    ///
    /// The `channel_count` argument should be the number of channels for both input images (for
    /// example, 3 for an RGB image or 1 for grayscale).
    ///
    /// The input images can optionally be converted to grayscale before applying the algorithm, see
    /// [Self::find_subimage_positions_as_grayscale].
    ///
    /// The third field of the tuples in the returned slice is the matching/distance value. Values
    /// closer to 0 mean a more exact match, and values closer to 1 a fuzzier one. The results are
    /// returned sorted by distance, with the best matches first.
    pub fn find_subimage_positions<'a, 'b, T: Into<Image<'a>>, U: Into<Image<'b>>>(
        &mut self,
        search_image: T,
        subimage: U,
        channel_count: u8,
    ) -> &[(usize, usize, f32)] {
        let backend = self.backend;
        self.find_subimage_positions_with_backend(
            search_image.into(),
            subimage.into(),
            &backend,
            channel_count,
        )
    }

    /// Like [Self::find_subimage_positions] but lets you use a different backend
    /// than the currently configured one.
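    ///
    /// A minimal sketch of a one-off search with an explicitly chosen backend (dummy all-black
    /// images, so every candidate position matches):
    /// ```
    /// use find_subimage::{Backend, SubImageFinderState};
    /// let (w, h) = (64, 64);
    /// let image = vec![0u8; w * h];
    /// let (sub_w, sub_h) = (8, 8);
    /// let sub = vec![0u8; sub_w * sub_h];
    /// let mut finder = SubImageFinderState::new();
    /// let positions = finder.find_subimage_positions_with_backend(
    ///     (&image, w, h),
    ///     (&sub, sub_w, sub_h),
    ///     &Backend::Scalar { threshold: 0.2, step_x: 1, step_y: 1 },
    ///     1, // single-channel (grayscale) input
    /// );
    /// assert!(!positions.is_empty());
    /// ```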
    pub fn find_subimage_positions_with_backend<'a, 'b, T: Into<Image<'a>>, U: Into<Image<'b>>>(
        &mut self,
        search_image: T,
        subimage: U,
        backend: &Backend,
        channel_count: u8,
    ) -> &[(usize, usize, f32)] {
        self.find_subimage_positions_with_backend_impl(
            search_image.into(),
            subimage.into(),
            backend,
            false,
            channel_count,
            channel_count,
        )
    }

    /// Like [Self::find_subimage_positions], but the images are converted to grayscale before
    /// matching. This can speed up the search, but depending on the images it can hurt the quality
    /// of the results.
    ///
    /// This is done using internal buffers. If you reuse a [SubImageFinderState] for multiple
    /// images of the same size, it should only need to allocate once.
    ///
    /// If `channel_count_subimage` is None, `channel_count_search` is used in its place.
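    ///
    /// A minimal sketch mixing channel counts (the image contents here are placeholders):
    /// ```
    /// use find_subimage::SubImageFinderState;
    /// use std::num::NonZeroU8;
    /// let (w, h) = (64, 64);
    /// let rgb_image = vec![0u8; w * h * 3];
    /// let (sub_w, sub_h) = (16, 16);
    /// let rgba_subimage = vec![0u8; sub_w * sub_h * 4];
    /// let mut finder = SubImageFinderState::new();
    /// // 3-channel search image, 4-channel subimage; both are converted to grayscale internally
    /// let positions = finder.find_subimage_positions_as_grayscale(
    ///     (&rgb_image, w, h),
    ///     (&rgba_subimage, sub_w, sub_h),
    ///     3,
    ///     NonZeroU8::new(4),
    /// );
    /// ```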
    pub fn find_subimage_positions_as_grayscale<'a, 'b, T: Into<Image<'a>>, U: Into<Image<'b>>>(
        &mut self,
        search_image: T,
        subimage: U,
        channel_count_search: u8,
        channel_count_subimage: Option<NonZeroU8>,
    ) -> &[(usize, usize, f32)] {
        let backend = self.backend;
        self.find_subimage_positions_as_grayscale_with_backend(
            search_image.into(),
            subimage.into(),
            &backend,
            channel_count_search,
            channel_count_subimage,
        )
    }

    /// Like [Self::find_subimage_positions_as_grayscale] but lets you use a different backend
    /// than the currently configured one.
    pub fn find_subimage_positions_as_grayscale_with_backend<
        'a,
        'b,
        T: Into<Image<'a>>,
        U: Into<Image<'b>>,
    >(
        &mut self,
        search_image: T,
        subimage: U,
        backend: &Backend,
        channel_count_search: u8,
        channel_count_subimage: Option<NonZeroU8>,
    ) -> &[(usize, usize, f32)] {
        self.find_subimage_positions_with_backend_impl(
            search_image.into(),
            subimage.into(),
            backend,
            true,
            channel_count_search,
            channel_count_subimage
                .map(|x| x.get())
                .unwrap_or(channel_count_search),
        )
    }

    /// The main implementation of the algorithm.
    ///
    /// This performs grayscale conversion, runs the hot loop of the appropriate backend, and
    /// prunes the results at the end.
    ///
    /// All the public functions that find positions call into this.
    fn find_subimage_positions_with_backend_impl(
        &mut self,
        search_image: Image,
        subimage: Image,
        backend: &Backend,
        to_grayscale: bool,
        search_image_channel_count: u8,
        subimage_channel_count: u8,
    ) -> &[(usize, usize, f32)] {
        // If there is no grayscale conversion, channel counts should match
        if !to_grayscale && search_image_channel_count != subimage_channel_count {
            panic!(
                "Attempted to find_subimage_positions with different channel counts. search:{} subimage:{}",
                search_image_channel_count, subimage_channel_count
            );
        }

        self.positions_buffer.clear();

        let Image {
            bytes: search_image,
            width: search_width,
            height: search_height,
        } = search_image;
        let Image {
            bytes: subimage,
            width: subimage_width,
            height: subimage_height,
        } = subimage;

        // Grayscale conversion: a plain average of the channels (no luma weighting)
        let to_gray_sub = move |rgb: &[u8]| {
            rgb.iter()
                .map(|x| (*x as f32) / (subimage_channel_count as f32))
                .sum::<f32>() as u8
        };
        let to_gray_search = move |rgb: &[u8]| {
            rgb.iter()
                .map(|x| (*x as f32) / (search_image_channel_count as f32))
                .sum::<f32>() as u8
        };
        let to_f32 = |x: u8| x as f32;
        let ref_to_f32 = |&x: &u8| x as f32;

        match *backend {
            #[cfg(feature = "simdeez")]
            Backend::RuntimeDetectedSimd {
                threshold,
                step_x,
                step_y,
            } => {
                self.f32buf_subimage.clear();
                if to_grayscale && subimage_channel_count != 1 {
                    self.f32buf_subimage.extend(
                        subimage
                            .chunks_exact(subimage_channel_count as usize)
                            .map(to_gray_sub)
                            .map(to_f32),
                    );
                } else {
                    self.f32buf_subimage.extend(subimage.iter().map(ref_to_f32));
                }

                self.f32buf_search_image.clear();
                if to_grayscale && search_image_channel_count != 1 {
                    self.f32buf_search_image.extend(
                        search_image
                            .chunks_exact(search_image_channel_count as usize)
                            .map(to_gray_search)
                            .map(to_f32),
                    );
                } else {
                    self.f32buf_search_image
                        .extend(search_image.iter().map(ref_to_f32));
                }

                let simdeez_width = simdeez_width_runtime_select();
                let dist_function = if subimage_width % simdeez_width == 0 {
                    image_dist_simdeez_runtime_select
                } else {
                    image_dist_simdeez_with_remainder_runtime_select
                };

                let width_multiplier =
                    if to_grayscale { 1 } else { subimage_channel_count as usize };
                for y in (0..(search_height - subimage_height)).step_by(step_y) {
                    for x in (0..(search_width - subimage_width)).step_by(step_x) {
                        let dist = dist_function(
                            x * width_multiplier,
                            y,
                            &self.f32buf_search_image,
                            search_width * width_multiplier,
                            &self.f32buf_subimage,
                            subimage_width * width_multiplier,
                            subimage_height,
                        );
                        if dist < threshold {
                            self.positions_buffer.push((x, y, dist));
                        }
                    }
                }
            }
            Backend::Scalar {
                threshold,
                step_x,
                step_y,
            } => {
                let subimage_bytes: &[u8] = if to_grayscale && subimage_channel_count != 1 {
                    self.u8buf_subimage.clear();
                    self.u8buf_subimage.extend(
                        subimage
                            .chunks_exact(subimage_channel_count as usize)
                            .map(to_gray_sub),
                    );
                    &self.u8buf_subimage
                } else {
                    subimage
                };

                let search_bytes: &[u8] = if to_grayscale && search_image_channel_count != 1 {
                    self.u8buf_search_image.clear();
                    self.u8buf_search_image.extend(
                        search_image
                            .chunks_exact(search_image_channel_count as usize)
                            .map(to_gray_search),
                    );
                    &self.u8buf_search_image
                } else {
                    search_image
                };

                for y in (0..(search_height - subimage_height)).step_by(step_y) {
                    for x in (0..(search_width - subimage_width)).step_by(step_x) {
                        let dist = image_dist_naive(
                            (x, y),
                            (search_bytes, search_width),
                            (subimage_bytes, subimage_width, subimage_height),
                            if to_grayscale { 1 } else { subimage_channel_count as usize },
                        );
                        if dist < threshold {
                            self.positions_buffer.push((x, y, dist));
                        }
                    }
                }
            }
            #[cfg(feature = "opencv")]
            Backend::OpenCV { threshold } => {
                let subimage_ptr: *mut std::ffi::c_void =
                    if to_grayscale && subimage_channel_count != 1 {
                        self.u8buf_subimage.clear();
                        self.u8buf_subimage.extend(
                            subimage
                                .chunks_exact(subimage_channel_count as usize)
                                .map(to_gray_sub),
                        );
                        self.u8buf_subimage.as_mut_ptr() as *mut _
                    } else {
                        subimage.as_ptr() as *mut _
                    };

                let search_ptr: *mut std::ffi::c_void =
                    if to_grayscale && search_image_channel_count != 1 {
                        self.u8buf_search_image.clear();
                        self.u8buf_search_image.extend(
                            search_image
                                .chunks_exact(search_image_channel_count as usize)
                                .map(to_gray_search),
                        );
                        self.u8buf_search_image.as_mut_ptr() as *mut _
                    } else {
                        search_image.as_ptr() as *mut _
                    };

                let ch_count_to_mat_typ = |channels| match channels {
                    1 => opencv::core::CV_8UC1,
                    2 => opencv::core::CV_8UC2,
                    3 => opencv::core::CV_8UC3,
                    4 => opencv::core::CV_8UC4,
                    _ => panic!(
                        "opencv matrices do not support more than 4 channels (tried to use {} channels)",
                        channels
                    ),
                };
                let opencv_mat_typ_search: i32 = if to_grayscale {
                    opencv::core::CV_8UC1
                } else {
                    ch_count_to_mat_typ(search_image_channel_count)
                };
                let opencv_mat_typ_sub: i32 = if to_grayscale {
                    opencv::core::CV_8UC1
                } else {
                    ch_count_to_mat_typ(subimage_channel_count)
                };
                unsafe {
                    let mut out_mat = opencv::core::Mat::default();
                    opencv::imgproc::match_template(
                        &opencv::core::Mat::new_rows_cols_with_data(
                            search_height as i32,
                            search_width as i32,
                            opencv_mat_typ_search,
                            search_ptr,
                            0,
                        )
                        .unwrap(),
                        &opencv::core::Mat::new_rows_cols_with_data(
                            subimage_height as i32,
                            subimage_width as i32,
                            opencv_mat_typ_sub,
                            subimage_ptr,
                            0,
                        )
                        .unwrap(),
                        &mut out_mat,
                        opencv::imgproc::TM_SQDIFF_NORMED,
                        &opencv::core::no_array(),
                    )
                    .unwrap();

                    for (opencv::core::Point_ { x, y }, val) in out_mat.iter().unwrap() {
                        let val: f32 = val; // To help inference

                        if val < threshold {
                            self.positions_buffer.push((x as usize, y as usize, val));
                        }
                    }
                }
            }
        }

        self.prune_nearby_results(subimage_width, subimage_height);

        &self.positions_buffer
    }

    // TODO: Iterator API?
    // TODO: Allow custom strides w/ padding?

    /// Remove results that are too close together according to prune_[width|height]_scale,
    /// prioritizing the ones with the lowest distance.
    fn prune_nearby_results(&mut self, subimage_width: usize, subimage_height: usize) {
        let width_threshold = (subimage_width as f32 * self.prune_width_scale) as isize;
        let height_threshold = (subimage_height as f32 * self.prune_height_scale) as isize;

        // Sort best matches (lowest distance) first, so better matches win over nearby worse ones
        self.positions_buffer
            .sort_unstable_by(|a, b| a.2.partial_cmp(&b.2).unwrap());

        let mut i = 0;
        while i < self.positions_buffer.len() {
            let a = self.positions_buffer[i];

            // Keep `b` only if it is `a` itself, or far enough from `a` in at least one dimension
            self.positions_buffer.retain(|b| {
                let dist = (
                    (b.0 as isize - a.0 as isize).abs(),
                    (b.1 as isize - a.1 as isize).abs(),
                );
                dist == (0, 0) || (dist.0 > width_threshold || dist.1 > height_threshold)
            });

            i += 1;
        }
    }

    /// Returns the same results as the most recent call to [Self::find_subimage_positions],
    /// as long as you haven't modified them through [Self::most_recent_results_mut].
    pub fn most_recent_results(&self) -> &[(usize, usize, f32)] {
        &self.positions_buffer
    }

    /// Gives a mutable reference to the most recent results. Calling this after
    /// [Self::find_subimage_positions] gives you the same slice, but with mutable access. This can
    /// be useful if you want to sort the results without allocating a new Vec.
    ///
    /// For example, if you need to sort by y and then by x position:
    /// ```
    /// use find_subimage::SubImageFinderState;
    /// let (w, h) = (128, 128);
    /// let rgb_image = vec![0u8; w * h * 3];
    /// let (sub_w, sub_h) = (16, 16);
    /// let rgb_subimage = vec![0u8; sub_w * sub_h * 3];
    ///
    /// let mut finder = SubImageFinderState::new();
    /// finder.find_subimage_positions((&rgb_image, w, h), (&rgb_subimage, sub_w, sub_h), 3);
    ///
    /// let results = finder.most_recent_results_mut();
    /// results.sort_unstable_by(|a, b| a.1.cmp(&b.1).then(a.0.cmp(&b.0)));
    /// ```
    pub fn most_recent_results_mut(&mut self) -> &mut [(usize, usize, f32)] {
        &mut self.positions_buffer
    }
}

// I looked into std portable-simd, but doing runtime detection with it seems way more complicated
// than the handy simdeez macro. I'm pretty sure simdeez has UB in it, though.
// I may add a StaticTargetCpuSimd backend or similar without runtime detection, which expects users
// to compile with appropriate target-cpu flags and uses portable-simd.

use std::num::NonZeroU8;

#[cfg(feature = "simdeez")]
use simdeez::*;
#[cfg(feature = "simdeez")]
use simdeez::{avx2::*, scalar::*, sse2::*, sse41::*};
#[cfg(feature = "simdeez")]
simd_runtime_generate!(
    fn simdeez_width() -> usize {
        S::VF32_WIDTH
    }
);

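// Generates a SIMD implementation of the square-difference distance. `$with_remainder` selects
// whether a scalar tail loop handles subimage widths that are not a multiple of the SIMD lane
// count; the caller picks the cheaper variant when the width divides evenly.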
macro_rules! make_simdeez_fn {
    ($with_remainder: expr, $fn_name: ident) => {
        #[cfg(feature = "simdeez")]
        simd_runtime_generate!(
            fn $fn_name(
                x_offset: usize,
                y_offset: usize,
                search_img: &[f32],
                search_w: usize,
                subimage: &[f32],
                w: usize,
                h: usize,
            ) -> f32 {
                #[cfg(not(feature = "checked-simdeez"))]
                let slice: fn(&[f32], _) -> &[f32] = |x, range| x.get_unchecked(range);
                #[cfg(feature = "checked-simdeez")]
                let slice: fn(&[f32], _) -> &[f32] = |x, range| &x[range];
                #[cfg(not(feature = "checked-simdeez"))]
                let slice_elem: fn(&[f32], _) -> &f32 = |x, idx| x.get_unchecked(idx);
                #[cfg(feature = "checked-simdeez")]
                let slice_elem: fn(&[f32], _) -> &f32 = |x, idx| &x[idx];

                // These 3 lines should do all the bounds checking we need
                // We use get_unchecked below
                let subimage = &subimage[..(w * h)];

                let search_img = &search_img[(x_offset + y_offset * search_w)..];
                let search_img = &search_img[..(h * search_w)];

                // [0.0; S::VF32_WIDTH] gave me a const generics error
                // In my case it's 8; 32 should be plenty conservative
                let zeroes = [0.0; 32];
                let mut res_simd = S::loadu_ps(&zeroes[0]);
                let mut res_scalar = 0.0f32;

                let simd_iters_per_row = w / S::VF32_WIDTH;
                let scalar_iters_per_row = w % S::VF32_WIDTH;

                for y in 0..h {
                    let row_sub = (y * w) as usize;
                    let row_search = (y * search_w) as usize;

                    let mut subimage = slice(subimage, row_sub..);
                    let mut search_img = slice(search_img, row_search..);

                    for _ in 0..simd_iters_per_row {
                        let search = S::loadu_ps(slice_elem(search_img, 0));
                        let sub = S::loadu_ps(slice_elem(subimage, 0));

                        let diff = S::sub_ps(sub, search);
                        let square = S::mul_ps(diff, diff);

                        res_simd = S::add_ps(res_simd, square);

                        subimage = slice(subimage, S::VF32_WIDTH..);
                        search_img = slice(search_img, S::VF32_WIDTH..);
                    }

                    if $with_remainder {
                        for i in 0..scalar_iters_per_row {
                            let search = slice_elem(search_img, i);
                            let sub = slice_elem(subimage, i);

                            let diff = sub - search;
                            let square = diff * diff;
                            res_scalar += square;
                        }
                    }
                }

                let res = S::horizontal_add_ps(res_simd) + res_scalar;

                //res.sqrt() / w as f32 / h as f32
                //res / (255.0 * 255.0) / w as f32 / h as f32
                (res / w as f32 / h as f32).sqrt() / 255.0
                //res.sqrt() / ((w as f32 * h as f32).sqrt() * 255.0)
            }
        );
    };
}
make_simdeez_fn!(true, image_dist_simdeez_with_remainder);
make_simdeez_fn!(false, image_dist_simdeez);

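// Scalar reference implementation of the normalized square-difference distance: the
// root-mean-square byte difference over the subimage area, divided by 255. Note that the sum
// runs over every channel, while the normalization only uses w * h.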
fn image_dist_naive(
    (x_offset, y_offset): (usize, usize),
    (search_img, search_w): (&[u8], usize),
    (subimage, w, h): (&[u8], usize, usize),
    channel_count: usize,
) -> f32 {
    let subimage = &subimage[..w * h * channel_count];

    let search_stride = search_w * channel_count;
    let sub_stride = w * channel_count;

    let search_img = &search_img[x_offset * channel_count + y_offset * search_stride..];
    let search_img = &search_img[..h * search_stride];

    let calc_dist = |a, b| (a as f32 - b as f32).powi(2);
    let mut dist = 0.0f32;
    for y in 0..h {
        #[allow(clippy::identity_op)]
        for x in 0..sub_stride {
            let pos_sub = x + y * sub_stride;
            let pos_search = x + y * search_stride;

            dist += calc_dist(subimage[pos_sub], search_img[pos_search]);
        }
    }
    (dist / w as f32 / h as f32).sqrt() / 255.0
}

impl Default for SubImageFinderState {
    fn default() -> Self {
        Self::new()
    }
}