// find_subimage/lib.rs
1//! This crate provides basic functionality to find likely positions of a subimage within a larger
2//! image by calculating an image distance. It has a naive scalar implementation in rust, and a simd
3//! implementation that selects the best implementation based on cpu features at runtime in rust
4//! using the [simdeez](https://github.com/jackmott/simdeez) crate. It also provides an implementation which uses
5//! [OpenCV](https://opencv.org/)'s (A C++ library) matchTemplate function using the
6//! [opencv-rust](https://github.com/twistedfall/opencv-rust) crate through an optional off-by-default
7//! feature. It can also optionally convert images to grayscale before applying the algorithms.
8//!
9//! Here's a simple example showing how to use the API:
10//!
11//! ```
//! # // If you modify this example, please also update its copy in README.md
13//! use find_subimage::{Image, SubImageFinderState};
14//! // Make a dummy 128x128 black image with a red dot at (50, 0)
15//! let (w, h) = (128, 128);
16//! let mut rgb_image = vec![0u8; w * h * 3];
17//! rgb_image[50 * 3] = 250;
18//! // Make a dummy 32x32 black image
19//! // with a red dot at (0, 0)
20//! let (sub_w, sub_h) = (32, 32);
21//! let mut rgb_subimage = vec![0u8; sub_w * sub_h * 3];
22//! rgb_subimage[0] = 250;
23//!
24//! let mut finder = SubImageFinderState::new();
25//! // These are (x, y, distance) where x and y are the position within the larger image
26//! // and distance is the distance value, where a smaller distance means a more precise match
27//! let positions: &[(usize, usize, f32)] =
28//! finder.find_subimage_positions((&rgb_image, w, h), (&rgb_subimage, sub_w, sub_h), 3);
29//! let max: Option<&(usize, usize, f32)> = positions
30//! .iter()
31//! .min_by(|(_, _, dist), (_, _, dist2)| dist.partial_cmp(dist2).unwrap());
32//! println!("The subimage was found at position {:?}", &max);
33//! assert_eq!(Some((50, 0)), max.map(|max| (max.0, max.1)));
34//! // find_subimage_positions actually returns the results sorted by distance already,
35//! // so we can skip finding the minimum
36//! assert_eq!(Some((50, 0)), positions.get(0).map(|max| (max.0, max.1)));
37//! ```
38//!
39//! The most important functions provided are [find_subimage_positions] and
40//! [find_subimage_positions_as_grayscale].
41//!
42//! You may find their "_with_backend" versions useful.
43//!
44//! By default, this library prunes results that are close together. You can disable (Set to 0) or
45//! tweak this using [with_pruning].
46//!
47//! You can look at the page for the [Backend] enum to learn about the possible backends.
48//!
49//! There are some examples in the /examples folder in the repository.
50//!
51//! [with_pruning]: SubImageFinderState::with_pruning
52//! [find_subimage_positions]: SubImageFinderState::find_subimage_positions
53//! [find_subimage_positions_as_grayscale]: SubImageFinderState::find_subimage_positions_as_grayscale
54
/// A simple struct to group (bytes, width, height) arguments
pub struct Image<'a> {
    pub bytes: &'a [u8],
    pub width: usize,
    pub height: usize,
}
impl<'a> Image<'a> {
    /// Bundle a byte slice with its dimensions.
    fn new(bytes: &'a [u8], width: usize, height: usize) -> Self {
        Image { bytes, width, height }
    }
}
/// Lets callers pass `(&bytes, width, height)` tuples wherever an [Image] is expected.
impl<'a, T: AsRef<[u8]>, A: Into<usize>, B: Into<usize>> From<(&'a T, A, B)> for Image<'a> {
    fn from(parts: (&'a T, A, B)) -> Self {
        let (bytes, width, height) = parts;
        Image::new(bytes.as_ref(), width.into(), height.into())
    }
}
75
/// The main context struct. This stores the necessary buffers for the search, as well as the
/// configured backend and pruning parameters.
///
/// u8 buffers are used if conversion to grayscale is necessary, and f32 buffers for the backends
/// that require them.
///
/// There is also a Vec<(usize, usize, f32)> used to store results.
pub struct SubImageFinderState {
    // Results of the most recent search: (x, y, distance), sorted by distance after pruning.
    positions_buffer: Vec<(usize, usize, f32)>,
    backend: Backend,

    // Results closer together than (subimage size * these scales) are pruned; 0 disables pruning.
    prune_width_scale: f32,
    prune_height_scale: f32,

    // Scratch buffers for the SIMD backend (which works on f32 pixels).
    f32buf_search_image: Vec<f32>,
    f32buf_subimage: Vec<f32>,

    // Scratch buffers for grayscale conversion in the u8-based backends.
    u8buf_search_image: Vec<u8>,
    u8buf_subimage: Vec<u8>,
}
95
/// The backend/algorithm to use.
///
/// There is an optional opencv backend, that uses the opencv-rust crate which depends on the OpenCV
/// C++ library. This requires enabling the opencv feature in find-subimage.
///
/// There is another simdeez optional dependency, which uses the simdeez crate for a rust SIMD
/// implementation. This is enabled by default.
///
/// The only implementation which cannot be disabled at present is the scalar one.
// `Debug` is derived so users can log/inspect the configured backend; it is purely additive.
#[derive(Clone, Copy, Debug)]
pub enum Backend {
    /// OpenCV SQDIFF_NORMED MatchTemplate
    ///
    /// Note that the threshold values for this backend use a different scale than the others.
    #[cfg(feature = "opencv")]
    OpenCV {
        /// Maximum distance for a position to be reported.
        threshold: f32,
    },
    /// This should detect CPU features at runtime and use the best possible rust SIMD
    /// implementation of SQDIFF_NORMED (square difference).
    ///
    /// step_x and y let you customize it to skip every Nth x or y coordinate in case you need less
    /// accurate results, potentially giving large speedups.
    #[cfg(feature = "simdeez")]
    RuntimeDetectedSimd {
        /// Maximum distance for a position to be reported.
        threshold: f32,
        /// Step size along x; 1 tests every column.
        step_x: usize,
        /// Step size along y; 1 tests every row.
        step_y: usize,
    },
    /// Scalar SQDIFF_NORMED (square difference) implementation.
    ///
    /// Slowest, should work anywhere and be reliable.
    ///
    /// Smallest in terms of generated code size.
    ///
    /// step_x and y let you customize it to skip every Nth x or y coordinate in case you need less
    /// accurate results, potentially giving large speedups.
    Scalar {
        /// Maximum distance for a position to be reported.
        threshold: f32,
        /// Step size along x; 1 tests every column.
        step_x: usize,
        /// Step size along y; 1 tests every row.
        step_y: usize,
    },
}
/// The default value used in [fn@SubImageFinderState::new_opencv]
pub const OPENCV_DEFAULT_THRESHOLD: f32 = 0.05;
/// The default value used in [SubImageFinderState::new] and [SubImageFinderState::default]
pub const NONOPENCV_DEFAULT_THRESHOLD: f32 = 0.1;
141
142impl SubImageFinderState {
    /// Create a SubImageFinderState
    ///
    /// This uses the [Backend::Scalar] backend by default, unless the "simdeez-default-new"
    /// feature is enabled (It is currently enabled by default).
    ///
    /// See the backend and with_backend methods to change the backend.
    pub fn new() -> Self {
        // Exactly one of the two `backend` definitions below is compiled in,
        // selected by the "simdeez-default-new" feature flag.
        #[cfg(feature = "simdeez-default-new")]
        let backend = Backend::RuntimeDetectedSimd {
            threshold: NONOPENCV_DEFAULT_THRESHOLD,
            step_x: 1,
            step_y: 1,
        };
        #[cfg(not(feature = "simdeez-default-new"))]
        let backend = Backend::Scalar {
            threshold: NONOPENCV_DEFAULT_THRESHOLD,
            step_x: 1,
            step_y: 1,
        };
        Self {
            // All scratch buffers start empty; they grow on first use and are reused afterwards.
            positions_buffer: vec![],
            f32buf_search_image: vec![],
            f32buf_subimage: vec![],
            u8buf_search_image: vec![],
            u8buf_subimage: vec![],
            // Default pruning: drop results within half a subimage of a better result.
            prune_width_scale: 0.5f32,
            prune_height_scale: 0.5f32,
            backend,
        }
    }
173
    /// Like [Self::new] but uses [Backend::OpenCV]
    ///
    /// If `threshold` is `None`, [OPENCV_DEFAULT_THRESHOLD] is used.
    #[cfg(feature = "opencv")]
    pub fn new_opencv(threshold: Option<f32>) -> Self {
        let mut ret = Self::new();
        ret.backend = Backend::OpenCV {
            threshold: threshold.unwrap_or(OPENCV_DEFAULT_THRESHOLD),
        };
        ret
    }
183
184 pub fn backend(&mut self) -> &Backend {
185 &self.backend
186 }
187
    /// Get a mutable reference to the currently configured backend, allowing in-place tweaks
    /// (e.g. adjusting the threshold) without constructing a new [Backend].
    pub fn backend_mut(&mut self) -> &mut Backend {
        &mut self.backend
    }
191
    /// Set the currently configured backend.
    ///
    /// See also [Self::with_backend] for the builder-style equivalent.
    pub fn set_backend(&mut self, new_backend: Backend) {
        self.backend = new_backend;
    }
198
    /// Set the currently configured prune width/height scaling.
    ///
    /// Both values default to 0.5 (see [Self::new]).
    ///
    /// For more information see [Self::with_pruning]
    pub fn set_pruning(&mut self, prune_width_scale: f32, prune_height_scale: f32) {
        self.prune_height_scale = prune_height_scale;
        self.prune_width_scale = prune_width_scale;
    }
206
207 /// Return a new state with the given backend
208 /// ```
209 /// use find_subimage::{Backend, SubImageFinderState};
210 /// let state = SubImageFinderState::new().with_backend(Backend::Scalar {
211 /// threshold: 0.5,
212 /// step_x: 2,
213 /// step_y: 1,
214 /// });
215 /// ```
216 #[must_use]
217 pub fn with_backend(mut self, new_backend: Backend) -> Self {
218 self.set_backend(new_backend);
219 self
220 }
221
222 /// Return a new state with the given pruning width/height scaling parameters.
223 ///
224 /// These default to 0.5
225 #[must_use]
226 pub fn with_pruning(mut self, prune_width_scale: f32, prune_height_scale: f32) -> Self {
227 self.set_pruning(prune_width_scale, prune_height_scale);
228 self
229 }
230
    /// Finds positions where the subimage is found within the search image. These positions
    /// represent the top-left corner of the subimage within the search image.
    ///
    /// You can tweak the likelihood of positions found with the backend's threshold. Note that the
    /// threshold is backend-dependent.
    ///
    /// The `channel_count` argument should be the number of channels for both input images (For
    /// example, 3 for an RGB image or 1 for grayscale).
    ///
    /// The input image can optionally be converted to grayscale before applying the algorithm, see
    /// [Self::find_subimage_positions_as_grayscale].
    ///
    /// The third field of the tuples in the returned slice is the matching/distance value. Values
    /// closer to 1 mean a fuzzier match, and closer to 0 a more exact match. These values are
    /// returned sorted by distance, with the best matches first.
    pub fn find_subimage_positions<'a, 'b, T: Into<Image<'a>>, U: Into<Image<'b>>>(
        &mut self,
        search_image: T,
        subimage: U,
        channel_count: u8,
    ) -> &[(usize, usize, f32)] {
        // Copy the backend out first so the `&mut self` call below doesn't conflict
        // with borrowing `self.backend` (Backend is Copy).
        let backend = self.backend;
        self.find_subimage_positions_with_backend(
            search_image.into(),
            subimage.into(),
            &backend,
            channel_count,
        )
    }
260
    /// Like [Self::find_subimage_positions] but lets you use a different backend
    /// than the currently configured one.
    pub fn find_subimage_positions_with_backend<'a, 'b, T: Into<Image<'a>>, U: Into<Image<'b>>>(
        &mut self,
        search_image: T,
        subimage: U,
        backend: &Backend,
        channel_count: u8,
    ) -> &[(usize, usize, f32)] {
        // No grayscale conversion: both images are compared with `channel_count` channels.
        self.find_subimage_positions_with_backend_impl(
            search_image.into(),
            subimage.into(),
            backend,
            false,
            channel_count,
            channel_count,
        )
    }
279
    /// Like [Self::find_subimage_positions], but before finding positions it converts the images to
    /// grayscale. This can speed up runtime, but depending on the images it can be harmful to
    /// results.
    ///
    /// This is done using internal buffers. If you reuse a [SubImageFinderState] for multiple
    /// images of the same size, it should only need to allocate once.
    ///
    /// If channel_count_subimage is None, channel_count_search is used in its place.
    pub fn find_subimage_positions_as_grayscale<'a, 'b, T: Into<Image<'a>>, U: Into<Image<'b>>>(
        &mut self,
        search_image: T,
        subimage: U,
        channel_count_search: u8,
        channel_count_subimage: Option<NonZeroU8>,
    ) -> &[(usize, usize, f32)] {
        // Copy the backend out first so the `&mut self` call below doesn't conflict
        // with borrowing `self.backend` (Backend is Copy).
        let backend = self.backend;
        self.find_subimage_positions_as_grayscale_with_backend(
            search_image.into(),
            subimage.into(),
            &backend,
            channel_count_search,
            channel_count_subimage,
        )
    }
304
    /// Like [Self::find_subimage_positions_as_grayscale] but lets you use a different backend
    /// than the currently configured one.
    pub fn find_subimage_positions_as_grayscale_with_backend<
        'a,
        'b,
        T: Into<Image<'a>>,
        U: Into<Image<'b>>,
    >(
        &mut self,
        search_image: T,
        subimage: U,
        backend: &Backend,
        channel_count_search: u8,
        channel_count_subimage: Option<NonZeroU8>,
    ) -> &[(usize, usize, f32)] {
        self.find_subimage_positions_with_backend_impl(
            search_image.into(),
            subimage.into(),
            backend,
            true,
            channel_count_search,
            // NonZeroU8 guarantees the subimage channel count can never be zero here;
            // fall back to the search image's channel count when unspecified.
            channel_count_subimage
                .map(|x| x.get())
                .unwrap_or(channel_count_search),
        )
    }
331
    /// The main implementation of the algorithm.
    ///
    /// This runs the hot loop, performs grayscale conversion, calls the appropriate backend, and
    /// prunes results at the end.
    ///
    /// All the public functions that find positions call into this.
    ///
    /// NOTE(review): the search loops below iterate `0..(search_dim - subimage_dim)` with an
    /// exclusive upper bound, so a subimage whose size exactly equals the search image in a
    /// dimension is never tested at offset 0; `0..=diff` may have been intended — TODO confirm.
    /// Also, if the subimage is larger than the search image, the subtraction underflows
    /// (panic in debug builds) — callers presumably guarantee subimage <= search image; verify.
    fn find_subimage_positions_with_backend_impl(
        &mut self,
        search_image: Image,
        subimage: Image,
        backend: &Backend,
        to_grayscale: bool,
        search_image_channel_count: u8,
        subimage_channel_count: u8,
    ) -> &[(usize, usize, f32)] {
        // If there is no grayscale conversion, channel counts should match
        if !to_grayscale && search_image_channel_count != subimage_channel_count {
            panic!(
                "Attempted to find_subimage_positions with different channel counts. search:{} subimage:{}",
                search_image_channel_count, subimage_channel_count
            );
        }

        // Results from any previous search are discarded.
        self.positions_buffer.clear();

        let Image {
            bytes: search_image,
            width: search_width,
            height: search_height,
        } = search_image;
        let Image {
            bytes: subimage,
            width: subimage_width,
            height: subimage_height,
        } = subimage;

        // Grayscale conversion: average all channels of a pixel into a single u8.
        let to_gray_sub = move |rgb: &[u8]| {
            rgb.iter()
                .map(|x| (*x as f32) / (subimage_channel_count as f32))
                .sum::<f32>() as u8
        };
        let to_gray_search = move |rgb: &[u8]| {
            rgb.iter()
                .map(|x| (*x as f32) / (search_image_channel_count as f32))
                .sum::<f32>() as u8
        };
        let to_f32 = |x: u8| x as f32;
        let ref_to_f32 = |&x: &u8| x as f32;

        match *backend {
            #[cfg(feature = "simdeez")]
            Backend::RuntimeDetectedSimd {
                threshold,
                step_x,
                step_y,
            } => {
                // The SIMD kernels operate on f32, so convert (and optionally gray-scale)
                // both images into the reusable f32 scratch buffers.
                self.f32buf_subimage.clear();
                if to_grayscale && subimage_channel_count != 1 {
                    self.f32buf_subimage.extend(
                        subimage
                            .chunks_exact(subimage_channel_count as usize)
                            .map(to_gray_sub)
                            .map(to_f32),
                    );
                } else {
                    self.f32buf_subimage.extend(subimage.iter().map(ref_to_f32));
                }

                self.f32buf_search_image.clear();
                if to_grayscale && search_image_channel_count != 1 {
                    self.f32buf_search_image.extend(
                        search_image
                            .chunks_exact(search_image_channel_count as usize)
                            .map(to_gray_search)
                            .map(to_f32),
                    );
                } else {
                    self.f32buf_search_image
                        .extend(search_image.iter().map(ref_to_f32));
                }

                // Pick the kernel with the scalar-remainder loop only when the subimage
                // width doesn't divide evenly into the runtime-detected SIMD lane count.
                let simdeez_width = simdeez_width_runtime_select();
                let dist_function = if subimage_width % simdeez_width == 0 {
                    image_dist_simdeez_runtime_select
                } else {
                    image_dist_simdeez_with_remainder_runtime_select
                };

                // Pixel coordinates are scaled to element offsets: 1 element per pixel after
                // grayscale conversion, channel_count elements per pixel otherwise.
                let width_multiplier =
                    if to_grayscale { 1 } else { subimage_channel_count as usize };
                for y in (0..(search_height - subimage_height)).step_by(step_y) {
                    for x in (0..(search_width - subimage_width)).step_by(step_x) {
                        let dist = dist_function(
                            x * width_multiplier,
                            y,
                            &self.f32buf_search_image,
                            search_width * width_multiplier,
                            &self.f32buf_subimage,
                            subimage_width * width_multiplier,
                            subimage_height,
                        );
                        if dist < threshold {
                            self.positions_buffer.push((x, y, dist));
                        }
                    }
                }
            }
            Backend::Scalar {
                threshold,
                step_x,
                step_y,
            } => {
                // Borrow the original bytes directly unless grayscale conversion is needed,
                // in which case the reusable u8 scratch buffers hold the converted pixels.
                let subimage_bytes: &[u8] = if to_grayscale && subimage_channel_count != 1 {
                    self.u8buf_subimage.clear();
                    self.u8buf_subimage.extend(
                        subimage
                            .chunks_exact(subimage_channel_count as usize)
                            .map(to_gray_sub),
                    );
                    &self.u8buf_subimage
                } else {
                    subimage
                };

                let search_bytes: &[u8] = if to_grayscale && search_image_channel_count != 1 {
                    self.u8buf_search_image.clear();
                    self.u8buf_search_image.extend(
                        search_image
                            .chunks_exact(search_image_channel_count as usize)
                            .map(to_gray_search),
                    );
                    &self.u8buf_search_image
                } else {
                    search_image
                };

                for y in (0..(search_height - subimage_height)).step_by(step_y) {
                    for x in (0..(search_width - subimage_width)).step_by(step_x) {
                        let dist = image_dist_naive(
                            (x, y),
                            (search_bytes, search_width),
                            (subimage_bytes, subimage_width, subimage_height),
                            if to_grayscale { 1 } else { subimage_channel_count as usize },
                        );
                        if dist < threshold {
                            self.positions_buffer.push((x, y, dist));
                        }
                    }
                }
            }
            #[cfg(feature = "opencv")]
            Backend::OpenCV { threshold } => {
                // OpenCV's Mat constructor takes a raw data pointer; the pointed-to buffers
                // (scratch buffers or the caller's slices) stay alive for the whole unsafe
                // block below, so the Mats never outlive their backing storage.
                let subimage_ptr: *mut std::ffi::c_void =
                    if to_grayscale && subimage_channel_count != 1 {
                        self.u8buf_subimage.clear();
                        self.u8buf_subimage.extend(
                            subimage
                                .chunks_exact(subimage_channel_count as usize)
                                .map(to_gray_sub),
                        );
                        self.u8buf_subimage.as_mut_ptr() as *mut _
                    } else {
                        subimage.as_ptr() as *mut _
                    };

                let search_ptr: *mut std::ffi::c_void =
                    if to_grayscale && search_image_channel_count != 1 {
                        self.u8buf_search_image.clear();
                        self.u8buf_search_image.extend(
                            search_image
                                .chunks_exact(search_image_channel_count as usize)
                                .map(to_gray_search),
                        );
                        self.u8buf_search_image.as_mut_ptr() as *mut _
                    } else {
                        search_image.as_ptr() as *mut _
                    };

                let ch_count_to_mat_typ = |channels| match channels {
                    1 => opencv::core::CV_8UC1,
                    2 => opencv::core::CV_8UC2,
                    3 => opencv::core::CV_8UC3,
                    4 => opencv::core::CV_8UC4,
                    _ => panic!(
                        "opencv matrices do not support more than 4 channels (Tried to use {} channels)",
                        channels
                    ),
                };
                let opencv_mat_typ_search: i32 = if to_grayscale {
                    opencv::core::CV_8UC1
                } else {
                    ch_count_to_mat_typ(search_image_channel_count)
                };
                let opencv_mat_typ_sub: i32 = if to_grayscale {
                    opencv::core::CV_8UC1
                } else {
                    ch_count_to_mat_typ(subimage_channel_count)
                };
                unsafe {
                    let mut out_mat = opencv::core::Mat::default();
                    opencv::imgproc::match_template(
                        &opencv::core::Mat::new_rows_cols_with_data(
                            search_height as i32,
                            search_width as i32,
                            opencv_mat_typ_search,
                            search_ptr,
                            0,
                        )
                        .unwrap(),
                        &opencv::core::Mat::new_rows_cols_with_data(
                            subimage_height as i32,
                            subimage_width as i32,
                            opencv_mat_typ_sub,
                            subimage_ptr,
                            0,
                        )
                        .unwrap(),
                        &mut out_mat,
                        opencv::imgproc::TM_SQDIFF_NORMED,
                        &opencv::core::no_array(),
                    )
                    .unwrap();

                    // The output Mat holds one distance value per candidate position.
                    for (opencv::core::Point_ { x, y }, val) in out_mat.iter().unwrap() {
                        let val: f32 = val; // To help inference

                        if val < threshold {
                            self.positions_buffer.push((x as usize, y as usize, val));
                        }
                    }
                }
            }
        }

        // Sorts by distance and drops results too close to a better one.
        self.prune_nearby_results(subimage_width, subimage_height);

        &self.positions_buffer
    }
570
571 // TODO: Iterator API?
572 // TODO: Allow custom strides w/ padding?
573
    /// Remove results that are too close together according to prune_[width|height]_scale
    /// prioritizing the ones with the lowest distance.
    ///
    /// Panics if any stored distance is NaN (the sort comparator unwraps `partial_cmp`).
    fn prune_nearby_results(&mut self, subimage_width: usize, subimage_height: usize) {
        // Two results are "too close" when BOTH their |dx| and |dy| are within these thresholds.
        let width_threshold = (subimage_width as f32 * self.prune_width_scale) as isize;
        let height_threshold = (subimage_height as f32 * self.prune_height_scale) as isize;

        // Sort best (lowest distance) first so earlier entries win against later ones.
        self.positions_buffer
            .sort_unstable_by(|a, b| a.2.partial_cmp(&b.2).unwrap());

        // For each surviving result, from best to worst, drop every result inside its
        // threshold box. The `dist == (0, 0)` case keeps the current element itself
        // (and any exact-duplicate positions). The buffer shrinks as we go, so `i`
        // always indexes the next not-yet-processed survivor. O(n^2) in result count.
        let mut i = 0;
        while i < self.positions_buffer.len() {
            let a = self.positions_buffer[i];

            self.positions_buffer.retain(|b| {
                let dist = (
                    (b.0 as isize - a.0 as isize).abs(),
                    (b.1 as isize - a.1 as isize).abs(),
                );
                dist == (0, 0) || (dist.0 > width_threshold || dist.1 > height_threshold)
            });

            i += 1;
        }
    }
598
    /// This returns the same as the last value returned from [Self::find_subimage_positions],
    /// as long as you haven't modified them by calling [Self::most_recent_results_mut]
    ///
    /// The slice is emptied at the start of the next `find_subimage_positions*` call.
    pub fn most_recent_results(&self) -> &[(usize, usize, f32)] {
        &self.positions_buffer
    }
604
    /// Gives a mutable reference to the most recent results. Calling this after
    /// [Self::find_subimage_positions] gives you the same slice, but with mutable access. This can
    /// be useful if you want to sort the results without allocating a new Vec.
    ///
    /// The slice is emptied at the start of the next `find_subimage_positions*` call.
    ///
    /// For example, if you need to sort by y and then by x position:
    /// ```
    /// use find_subimage::{Image, SubImageFinderState};
    /// let (w, h) = (128, 128);
    /// let mut rgb_image = vec![0u8; w * h * 3];
    /// let (sub_w, sub_h) = (16, 16);
    /// let mut rgb_subimage = vec![0u8; sub_w * sub_h * 3];
    ///
    /// let mut finder = SubImageFinderState::new();
    /// finder.find_subimage_positions((&rgb_image, w, h), (&rgb_subimage, sub_w, sub_h), 3);
    ///
    /// let results = finder.most_recent_results_mut();
    /// results.sort_unstable_by(|a, b| a.1.cmp(&b.1).then(a.0.cmp(&b.0)));
    /// ```
    pub fn most_recent_results_mut(&mut self) -> &mut [(usize, usize, f32)] {
        &mut self.positions_buffer
    }
626}
627
// I looked into std portable-simd but doing runtime detection with it seems way more complicated
// than the handy simdeez macro. I'm pretty sure simdeez has UB in it, though.
630// I may add a StaticTargetCpuSimd backend or something without runtime detection that expects users
631// to compile with appropiate target cpu flags and uses portable-simd
632
633use std::num::NonZeroU8;
634
635#[cfg(feature = "simdeez")]
636use simdeez::*;
637#[cfg(feature = "simdeez")]
638use simdeez::{avx2::*, scalar::*, sse2::*, sse41::*};
// Generates `simdeez_width_runtime_select()`, which reports the f32 SIMD lane count of the
// best instruction set available on this CPU. Used to decide whether the subimage width
// divides evenly into SIMD lanes (and thus whether the remainder loop is needed).
#[cfg(feature = "simdeez")]
simd_runtime_generate!(
    fn simdeez_width() -> usize {
        S::VF32_WIDTH
    }
);
645
// Generates a runtime-dispatched SIMD SQDIFF_NORMED kernel named `$fn_name`.
//
// `$with_remainder` (a compile-time bool) controls whether a scalar tail loop handles the
// `w % S::VF32_WIDTH` leftover columns of each row; when the caller knows the width divides
// evenly into the lane count, the `false` variant skips that code entirely.
//
// Offsets and widths are in f32 elements (pixels after grayscale conversion, or
// pixel * channel_count otherwise) — the caller pre-scales them.
macro_rules! make_simdeez_fn {
    ($with_remainder: expr, $fn_name: ident) => {
        #[cfg(feature = "simdeez")]
        simd_runtime_generate!(
            fn $fn_name(
                x_offset: usize,
                y_offset: usize,
                search_img: &[f32],
                search_w: usize,
                subimage: &[f32],
                w: usize,
                h: usize,
            ) -> f32 {
                // Indexing goes through these fn pointers so the "checked-simdeez" feature
                // can swap unchecked accesses for bounds-checked ones when debugging.
                #[cfg(not(feature = "checked-simdeez"))]
                let slice: fn(&[f32], _) -> &[f32] = |x, range| x.get_unchecked(range);
                #[cfg(feature = "checked-simdeez")]
                let slice: fn(&[f32], _) -> &[f32] = |x, range| &x[range];
                #[cfg(not(feature = "checked-simdeez"))]
                let slice_elem: fn(&[f32], _) -> &f32 = |x, idx| x.get_unchecked(idx);
                #[cfg(feature = "checked-simdeez")]
                let slice_elem: fn(&[f32], _) -> &f32 = |x, idx| &x[idx];

                // These 3 lines should do all the bounds checking we need
                // We use get_unchecked below
                let subimage = &subimage[..(w * h)];

                let search_img = &search_img[(x_offset + y_offset * search_w)..];
                let search_img = &search_img[..(h * search_w)];

                // [0.0; S::VF32_WIDTH] gave me a const generics error
                // In my case it's 8, 32 should be plenty conservative
                let zeroes = [0.0; 32];
                let mut res_simd = S::loadu_ps(&zeroes[0]);
                let mut res_scalar = 0.0f32;

                let simd_iters_per_row = w / S::VF32_WIDTH;
                let scalar_iters_per_row = w % S::VF32_WIDTH;

                for y in 0..h {
                    let row_sub = (y * w) as usize;
                    let row_search = (y * search_w) as usize;

                    let mut subimage = slice(subimage, row_sub..);
                    let mut search_img = slice(search_img, row_search..);

                    // Accumulate squared differences one SIMD vector at a time,
                    // advancing both windows by the lane count each iteration.
                    for _ in 0..simd_iters_per_row {
                        let search = S::loadu_ps(slice_elem(search_img, 0));
                        let sub = S::loadu_ps(slice_elem(subimage, 0));

                        let diff = S::sub_ps(sub, search);
                        let square = S::mul_ps(diff, diff);

                        res_simd = S::add_ps(res_simd, square);

                        subimage = slice(subimage, S::VF32_WIDTH..);
                        search_img = slice(search_img, S::VF32_WIDTH..);
                    }

                    // Scalar tail for columns that don't fill a full SIMD vector.
                    if $with_remainder {
                        for i in 0..scalar_iters_per_row {
                            let search = slice_elem(search_img, i);
                            let sub = slice_elem(subimage, i);

                            let diff = sub - search;
                            let square = diff * diff;
                            res_scalar += square;
                        }
                    }
                }

                let res = S::horizontal_add_ps(res_simd) + res_scalar;

                // Alternative normalizations considered:
                //res.sqrt() / w as f32 / h as f32
                //res / (255.0 * 255.0) / w as f32 / h as f32
                //res.sqrt() / ((w as f32 * h as f32).sqrt() * 255.0)
                // Chosen: RMS difference normalized to [0, 1] by the max channel value.
                (res / w as f32 / h as f32).sqrt() / 255.0
            }
        );
    };
}
make_simdeez_fn!(true, image_dist_simdeez_with_remainder);
make_simdeez_fn!(false, image_dist_simdeez);
728
/// Scalar SQDIFF_NORMED distance between `subimage` and the window of `search_img`
/// whose top-left pixel is at `(x_offset, y_offset)`.
///
/// Returns the root-mean-square per-pixel difference normalized to [0, 1] by the
/// maximum channel value (255). Panics if either slice is too small for the stated
/// dimensions/offsets.
fn image_dist_naive(
    (x_offset, y_offset): (usize, usize),
    (search_img, search_w): (&[u8], usize),
    (subimage, w, h): (&[u8], usize, usize),
    channel_count: usize,
) -> f32 {
    // Strides are in bytes: pixels-per-row times channels-per-pixel.
    let sub_stride = w * channel_count;
    let search_stride = search_w * channel_count;

    // Restrict both slices up front; all indexing below stays inside these windows.
    let subimage = &subimage[..w * h * channel_count];
    let search_window = &search_img[x_offset * channel_count + y_offset * search_stride..];
    let search_window = &search_window[..h * search_stride];

    let mut total = 0.0f32;
    for row in 0..h {
        let sub_base = row * sub_stride;
        let search_base = row * search_stride;
        for col in 0..sub_stride {
            let delta = subimage[sub_base + col] as f32 - search_window[search_base + col] as f32;
            total += delta * delta;
        }
    }
    (total / w as f32 / h as f32).sqrt() / 255.0
}
756
impl Default for SubImageFinderState {
    /// Equivalent to [SubImageFinderState::new].
    fn default() -> Self {
        Self::new()
    }
}