image_match/
lib.rs

1use std::cmp::{max, min};
2use std::collections::HashMap;
3
4#[allow(unused_imports)] // It's actually used, I promise
5use num::Signed;
6
7#[cfg(feature = "img")]
8pub mod image;
9
10const DEFAULT_CROP: f32 = 0.05;
11const DEFAULT_GRID_SIZE: usize = 10;
12
13/// Produces a 544 signed byte signature for a provided image that's encoded as an array of
14/// conceptually grouped RGBA bytes with the provided width. The result is designed to be compared
15/// to other vectors computed by a call to this method using [cosine-similarity(a, b)].
16pub fn get_buffer_signature(rgba_buffer: &[u8], width: usize) -> Vec<i8> {
17    let gray = grayscale_buffer(rgba_buffer, width);
18
19    let average_square_width_fn = |width, height| {
20        max(
21            2_usize,
22            (0.5 + min(width, height) as f32 / 20.0).floor() as usize,
23        ) / 2
24    };
25
26    compute_from_gray(gray, DEFAULT_CROP, DEFAULT_GRID_SIZE, average_square_width_fn)
27}
28
29/// Produces a variable length signed byte signature for a provided image, encoded as an array of
30/// conceptually grouped RGBA bytes with the provided width. The result is designed to be compared
31/// to other vectors computed by a call to this method with identical tuning parameters using
32/// [cosine-similarity(a, b)]. `crop` is a value in [0, 0.5] indicating what percentage of the image
33/// to crop on all sides before grid placement. Note that this percentage is based not on the raw
34/// width but a calculation of color density. `grid_size` indicates how many points to place on the
35/// image for measurement in the resulting signature. Changing `grid_size` will alter the length of
36/// the signature to `8 * (grid_size - 1)^2 - 12 * (grid_size - 3) - 20`.The
37/// `average_square_width_fn` controls the size of the box around each grid point that's averaged
38/// to produce that grid point's brightness value. The paper proposes
39/// `max(2, floor(0.5 + min(cropped_width, cropped_height) / 20))` but provides no information about
40/// how that was chosen.
41pub fn get_tuned_buffer_signature(
42    rgba_buffer: &[u8],
43    width: usize,
44    crop: f32,
45    grid_size: usize,
46    average_square_width_fn: fn(width: usize, height: usize) -> usize,
47) -> Vec<i8> {
48    let gray = grayscale_buffer(rgba_buffer, width);
49    compute_from_gray(gray, crop, grid_size, average_square_width_fn)
50}
51
52/// Computes the cosine of the angle between two feature vectors. Those vectors must have been both
53/// produced by calls to an un-tuned signature function or identical calls to a tuned version. Per
54/// the source paper and out own research, when using the un-tuned signature calculation a cosine of
55/// 0.6 or greater indicates significant similarity.
56/// For the edge case of vectors with all zeroes, we return a similarity of 0.0 if one vector is
57/// all zeroes, and a similarity of 1.0 if both are.
58pub fn cosine_similarity(a: &Vec<i8>, b: &Vec<i8>) -> f64 {
59    // For our purposes here, unequal lengths are a sign of major issues in client code.
60    // One of my favorite professors always said "Crash early, crash often."
61    assert_eq!(a.len(), b.len(), "Compared vectors must be of equal length");
62
63    let a_length = vector_length(a);
64    let b_length = vector_length(b);
65    if a_length == 0.0 || b_length == 0.0 {
66        if a_length == 0.0 && b_length == 0.0 {
67            1.0
68        } else {
69            0.0
70        }
71    } else {
72        let dot_product: f64 = a.iter().zip(b.iter())
73            .map(|(av, bv)| *av as f64 * *bv as f64)
74            .sum();
75
76        dot_product / (a_length * b_length)
77    }
78}
79
80fn vector_length(v: &[i8]) -> f64 {
81    v.iter().map(|vi| *vi as i32).map(|vi| (vi * vi) as f64).sum::<f64>().sqrt()
82}
83
84/// Core computation steps of image signatures. Descriptions for each step can be found on the
85/// called functions and are pulled directly from the implemented paper.
86fn compute_from_gray(
87    gray: Vec<Vec<u8>>,
88    crop: f32,
89    grid_size: usize,
90    average_square_width_fn: fn(width: usize, height: usize) -> usize,
91) -> Vec<i8> {
92    let bounds = crop_boundaries(&gray, crop);
93    let points = grid_points(&bounds, grid_size);
94    let averages = grid_averages(gray, points, bounds, average_square_width_fn);
95    compute_signature(averages, grid_size)
96}
97
98/*
99Step 1.
100"If the image is color, we first convert it to 8-bit grayscale .. Pure white is represented by 255
101and pure black by 0."
102 */
103fn grayscale_buffer(rgba_buffer: &[u8], width: usize) -> Vec<Vec<u8>> {
104    let height = (rgba_buffer.len() / 4) / width;
105    let mut result = Vec::with_capacity(height);
106    let mut idx: usize = 0;
107    while idx < rgba_buffer.len() {
108        let mut row = Vec::with_capacity(width);
109        for _ in 0..width {
110            let avg = pixel_gray(
111                rgba_buffer[idx],
112                rgba_buffer[idx + 1],
113                rgba_buffer[idx + 2],
114                rgba_buffer[idx + 3],
115            );
116
117            row.push(avg);
118            idx += 4;
119        }
120        result.push(row);
121    }
122
123    result
124}
125
126fn pixel_gray(r: u8, g: u8, b: u8, a: u8) -> u8 {
127    let rgb_avg = (r as u16 + g as u16 + b as u16) / 3;
128    ((rgb_avg as f32) * (a as f32 / 255.0)) as u8
129}
130
131#[derive(Debug, PartialEq)]
132struct Bounds {
133    lower_x: usize,
134    upper_x: usize,
135    lower_y: usize,
136    upper_y: usize,
137}
138
139/*
140Step 2, part 1
141We define the grid in a way that is robust to mild cropping, under the assumption that such
142cropping usually removes relatively featureless parts of the image, for example, the margins of a
143document image or the dark bottom of the Mona Lisa picture.
144
145For each column of the image, we compute the sum of absolute values of differences
146between adjacent pixels in that column. We compute the total of all columns, and crop the image
147based on the `crop` parameter, which determines how much of the image to discard. For a `crop` of
1480.05, we crop the image at the 5% and 95% columns, that is, the columns such that 5% of the total
149sum of differences lies on either side of the cropped image. We crop the rows of the image the
150same way (using the sums of original uncropped rows).
151*/
152fn crop_boundaries(pixels: &Vec<Vec<u8>>, crop: f32) -> Bounds {
153    let row_diff_sums: Vec<i32> = (0..pixels.len()).map(|y|
154        (1..pixels[y].len()).map(|x|
155            pixels[y][x].abs_diff(pixels[y][x - 1]) as i32).sum()
156    ).collect();
157
158    let (top, bottom) = get_bounds(row_diff_sums, crop);
159
160    let col_diff_sums: Vec<i32> = (0..pixels[0].len()).map(|x|
161        (1..pixels.len()).map(|y|
162            pixels[y][x].abs_diff(pixels[y - 1][x]) as i32).sum()
163    ).collect();
164
165    let (left, right) = get_bounds(col_diff_sums, crop);
166
167    Bounds {
168        lower_x: left,
169        upper_x: right,
170        lower_y: top,
171        upper_y: bottom,
172    }
173}
174
175/// Returns the max number of contiguous values on each end of `diff_sums` that are, when summed,
176/// below `crop` times the total of `diff_sums`
177fn get_bounds(diff_sums: Vec<i32>, crop: f32) -> (usize, usize) {
178    let total_diff_sum: i32 = diff_sums.iter().sum();
179    let threshold = (total_diff_sum as f32 * crop) as i32;
180    let mut lower = 0;
181    let mut upper = diff_sums.len() - 1;
182    let mut sum = diff_sums[lower];
183
184    while sum < threshold {
185        lower += 1;
186        sum += diff_sums[lower];
187    }
188    sum = diff_sums[upper];
189    while sum < threshold {
190        upper -= 1;
191        sum += diff_sums[upper];
192    }
193    (lower, upper)
194}
195
196/*
197Step 2, part 2
198"We next impose a 9x9 grid of points on the image. (For large databases, a bigger grid such as 11x11
199would give greater first-stage filtering.)
200...
201Conceptually, we then divide the cropped image into a 10x10 grid of blocks. We round each interior
202grid point to the closest pixel (that is, integer coordinates), thereby setting a 9x9 grid of
203points on the image."
204
205- grid_size: size of superimposed grid (10 in the above example)
206 */
207fn grid_points(bounds: &Bounds, grid_size: usize) -> HashMap<(i8, i8), (usize, usize)> {
208    let x_width = (bounds.upper_x - bounds.lower_x + 1) as f32 / grid_size as f32;
209    let y_width = (bounds.upper_y - bounds.lower_y + 1) as f32 / grid_size as f32;
210
211    let mut points = HashMap::new();
212    for x in 1..grid_size {
213        for y in 1..grid_size {
214            points.insert(
215                (x as i8, y as i8),
216                (
217                    bounds.lower_x + (x as f32 * x_width).trunc() as usize,
218                    bounds.lower_y + (y as f32 * y_width).trunc() as usize,
219                ),
220            );
221        }
222    }
223
224    points
225}
226
227/*
228Step 3
229"At each grid point, we compute the average gray level of the PxP square centered at the grid point.
230We ran our experiments with P = max(2, floor(0.5 + min(n, m) / 20)) where n and m are the dimensions
231of the image in pixels. The squares are slightly soft-edged, meaning that instead of using the
232pixel’s gray levels themselves, we use an average of a 3x3 block centered at that pixel."
233 */
234fn grid_averages(
235    pixels: Vec<Vec<u8>>,
236    points: HashMap<(i8, i8), (usize, usize)>,
237    bounds: Bounds,
238    average_square_width_fn: fn(width: usize, height: usize) -> usize,
239) -> HashMap<(i8, i8), u8> {
240    let width = bounds.upper_x - bounds.lower_x;
241    let height = bounds.upper_y - bounds.lower_y;
242    let square_edge = average_square_width_fn(width, height) as i32;
243
244    let mut result = HashMap::new();
245    for (grid_coord, (point_x, point_y)) in points {
246        let mut sum: f32 = 0.0;
247        for delta_x in -square_edge..=square_edge {
248            for delta_y in -square_edge..=square_edge {
249                let average = pixel_average(
250                    &pixels,
251                    (point_x as i32 + delta_x) as usize,
252                    (point_y as i32 + delta_y) as usize,
253                );
254                sum += average;
255            }
256        }
257
258        let i = sum / ((square_edge * 2 + 1) * (square_edge * 2 + 1)) as f32;
259        result.insert(grid_coord, i as u8);
260    }
261
262    result
263}
264
265/*
266Step 4
267For each grid point, we compute an 8-element array whose elements give a comparison of the average
268gray level of the grid point square with those of its eight neighbors. The result of a comparison
269can be “much darker”, “darker”, “same”, “lighter”, or “much lighter”, represented numerically as
270-2, -1, 0, 1 and 2, respectively. The “same” values are those averages that differ by no more than
2712 on a scale of 0 to 255. We set the boundary between “much darker” and “darker” so that these two
272values are equally popular; we do the same for “lighter” and “much lighter”. The rationale in this
273step is that “same” may be very common in images with flat backgrounds (such as text documents), and
274hence it should not be included in the histogram equalization applied to the other values. Grid
275points in the first or last rows or column have fewer than 8 neighbors..."
276
277(The authors pad missing neighbors with 0's, we just omit them.)
278
279Step 5
280"The signature of an image is simply the concatenation of the 8-element arrays corresponding to the
281grid points, ordered left-to-right, top-to-bottom..."
282*/
283const GRID_DELTAS: [(i8, i8); 9] = [
284    (-1, -1), (0, -1), (1, -1),
285    (-1, 0), (0, 0), (1, 0),
286    (-1, 1), (0, 1), (1, 1)
287];
288
289fn compute_signature(point_averages: HashMap<(i8, i8), u8>, grid_size: usize) -> Vec<i8> {
290    let mut raw_diffs = Vec::with_capacity(grid_size * grid_size);
291    for grid_y in 1..(grid_size as i8) {
292        for grid_x in 1..(grid_size as i8) {
293            let gray = *point_averages.get(&(grid_x, grid_y)).unwrap();
294            let raw_point_diffs: Vec<i16> = GRID_DELTAS.iter()
295                .filter_map(|(delta_x, delta_y)| {
296                    point_averages.get(&(grid_x + delta_x, grid_y + delta_y))
297                        .map(|other| compute_diff(gray, *other))
298                }).collect();
299            raw_diffs.push(raw_point_diffs)
300        }
301    }
302
303    let (dark_threshold, light_threshold) = get_thresholds(&raw_diffs);
304    raw_diffs.into_iter().flat_map(|neighbors|
305        neighbors.into_iter()
306            .map(|v| {
307                match v {
308                    v if v > 0 => collapse(v, light_threshold),
309                    v if v < 0 => collapse(v, dark_threshold),
310                    _ => 0
311                }
312            })).collect()
313}
314
315
316fn get_thresholds(raw_diffs: &[Vec<i16>]) -> (i16, i16) {
317    let (dark, light): (Vec<i16>, Vec<i16>) = raw_diffs.iter().flatten()
318        .filter(|d| **d != 0)
319        .partition(|d| **d < 0);
320
321    let dark_threshold = get_median(dark);
322    let light_threshold = get_median(light);
323
324    (dark_threshold, light_threshold)
325}
326
327fn collapse(val: i16, threshold: i16) -> i8 {
328    if val.abs() >= threshold.abs() {
329        2 * val.signum() as i8
330    } else {
331        val.signum() as i8
332    }
333}
334
335fn get_median(mut vec: Vec<i16>) -> i16 {
336    vec.sort();
337    if vec.len() % 2 == 0 {
338        if vec.is_empty() {
339            0
340        } else {
341            (vec[(vec.len() / 2) - 1] + vec[vec.len() / 2]) / 2
342        }
343    } else {
344        vec[vec.len() / 2]
345    }
346}
347
348fn compute_diff(me: u8, other: u8) -> i16 {
349    let raw_result = me as i16 - other as i16;
350    if raw_result.abs() <= 2 {
351        0
352    } else {
353        raw_result
354    }
355}
356
357const PIXEL_DELTAS: [(i32, i32); 9] = [
358    (-1, -1), (0, -1), (1, -1),
359    (-1, 0), (0, 0), (1, 0),
360    (-1, 1), (0, 1), (1, 1)
361];
362
363fn pixel_average(pixels: &[Vec<u8>], x: usize, y: usize) -> f32 {
364    let max_y = pixels.len() as i32 - 1;
365    let max_x = pixels[0].len() as i32 - 1;
366
367    let sum: f32 = PIXEL_DELTAS.iter().map(|(delta_x, delta_y)| {
368        pixels[(y as i32 + *delta_y).clamp(0, max_y) as usize][(x as i32 + *delta_x).clamp(0, max_x) as usize] as f32
369    }).sum();
370
371    sum / 9.0
372}
373
374#[cfg(test)]
375mod tests {
376    use super::*;
377    use pretty_assertions::{assert_eq};
378    use std::collections::BTreeMap;
379
380    macro_rules! assert_map_eq {
381        ( $actual:expr, $expected:expr ) => {
382            {
383                let actual: BTreeMap<_, _> = ($actual).into_iter().collect();
384                let expected: BTreeMap<_, _> = ($expected).into_iter().collect();
385                assert_eq!(actual, expected)
386            }
387        }
388    }
389
390    fn from_dotgrid(grid: &str) -> Vec<Vec<u8>> {
391        grid.split("\n")
392            .map(|row| row.replace(" ",""))
393            .filter(|row| row.len() > 0)
394            .map(|row| row.chars().map(|c| match c {
395                '.' => 0,
396                'o' => 64,
397                'O' => 128,
398                'x' => 192,
399                'X' => 255,
400                c => panic!("Unexpected dotgrid character '{}'", c)
401            }).collect()).collect()
402    }
403
404    #[test]
405    fn test_pixel_gray() {
406        assert_eq!(pixel_gray(255,255,255,255), 255);
407        assert_eq!(pixel_gray(0,0,0,0), 0);
408        assert_eq!(pixel_gray(255,255,255,0), 0);
409        assert_eq!(pixel_gray(32, 64, 96, 255), 64);
410    }
411
412    #[test]
413    fn test_grayscale_buffer() {
414        assert_eq!(grayscale_buffer(&[
415            255, 255, 255, 255,
416            128, 128, 128, 128,
417            0, 0, 0, 0,
418            0, 128, 255, 128
419        ], 2), [
420            [255, 64],
421            [0, 63]
422        ]);
423    }
424    
425    #[test]
426    fn test_get_bounds() {
427        assert_eq!([
428            (vec![0,0,50,50,0,0], 0.05),
429            (vec![0,0,0,50,50,0,0,0], 0.05),
430        ].map(|(v, c)| get_bounds(v, c)),
431        [(2, 3), (3, 4)]);
432    }
433
434    #[test]
435    fn test_crop_boundaries() {
436        let pic = from_dotgrid("
437        .......
438        .oooo..
439        .oXxo..
440        .oXxo..
441        .......
442        .......
443        ");
444
445        assert_eq!(crop_boundaries(&pic, 0.05), Bounds {
446            lower_x: 1,
447            upper_x: 4,
448            lower_y: 1,
449            upper_y: 3,
450        });
451        assert_eq!(crop_boundaries(&pic, 0.25), Bounds {
452            lower_x: 2,
453            upper_x: 3,
454            lower_y: 2,
455            upper_y: 3,
456        });
457        assert_eq!(crop_boundaries(&pic, 0.5), Bounds {
458            lower_x: 2,
459            upper_x: 2,
460            lower_y: 2,
461            upper_y: 2,
462        });
463    }
464
465    #[test]
466    fn test_grid_points() {
467        assert_map_eq!(grid_points(&Bounds {
468            lower_x: 5,
469            upper_x: 15,
470            lower_y: 10,
471            upper_y: 30,
472        }, 2), [
473            ((1, 1), (10, 20))
474        ]);
475
476        assert_map_eq!(grid_points(&Bounds {
477            lower_x: 5,
478            upper_x: 15,
479            lower_y: 10,
480            upper_y: 30,
481        }, 3), [
482            ((1, 1), (8, 17)),
483            ((2, 1), (12, 17)),
484            ((1, 2), (8, 24)),
485            ((2, 2), (12, 24)),
486        ]);
487    }
488
489    #[test]
490    fn test_grid_points_extreme() {
491        assert_map_eq!(grid_points(&Bounds {
492            lower_x: 0,
493            upper_x: 100,
494            lower_y: 1,
495            upper_y: 1,
496        }, 6), [
497            ((1, 1), (16, 1)),
498            ((2, 1), (33, 1)),
499            ((3, 1), (50, 1)),
500            ((4, 1), (67, 1)),
501            ((5, 1), (84, 1)),
502
503            ((1, 2), (16, 1)),
504            ((2, 2), (33, 1)),
505            ((3, 2), (50, 1)),
506            ((4, 2), (67, 1)),
507            ((5, 2), (84, 1)),
508
509            ((1, 3), (16, 1)),
510            ((2, 3), (33, 1)),
511            ((3, 3), (50, 1)),
512            ((4, 3), (67, 1)),
513            ((5, 3), (84, 1)),
514
515            ((1, 4), (16, 1)),
516            ((2, 4), (33, 1)),
517            ((3, 4), (50, 1)),
518            ((4, 4), (67, 1)),
519            ((5, 4), (84, 1)),
520
521            ((1, 5), (16, 1)),
522            ((2, 5), (33, 1)),
523            ((3, 5), (50, 1)),
524            ((4, 5), (67, 1)),
525            ((5, 5), (84, 1)),
526        ]);
527    }
528
529    #[test]
530    fn test_grid_points_tiny() {
531        assert_map_eq!(grid_points(&Bounds {
532            lower_x: 0,
533            upper_x: 1,
534            lower_y: 0,
535            upper_y: 1,
536        }, 3), [
537            ((1,1), (0,0)),
538            ((2,1), (1,0)),
539            ((1,2), (0,1)),
540            ((2,2), (1,1)),
541        ]);
542    }
543}
image_match/lib.rs

image_match/
lib.rs