//! image_match — library root (lib.rs)

1use std::cmp::{max, min};
2use std::collections::HashMap;
3
4#[allow(unused_imports)] // It's actually used, I promise
5use num::Signed;
6
7#[cfg(feature = "img")]
8pub mod image;
9
// Fraction of the total adjacent-pixel difference sum trimmed from each side in step 2 (5%).
const DEFAULT_CROP: f32 = 0.05;
// 10x10 grid of blocks, yielding the paper's 9x9 interior grid of measurement points.
const DEFAULT_GRID_SIZE: usize = 10;
12
13/// Produces a 544 signed byte signature for a provided image that's encoded as an array of
14/// conceptually grouped RGBA bytes with the provided width. The result is designed to be compared
15/// to other vectors computed by a call to this method using [cosine-similarity(a, b)].
16pub fn get_buffer_signature(rgba_buffer: &[u8], width: usize) -> Vec<i8> {
17    let gray = grayscale_buffer(rgba_buffer, width);
18
19    let average_square_width_fn = |width, height| {
20        max(
21            2_usize,
22            (0.5 + min(width, height) as f32 / 20.0).floor() as usize,
23        ) / 2
24    };
25
26    compute_from_gray(gray, DEFAULT_CROP, DEFAULT_GRID_SIZE, average_square_width_fn)
27}
28
29/// Produces a variable length signed byte signature for a provided image, encoded as an array of
30/// conceptually grouped RGBA bytes with the provided width. The result is designed to be compared
31/// to other vectors computed by a call to this method with identical tuning parameters using
32/// [cosine-similarity(a, b)]. `crop` is a value in [0, 0.5] indicating what percentage of the image
33/// to crop on all sides before grid placement. Note that this percentage is based not on the raw
34/// width but a calculation of color density. `grid_size` indicates how many points to place on the
35/// image for measurement in the resulting signature. Changing `grid_size` will alter the length of
36/// the signature to `8 * (grid_size - 1)^2 - 12 * (grid_size - 3) - 20`.The
37/// `average_square_width_fn` controls the size of the box around each grid point that's averaged
38/// to produce that grid point's brightness value. The paper proposes
39/// `max(2, floor(0.5 + min(cropped_width, cropped_height) / 20))` but provides no information about
40/// how that was chosen.
41pub fn get_tuned_buffer_signature(
42    rgba_buffer: &[u8],
43    width: usize,
44    crop: f32,
45    grid_size: usize,
46    average_square_width_fn: fn(width: usize, height: usize) -> usize,
47) -> Vec<i8> {
48    let gray = grayscale_buffer(rgba_buffer, width);
49    compute_from_gray(gray, crop, grid_size, average_square_width_fn)
50}
51
52/// Computes the cosine of the angle between two feature vectors. Those vectors must have been both
53/// produced by calls to an un-tuned signature function or identical calls to a tuned version. Per
54/// the source paper and out own research, when using the un-tuned signature calculation a cosine of
55/// 0.6 or greater indicates significant similarity.
56pub fn cosine_similarity(a: &Vec<i8>, b: &Vec<i8>) -> f64 {
57    // For our purposes here, unequal lengths are a sign of major issues in client code.
58    // One of my favorite professors always said "Crash early, crash often."
59    assert_eq!(a.len(), b.len(), "Compared vectors must be of equal length");
60
61    let dot_product: f64 = a.iter().zip(b.iter())
62        .map(|(av, bv)| *av as f64 * *bv as f64)
63        .sum();
64
65    dot_product / (vector_length(a) * vector_length(b))
66}
67
/// Euclidean (L2) magnitude of a signature vector.
fn vector_length(v: &[i8]) -> f64 {
    let sum_of_squares: f64 = v.iter()
        .map(|&component| {
            let widened = component as i32;
            (widened * widened) as f64
        })
        .sum();
    sum_of_squares.sqrt()
}
71
72/// Core computation steps of image signatures. Descriptions for each step can be found on the
73/// called functions and are pulled directly from the implemented paper.
74fn compute_from_gray(
75    gray: Vec<Vec<u8>>,
76    crop: f32,
77    grid_size: usize,
78    average_square_width_fn: fn(width: usize, height: usize) -> usize,
79) -> Vec<i8> {
80    let bounds = crop_boundaries(&gray, crop);
81    let points = grid_points(&bounds, grid_size);
82    let averages = grid_averages(gray, points, bounds, average_square_width_fn);
83    compute_signature(averages, grid_size)
84}
85
86/*
87Step 1.
88"If the image is color, we first convert it to 8-bit grayscale .. Pure white is represented by 255
89and pure black by 0."
90 */
91fn grayscale_buffer(rgba_buffer: &[u8], width: usize) -> Vec<Vec<u8>> {
92    let height = (rgba_buffer.len() / 4) / width;
93    let mut result = Vec::with_capacity(height);
94    let mut idx: usize = 0;
95    while idx < rgba_buffer.len() {
96        let mut row = Vec::with_capacity(width);
97        for _ in 0..width {
98            let avg = pixel_gray(
99                rgba_buffer[idx],
100                rgba_buffer[idx + 1],
101                rgba_buffer[idx + 2],
102                rgba_buffer[idx + 3],
103            );
104
105            row.push(avg);
106            idx += 4;
107        }
108        result.push(row);
109    }
110
111    result
112}
113
/// Collapses one RGBA pixel to an 8-bit gray level: the plain mean of the three
/// color channels, scaled by the alpha channel's opacity fraction.
fn pixel_gray(r: u8, g: u8, b: u8, a: u8) -> u8 {
    let channel_mean = (u16::from(r) + u16::from(g) + u16::from(b)) / 3;
    let opacity = f32::from(a) / 255.0;
    (f32::from(channel_mean) * opacity) as u8
}
118
/// Column (x) and row (y) index bounds of the cropped image window,
/// as computed by `crop_boundaries`.
#[derive(Debug)]
struct Bounds {
    lower_x: usize, // left column bound
    upper_x: usize, // right column bound
    lower_y: usize, // top row bound
    upper_y: usize, // bottom row bound
}
126
127/*
128Step 2, part 1
129"We define the grid in a way that is robust to mild
130cropping, under the assumption that such cropping usually removes relatively featureless parts of
131the image, for example, the margins of a document image or the dark bottom of the Mona Lisa picture.
132For each column of the image, we compute the sum of absolute values of differences between adjacent
133pixels in that column. We compute the total of all columns, and crop the image at the 5% and 95%
134columns, that is, the columns such that 5% of the total sum of differences lies on either side of
135the cropped image. We crop the rows of the image the same way (using the sums of original uncropped
136rows).
137
138For each column of the image, we compute the sum of absolute values of differences between
139adjacent pixels in that column. We compute the total of all columns, and crop the image at
140the 5% and 95% columns, that is, the columns such that 5% of the total sum of differences
141lies on either side of the cropped image. We crop the rows of the image the same way"
142(using the sums of original uncropped rows).
143 */
144fn crop_boundaries(pixels: &Vec<Vec<u8>>, crop: f32) -> Bounds {
145    let row_diff_sums: Vec<i32> = (0..pixels.len()).map(|y|
146        (1..pixels[y].len()).map(|x|
147            pixels[y][x].abs_diff(pixels[y][x - 1]) as i32).sum()
148    ).collect();
149
150    let (top, bottom) = get_bounds(row_diff_sums, crop);
151
152    let col_diff_sums: Vec<i32> = (0..pixels[0].len()).map(|x|
153        (1..pixels.len()).map(|y|
154            pixels[y][x].abs_diff(pixels[y - 1][x]) as i32).sum()
155    ).collect();
156
157    let (left, right) = get_bounds(col_diff_sums, crop);
158
159    Bounds {
160        lower_x: left,
161        upper_x: right,
162        lower_y: top,
163        upper_y: bottom,
164    }
165}
166
/// Walks the difference sums inward from each end until `crop` of the total
/// has been consumed on that side, returning the resulting (lower, upper) indices.
fn get_bounds(diff_sums: Vec<i32>, crop: f32) -> (usize, usize) {
    let threshold = (diff_sums.iter().sum::<i32>() as f32 * crop) as i32;

    let mut lower = 0;
    let mut accumulated = 0;
    while accumulated < threshold {
        accumulated += diff_sums[lower];
        lower += 1;
    }

    let mut upper = diff_sums.len() - 1;
    accumulated = 0;
    while accumulated < threshold {
        accumulated += diff_sums[upper];
        upper -= 1;
    }

    (lower, upper)
}
185
186/*
187Step 2, part 2
188"We next impose a 9x9 grid of points on the image. (For large databases, a bigger grid such as 11x11
189would give greater first-stage filtering.)
190...
191Conceptually, we then divide the cropped image into a 10x10 grid of blocks. We round each interior
192grid point to the closest pixel (that is, integer coordinates), thereby setting a 9x9 grid of
193points on the image."
194 */
195fn grid_points(bounds: &Bounds, grid_size: usize) -> HashMap<(i8, i8), (usize, usize)> {
196    let x_width = (bounds.upper_x - bounds.lower_x) / grid_size;
197    let y_width = (bounds.upper_y - bounds.lower_y) / grid_size;
198
199    let mut points = HashMap::new();
200    for x in 1..grid_size {
201        for y in 1..grid_size {
202            points.insert((x as i8, y as i8), (x * x_width, y * y_width));
203        }
204    }
205
206    points
207}
208
209/*
210Step 3
211"At each grid point, we compute the average gray level of the PxP square centered at the grid point.
212We ran our experiments with P = max(2, floor(0.5 + min(n, m) / 20)) where n and m are the dimensions
213of the image in pixels. The squares are slightly soft-edged, meaning that instead of using the
214pixel’s gray levels themselves, we use an average of a 3x3 block centered at that pixel."
215 */
216fn grid_averages(
217    pixels: Vec<Vec<u8>>,
218    points: HashMap<(i8, i8), (usize, usize)>,
219    bounds: Bounds,
220    average_square_width_fn: fn(width: usize, height: usize) -> usize,
221) -> HashMap<(i8, i8), u8> {
222    let width = bounds.upper_x - bounds.lower_x;
223    let height = bounds.upper_y - bounds.lower_y;
224    let square_edge = average_square_width_fn(width, height) as i32;
225
226    let mut result = HashMap::new();
227    for (grid_coord, (point_x, point_y)) in points {
228        let mut sum: f32 = 0.0;
229        for delta_x in -square_edge..=square_edge {
230            for delta_y in -square_edge..=square_edge {
231                let average = pixel_average(
232                    &pixels,
233                    (point_x as i32 + delta_x) as usize,
234                    (point_y as i32 + delta_y) as usize,
235                );
236                sum += average;
237            }
238        }
239
240        let i = sum / ((square_edge * 2 + 1) * (square_edge * 2 + 1)) as f32;
241        result.insert(grid_coord, i as u8);
242    }
243
244    result
245}
246
247/*
248Step 4
249For each grid point, we compute an 8-element array whose elements give a comparison of the average
250gray level of the grid point square with those of its eight neighbors. The result of a comparison
251can be “much darker”, “darker”, “same”, “lighter”, or “much lighter”, represented numerically as
252-2, -1, 0, 1 and 2, respectively. The “same” values are those averages that differ by no more than
2532 on a scale of 0 to 255. We set the boundary between “much darker” and “darker” so that these two
254values are equally popular; we do the same for “lighter” and “much lighter”. The rationale in this
255step is that “same” may be very common in images with flat backgrounds (such as text documents), and
256hence it should not be included in the histogram equalization applied to the other values. Grid
257points in the first or last rows or column have fewer than 8 neighbors..."
258
259(The authors pad missing neighbors with 0's, we just omit them.)
260
261Step 5
262"The signature of an image is simply the concatenation of the 8-element arrays corresponding to the
263grid points, ordered left-to-right, top-to-bottom..."
264*/
// Offsets to a grid point's 8 neighbors plus itself; the (0, 0) self-entry
// produces a diff of 0, which step 4 classifies as "same".
const GRID_DELTAS: [(i8, i8); 9] = [
    (-1, -1), (0, -1), (1, -1),
    (-1, 0), (0, 0), (1, 0),
    (-1, 1), (0, 1), (1, 1)
];
270
271fn compute_signature(point_averages: HashMap<(i8, i8), u8>, grid_size: usize) -> Vec<i8> {
272    let mut raw_diffs = Vec::with_capacity(grid_size * grid_size);
273    for grid_y in 1..(grid_size as i8) {
274        for grid_x in 1..(grid_size as i8) {
275            let gray = *point_averages.get(&(grid_x, grid_y)).unwrap();
276            let raw_point_diffs: Vec<i16> = GRID_DELTAS.iter()
277                .filter_map(|(delta_x, delta_y)| {
278                    point_averages.get(&(grid_x + delta_x, grid_y + delta_y))
279                        .map(|other| compute_diff(gray, *other))
280                }).collect();
281            raw_diffs.push(raw_point_diffs)
282        }
283    }
284
285    let (dark_threshold, light_threshold) = get_thresholds(&raw_diffs);
286    raw_diffs.into_iter().flat_map(|neighbors|
287        neighbors.into_iter()
288            .map(|v| {
289                match v {
290                    v if v > 0 => collapse(v, light_threshold),
291                    v if v < 0 => collapse(v, dark_threshold),
292                    _ => 0
293                }
294            })).collect()
295}
296
297
298fn get_thresholds(raw_diffs: &[Vec<i16>]) -> (i16, i16) {
299    let (dark, light): (Vec<i16>, Vec<i16>) = raw_diffs.iter().flatten()
300        .filter(|d| **d != 0)
301        .partition(|d| **d < 0);
302
303    let dark_threshold = get_median(dark);
304    let light_threshold = get_median(light);
305
306    (dark_threshold, light_threshold)
307}
308
/// Maps a nonzero raw diff to +/-1 ("lighter"/"darker") or +/-2 ("much
/// lighter"/"much darker"); magnitudes at or beyond the threshold's magnitude
/// land in the "much" bucket.
fn collapse(val: i16, threshold: i16) -> i8 {
    let sign = val.signum() as i8;
    if val.abs() >= threshold.abs() { 2 * sign } else { sign }
}
316
/// Median of the provided values: the middle element for an odd count, the
/// integer mean of the two middle elements for an even count, and 0 when empty.
fn get_median(mut vec: Vec<i16>) -> i16 {
    if vec.is_empty() {
        return 0;
    }
    // Unstable sort: identical results for plain integers, no allocation.
    vec.sort_unstable();
    let mid = vec.len() / 2;
    if vec.len() % 2 == 1 {
        vec[mid]
    } else {
        (vec[mid - 1] + vec[mid]) / 2
    }
}
329
/// Signed brightness difference between two grid-point averages; differences
/// of at most 2 (on the 0-255 scale) are flattened to 0 ("same").
fn compute_diff(me: u8, other: u8) -> i16 {
    let difference = i16::from(me) - i16::from(other);
    if difference.abs() <= 2 { 0 } else { difference }
}
338
/// Soft-edged sample: the mean gray level of the 3x3 block centered at (x, y).
/// Sample coordinates falling outside the image are clamped to the nearest edge
/// pixel, so border and corner points no longer panic (the previous
/// implementation wrapped `(0 - 1) as usize` to usize::MAX and indexed out of
/// bounds for any point on the image border).
fn pixel_average(pixels: &[Vec<u8>], x: usize, y: usize) -> f32 {
    let max_y = (pixels.len() - 1) as i32;
    // Assumes rectangular rows, as produced by `grayscale_buffer`.
    let max_x = (pixels[0].len() - 1) as i32;

    let mut sum = 0.0f32;
    for dy in -1..=1 {
        for dx in -1..=1 {
            let sample_y = (y as i32 + dy).clamp(0, max_y) as usize;
            let sample_x = (x as i32 + dx).clamp(0, max_x) as usize;
            sum += pixels[sample_y][sample_x] as f32;
        }
    }

    sum / 9.0
}