visual_hash/alg.rs

mod blockhash;

use serde::{Deserialize, Serialize};

use crate::{
    traits::{BitSet, Image},
    CowImage::*,
    HashCtxt,
    HashVals::*,
};

/// Hash algorithms implemented by this crate.
///
/// Implemented primarily based on the high-level descriptions on the blog Hacker Factor
/// written by Dr. Neal Krawetz: <http://www.hackerfactor.com/>
///
/// Note that `hash_width` and `hash_height` in these docs refer to the parameters of
/// [`HasherConfig::hash_size()`](struct.HasherConfig.html#method.hash_size).
///
/// ### Choosing an Algorithm
/// Each algorithm has different performance characteristics and resists different kinds of
/// image modifications; see the documentation on each variant for details.
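///
/// As a rough usage sketch (not compiled; apart from `hash_size`, the crate path and
/// builder method names below are assumed rather than guaranteed by these docs):
///
/// ```ignore
/// use visual_hash::{HashAlg, HasherConfig};
///
/// // Configure an 8x8 Gradient hash and compare two images.
/// let hasher = HasherConfig::new()
///     .hash_size(8, 8)
///     .hash_alg(HashAlg::Gradient)
///     .to_hasher();
/// let hash_a = hasher.hash_image(&image_a);
/// let hash_b = hasher.hash_image(&image_b);
/// println!("distance: {}", hash_a.dist(&hash_b));
/// ```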
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[non_exhaustive]
pub enum HashAlg {
    /// The Mean hashing algorithm.
    ///
    /// The image is converted to grayscale, scaled down to `hash_width x hash_height`,
    /// the mean pixel value is taken, and then the hash bits are generated by comparing
    /// the pixels of the downscaled image to the mean.
    ///
    /// This is the most basic hash algorithm supported, resistant only to changes in
    /// resolution, aspect ratio, and overall brightness.
    ///
    /// Further Reading:
    /// <http://www.hackerfactor.com/blog/?/archives/432-Looks-Like-It.html>
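    ///
    /// A small, self-contained illustration of the thresholding step on luma values that
    /// have already been downscaled (the resize and grayscale steps are handled elsewhere
    /// in the crate):
    ///
    /// ```
    /// let luma = [200u8, 40, 180, 60, 120, 110, 90, 130, 100];
    /// let mean = (luma.iter().map(|&l| l as u32).sum::<u32>() / luma.len() as u32) as u8;
    /// let bits: Vec<bool> = luma.iter().map(|&l| l >= mean).collect();
    /// assert_eq!(mean, 114);
    /// assert_eq!(bits, [true, false, true, false, true, false, false, true, false]);
    /// ```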
    Mean,

    /// The Gradient hashing algorithm.
    ///
    /// The image is converted to grayscale, scaled down to `(hash_width + 1) x hash_height`,
    /// and then, in row-major order, each pixel is compared to the previous pixel in its row,
    /// setting one bit in the hash per comparison. The extra pixel is needed to have
    /// `hash_width` comparisons per row.
    ///
    /// This hash algorithm is as fast or faster than Mean (because it only traverses the
    /// hash data once) and is more resistant to changes than Mean.
    ///
    /// Further Reading:
    /// <http://www.hackerfactor.com/blog/index.php?/archives/529-Kind-of-Like-That.html>
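    ///
    /// A small, self-contained illustration of the per-row comparisons (a bit is set when
    /// a pixel is brighter than the one before it); here `hash_width = 4`, so each row of
    /// the resized image holds 5 pixels and yields 4 bits:
    ///
    /// ```
    /// let row = [35u8, 60, 55, 90, 10];
    /// let bits: Vec<bool> = row.windows(2).map(|w| w[1] > w[0]).collect();
    /// assert_eq!(bits, [true, false, true, false]);
    /// ```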
    Gradient,

    /// The Vertical-Gradient hashing algorithm.
    ///
    /// Equivalent to [`Gradient`](#variant.Gradient) but operating on the columns of the image
    /// instead of the rows.
    VertGradient,

    /// The Double-Gradient hashing algorithm.
    ///
    /// An advanced version of [`Gradient`](#variant.Gradient);
    /// resizes the grayscale image to `(hash_width / 2 + 1) x (hash_height / 2 + 1)` and
    /// compares columns in addition to rows.
    ///
    /// This algorithm is slightly slower than `Gradient` (resizing the image dwarfs
    /// the hash time in most cases) but the extra comparison direction may improve results (though
    /// you might want to consider increasing
    /// [`hash_size`](struct.HasherConfig.html#method.hash_size)
    /// to accommodate the extra comparisons).
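    ///
    /// A worked example of the bit count, derived from the resize and comparison logic in
    /// this module: an 8x8 `hash_size` resizes to 5x5, giving 4 comparisons per row and 4
    /// per column:
    ///
    /// ```
    /// let (w, h) = (8u32 / 2 + 1, 8u32 / 2 + 1); // resize dimensions for an 8x8 hash
    /// let row_bits = h * (w - 1);                // horizontal (Gradient) comparisons
    /// let col_bits = w * (h - 1);                // vertical (VertGradient) comparisons
    /// assert_eq!(row_bits + col_bits, 40);
    /// ```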
    DoubleGradient,

    /// The [Blockhash.io](https://blockhash.io) algorithm.
    ///
    /// Compared to the other algorithms, this does not require any preprocessing steps and so
    /// may be significantly faster at the cost of some resilience.
    ///
    /// The algorithm is described at a high level here:
    /// <https://github.com/commonsmachinery/blockhash-rfc/blob/master/main.md>
    Blockhash,
}

/// Rounds `x` up to the next multiple of 2 (even values are unchanged).
fn next_multiple_of_2(x: u32) -> u32 {
    (x + 1) & !1
}

/// Rounds `x` up to the next multiple of 4 (values already divisible by 4 are unchanged).
fn next_multiple_of_4(x: u32) -> u32 {
    (x + 3) & !3
}

impl HashAlg {
    pub(crate) fn hash_image<I, B>(&self, ctxt: &HashCtxt, image: &I) -> B
    where
        I: Image,
        B: BitSet,
    {
        let post_gauss = ctxt.gauss_preproc(image);

        let HashCtxt { width, height, .. } = *ctxt;

        // Blockhash consumes the preprocessed image directly and skips the
        // grayscale/resize pipeline used by the other algorithms.
        if *self == HashAlg::Blockhash {
            return match post_gauss {
                Borrowed(img) => blockhash::blockhash(img, width, height),
                Owned(img) => blockhash::blockhash(&img, width, height),
            };
        }

        // Every other algorithm operates on a grayscale image resized to
        // algorithm-specific dimensions.
        let grayscale = post_gauss.to_grayscale();
        let (resize_width, resize_height) = self.resize_dimensions(width, height);

        let hash_vals = ctxt.calc_hash_vals(&*grayscale, resize_width, resize_height);

        let rowstride = resize_width as usize;

        // Dispatch on the algorithm and on whether preprocessing produced float or
        // byte luma values.
        match (*self, hash_vals) {
            (HashAlg::Mean, Floats(ref floats)) => B::from_bools(mean_hash_f32(floats)),
            (HashAlg::Mean, Bytes(ref bytes)) => B::from_bools(mean_hash_u8(bytes)),
            (HashAlg::Gradient, Floats(ref floats)) => {
                B::from_bools(gradient_hash(floats, rowstride))
            }
            (HashAlg::Gradient, Bytes(ref bytes)) => B::from_bools(gradient_hash(bytes, rowstride)),
            (HashAlg::VertGradient, Floats(ref floats)) => {
                B::from_bools(vert_gradient_hash(floats, rowstride))
            }
            (HashAlg::VertGradient, Bytes(ref bytes)) => {
                B::from_bools(vert_gradient_hash(bytes, rowstride))
            }
            (HashAlg::DoubleGradient, Floats(ref floats)) => {
                B::from_bools(double_gradient_hash(floats, rowstride))
            }
            (HashAlg::DoubleGradient, Bytes(ref bytes)) => {
                B::from_bools(double_gradient_hash(bytes, rowstride))
            }
            (HashAlg::Blockhash, _) => unreachable!(),
        }
    }

    /// Rounds the requested hash dimensions up to values the algorithm can use
    /// (DoubleGradient needs even dimensions, Blockhash needs multiples of 4).
    pub(crate) fn round_hash_size(&self, width: u32, height: u32) -> (u32, u32) {
        match self {
            HashAlg::DoubleGradient => (next_multiple_of_2(width), next_multiple_of_2(height)),
            HashAlg::Blockhash => (next_multiple_of_4(width), next_multiple_of_4(height)),
            _ => (width, height),
        }
    }

    /// The dimensions the grayscale image is resized to before hashing, derived from the
    /// (rounded) hash dimensions; Blockhash never resizes.
    pub(crate) fn resize_dimensions(&self, width: u32, height: u32) -> (u32, u32) {
        match self {
            HashAlg::Mean => (width, height),
            HashAlg::Blockhash => panic!("Blockhash algorithm does not resize"),
            HashAlg::Gradient => (width + 1, height),
            HashAlg::VertGradient => (width, height + 1),
            HashAlg::DoubleGradient => (width / 2 + 1, height / 2 + 1),
        }
    }
}

fn mean_hash_u8(luma: &[u8]) -> impl Iterator<Item = bool> + '_ {
    let mean = (luma.iter().map(|&l| l as u32).sum::<u32>() / luma.len() as u32) as u8;
    luma.iter().map(move |&x| x >= mean)
}

fn mean_hash_f32(luma: &[f32]) -> impl Iterator<Item = bool> + '_ {
    let mean = luma.iter().sum::<f32>() / luma.len() as f32;
    luma.iter().map(move |&x| x >= mean)
}

/// The guts of the gradient hash, separated so it can be reused for rows and columns alike.
fn gradient_hash_impl<I>(luma: I) -> impl Iterator<Item = bool>
where
    I: IntoIterator + Clone,
    <I as IntoIterator>::Item: PartialOrd,
{
    // Pair each value with its predecessor; a bit is set when the current value is greater.
    luma.clone()
        .into_iter()
        .skip(1)
        .zip(luma)
        .map(|(this, last)| last < this)
}

fn gradient_hash<T: PartialOrd>(luma: &[T], rowstride: usize) -> impl Iterator<Item = bool> + '_ {
    // Compare adjacent values within each row; rows do not wrap into each other.
    luma.chunks(rowstride).flat_map(gradient_hash_impl)
}

fn vert_gradient_hash<T: PartialOrd>(
    luma: &[T],
    rowstride: usize,
) -> impl Iterator<Item = bool> + '_ {
    // Walk each column of the row-major buffer by stepping `rowstride` elements at a time.
    (0..rowstride)
        .map(move |col_start| luma[col_start..].iter().step_by(rowstride))
        .flat_map(gradient_hash_impl)
}

fn double_gradient_hash<T: PartialOrd>(
    luma: &[T],
    rowstride: usize,
) -> impl Iterator<Item = bool> + '_ {
    // Row-wise comparisons followed by column-wise comparisons over the same buffer.
    gradient_hash(luma, rowstride).chain(vert_gradient_hash(luma, rowstride))
}
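
// A few unit checks over the private helpers above. This is a lightweight sketch; the
// expected values are worked out from the implementations in this module rather than from
// any external specification.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn rounding_helpers_round_up_to_required_multiples() {
        assert_eq!(next_multiple_of_2(7), 8);
        assert_eq!(next_multiple_of_2(8), 8);
        assert_eq!(next_multiple_of_4(9), 12);
        assert_eq!(next_multiple_of_4(12), 12);
        assert_eq!(HashAlg::DoubleGradient.round_hash_size(9, 9), (10, 10));
        assert_eq!(HashAlg::Blockhash.round_hash_size(9, 9), (12, 12));
    }

    #[test]
    fn mean_hash_thresholds_against_the_average() {
        // Mean of [10, 200, 10, 200] is 105, so only the bright pixels set their bit.
        let luma: [u8; 4] = [10, 200, 10, 200];
        let bits: Vec<bool> = mean_hash_u8(&luma).collect();
        assert_eq!(bits, [false, true, false, true]);
    }

    #[test]
    fn gradient_hash_compares_within_rows_only() {
        // Two rows of three pixels; each row contributes exactly two comparisons.
        let luma = [1u8, 2, 1, 5, 4, 6];
        let bits: Vec<bool> = gradient_hash(&luma, 3).collect();
        assert_eq!(bits, [true, false, false, true]);
    }
}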