fast_dhash/
lib.rs

1//! # Fast Dhash
2//!
3//! A fast rust implementation of the perceptual hash [*dhash*](https://www.hackerfactor.com/blog/index.php?/archives/529-Kind-of-Like-That.html).
4//!
//! The main difference from other Rust implementations, and the reason it is called *fast*, is that it uses multithreading and neither resizes nor converts the image, effectively cycling through its bytes only once.
6//!
7//! ## Usage
8//!
//! For forward and backward compatibility, *fast dhash* does NOT directly rely on the [*image*](https://docs.rs/image/latest/image/index.html) crate; it is up to the user to provide the image bytes and dimensions.
10//!
11//! ```
12//! use fast_dhash::Dhash;
13//! use image::ImageReader;
14//!
15//! let image = ImageReader::open(".test/radial.jpg")
16//!     .expect("cannot read image")
17//!     .decode()
18//!     .expect("cannot decode image");
19//!
20//! let hash = Dhash::new(
21//!     image.as_bytes(),
22//!     image.width(),
23//!     image.height(),
24//!     image.color().channel_count(),
25//! );
26//!
27//! println!("hash: {}", hash);
28//! // hash: f0f0e8cccce8f0f0
29//! ```
30use serde::{Deserialize, Serialize};
31use std::{fmt, num, str, thread};
32
33#[derive(Debug, Clone, Copy, Deserialize, Serialize)]
34pub struct Dhash {
35    pub hash: u64,
36}
37
38impl Dhash {
39    pub fn new(bytes: &[u8], width: u32, height: u32, channel_count: u8) -> Self {
40        let width = width as usize;
41        let height = height as usize;
42        let channel_count = channel_count as usize;
43
44        // NOTE: Very important, prevents possible segfault
45        if width * height * channel_count != bytes.len() {
46            panic!(
47                "Invalid image dimensions, expected {} got {}",
48                bytes.len(),
49                width * height * channel_count
50            );
51        }
52
53        let cell_width = width / 9;
54        let cell_height = height / 8;
55
56        let grid = if channel_count >= 3 {
57            grid_from_rgb(bytes, width, cell_width, cell_height, channel_count)
58        } else {
59            grid_from_grayscale(bytes, width, cell_width, cell_height, channel_count)
60        };
61
62        let mut bits = [false; 64];
63
64        for y in 0..8 {
65            for x in 0..8 {
66                bits[y * 8 + x] = grid[y][x] > grid[y][x + 1];
67            }
68        }
69
70        let mut hash: u64 = 0;
71
72        for (i, &bit) in bits.iter().enumerate() {
73            if bit {
74                hash += 1 << i;
75            }
76        }
77
78        Self { hash }
79    }
80
81    pub fn hamming_distance(&self, other: &Self) -> u32 {
82        (self.hash ^ other.hash).count_ones()
83    }
84}
85
86impl PartialEq for Dhash {
87    fn eq(&self, other: &Self) -> bool {
88        self.hamming_distance(other) < 11
89    }
90}
91
92impl fmt::Display for Dhash {
93    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
94        write!(f, "{:016x}", &self.hash)
95    }
96}
97
98impl str::FromStr for Dhash {
99    type Err = num::ParseIntError;
100
101    fn from_str(s: &str) -> Result<Self, Self::Err> {
102        match u64::from_str_radix(s, 16) {
103            Ok(hash) => Ok(Self { hash }),
104            Err(error) => Err(error),
105        }
106    }
107}
108
/// Sums the luma of every cell of the 9x8 grid for an image whose pixels
/// start with red, green and blue channels.
///
/// Each of the 8 grid rows is processed by its own scoped thread. Scanlines
/// are walked sequentially and the buffer is accessed through bounds-checked
/// slices, so a buffer that is too short causes a panic instead of an
/// out-of-bounds read.
///
/// * `bytes` - interleaved pixel data, `channel_count` bytes per pixel and
///   `width` pixels per scanline.
/// * `width` - image width in pixels.
/// * `cell_width`, `cell_height` - size of one grid cell in pixels.
/// * `channel_count` - bytes per pixel; only the first three are read.
///
/// Returns, for every cell, `0.299 * R + 0.587 * G + 0.114 * B` where `R`,
/// `G` and `B` are the channel sums over the cell (sums, not averages).
/// Cells of zero width or height yield `0.0`.
///
/// # Panics
///
/// Panics if the cells are non-empty and `channel_count` is below 3, or if
/// `bytes` is too short for the requested grid.
fn grid_from_rgb(
    bytes: &[u8],
    width: usize,
    cell_width: usize,
    cell_height: usize,
    channel_count: usize,
) -> [[f64; 9]; 8] {
    let mut grid = [[0f64; 9]; 8];

    // Empty cells contribute nothing; leaving early also guarantees that the
    // chunk sizes used below are never zero.
    if cell_width == 0 || cell_height == 0 {
        return grid;
    }

    assert!(
        channel_count >= 3,
        "RGB grid needs at least 3 channels, got {}",
        channel_count
    );

    let line_bytes = width * channel_count;
    let cell_bytes = cell_width * channel_count;

    thread::scope(|s| {
        for (y, row) in grid.iter_mut().enumerate() {
            s.spawn(move || {
                // Per-cell [red, green, blue] sums. Integer accumulation is
                // exact, so the result does not depend on traversal order.
                let mut sums = [[0u64; 3]; 9];
                let first_line = y * cell_height;

                for image_y in first_line..first_line + cell_height {
                    // The part of this scanline covered by the nine cells;
                    // trailing pixels that do not fit in the grid are skipped.
                    let start = image_y * line_bytes;
                    let line = &bytes[start..start + 9 * cell_bytes];

                    for (sum, cell_line) in sums.iter_mut().zip(line.chunks_exact(cell_bytes)) {
                        for pixel in cell_line.chunks_exact(channel_count) {
                            sum[0] += u64::from(pixel[0]);
                            sum[1] += u64::from(pixel[1]);
                            sum[2] += u64::from(pixel[2]);
                        }
                    }
                }

                for (cell, [rs, gs, bs]) in row.iter_mut().zip(sums) {
                    *cell = rs as f64 * 0.299 + gs as f64 * 0.587 + bs as f64 * 0.114;
                }
            });
        }
    });

    grid
}
163
/// Sums the luma of every cell of the 9x8 grid for an image whose first
/// channel is the luma (grayscale, optionally followed by other channels).
///
/// Each of the 8 grid rows is processed by its own scoped thread. Scanlines
/// are walked sequentially and the buffer is accessed through bounds-checked
/// slices, so a buffer that is too short causes a panic instead of an
/// out-of-bounds read.
///
/// * `bytes` - interleaved pixel data, `channel_count` bytes per pixel and
///   `width` pixels per scanline.
/// * `width` - image width in pixels.
/// * `cell_width`, `cell_height` - size of one grid cell in pixels.
/// * `channel_count` - bytes per pixel; only the first one is read.
///
/// Returns, for every cell, the sum (not the average) of the first channel
/// over the cell. Cells of zero width or height yield `0.0`.
///
/// # Panics
///
/// Panics if the cells are non-empty and `channel_count` is zero, or if
/// `bytes` is too short for the requested grid.
fn grid_from_grayscale(
    bytes: &[u8],
    width: usize,
    cell_width: usize,
    cell_height: usize,
    channel_count: usize,
) -> [[f64; 9]; 8] {
    let mut grid = [[0f64; 9]; 8];

    // Empty cells contribute nothing; leaving early also guarantees that the
    // chunk size used below is never zero.
    if cell_width == 0 || cell_height == 0 {
        return grid;
    }

    assert!(
        channel_count >= 1,
        "grayscale grid needs at least 1 channel, got {}",
        channel_count
    );

    let line_bytes = width * channel_count;
    let cell_bytes = cell_width * channel_count;

    thread::scope(|s| {
        for (y, row) in grid.iter_mut().enumerate() {
            s.spawn(move || {
                // Integer accumulation is exact, so the result does not
                // depend on traversal order.
                let mut sums = [0u64; 9];
                let first_line = y * cell_height;

                for image_y in first_line..first_line + cell_height {
                    // The part of this scanline covered by the nine cells;
                    // trailing pixels that do not fit in the grid are skipped.
                    let start = image_y * line_bytes;
                    let line = &bytes[start..start + 9 * cell_bytes];

                    for (sum, cell_line) in sums.iter_mut().zip(line.chunks_exact(cell_bytes)) {
                        // Stepping by the channel count visits the first
                        // channel of each pixel.
                        *sum += cell_line
                            .iter()
                            .step_by(channel_count)
                            .map(|&luma| u64::from(luma))
                            .sum::<u64>();
                    }
                }

                for (cell, sum) in row.iter_mut().zip(sums) {
                    *cell = sum as f64;
                }
            });
        }
    });

    grid
}
214
#[cfg(test)]
mod test {
    use super::Dhash;
    use image::ImageReader;

    /// Decodes the image stored at `path` and hashes its raw pixel buffer.
    fn hash_of(path: &str) -> Dhash {
        let image = ImageReader::open(path)
            .expect("cannot read image")
            .decode()
            .expect("cannot decode image");

        Dhash::new(
            image.as_bytes(),
            image.width(),
            image.height(),
            image.color().channel_count(),
        )
    }

    /// The `grad.ffff` fixture is expected to set every bit of the hash.
    #[test]
    fn grad_ffff() {
        assert_eq!(hash_of(".test/grad.ffff.jpg").hash, 0xffffffffffffffff);
    }

    /// The `grad.0000` fixture is expected to leave every bit of the hash unset.
    #[test]
    fn grad_0000() {
        assert_eq!(hash_of(".test/grad.0000.jpg").hash, 0x0000000000000000);
    }

    /// The `radial` fixture is expected to produce the hash shown in the crate documentation.
    #[test]
    fn radial() {
        assert_eq!(hash_of(".test/radial.jpg").hash, 0xf0f0e8cccce8f0f0);
    }
}