#![deny(missing_docs)]
use std::{borrow::Cow, fmt, marker::PhantomData};
mod alg;
mod dct;
mod traits;
use dct::DctCtxt;
use image::imageops;
use image::GrayImage;
use serde::{Deserialize, Serialize};
pub use image::imageops::FilterType;
pub use alg::HashAlg;
pub(crate) use traits::BitSet;
pub use traits::{DiffImage, HashBytes, Image};
/// Builder-style configuration from which a [`Hasher`] is created via `to_hasher()`.
///
/// The type parameter `B` names the container type used for hash bytes. It is only
/// carried as a `PhantomData` marker; no value of type `B` is stored in the config.
#[derive(Serialize, Deserialize)]
pub struct HasherConfig<B = Box<[u8]>> {
// Requested hash dimensions. `to_hasher()` passes them through
// `HashAlg::round_hash_size` before they are used.
width: u32,
height: u32,
// Pair of blur sigmas for the optional Gaussian-difference preprocessing step;
// `None` leaves that step disabled.
gauss_sigmas: Option<[f32; 2]>,
// `FilterType` comes from the `image` crate, so (de)serialization is routed through
// the local `SerdeFilterType` mirror.
#[serde(with = "SerdeFilterType")]
resize_filter: FilterType,
// Whether DCT preprocessing was requested (`to_hasher()` skips it for `HashAlg::Blockhash`).
dct: bool,
// Hash algorithm the resulting `Hasher` will run.
hash_alg: HashAlg,
// Zero-sized marker binding the config to the hash container type `B`.
_bytes_type: PhantomData<B>,
}
impl HasherConfig<Box<[u8]>> {
    /// Creates a configuration with the default settings whose hashes are stored in `Box<[u8]>`.
    ///
    /// This is `with_bytes_type` with the container type fixed to `Box<[u8]>`.
    pub fn new() -> Self {
        Self::with_bytes_type::<Box<[u8]>>()
    }

    /// Creates a configuration with the default settings for the hash container type `B_`.
    ///
    /// The defaults are:
    /// * hash size 8 x 8,
    /// * resize filter [`FilterType::Lanczos3`],
    /// * hash algorithm [`HashAlg::Gradient`],
    /// * no Gaussian-difference preprocessing and no DCT preprocessing.
    pub fn with_bytes_type<B_: HashBytes>() -> HasherConfig<B_> {
        let (width, height) = (8, 8);
        HasherConfig {
            hash_alg: HashAlg::Gradient,
            resize_filter: FilterType::Lanczos3,
            gauss_sigmas: None,
            dct: false,
            width,
            height,
            _bytes_type: PhantomData,
        }
    }
}
impl<B: HashBytes> HasherConfig<B> {
    /// Sets the requested hash dimensions.
    ///
    /// The values are stored as given; `to_hasher()` later passes them through
    /// `HashAlg::round_hash_size`, so the effective size may differ.
    pub fn hash_size(self, width: u32, height: u32) -> Self {
        Self {
            width,
            height,
            ..self
        }
    }

    /// Sets the filter used when images are resized for hashing.
    pub fn resize_filter(self, resize_filter: FilterType) -> Self {
        Self {
            resize_filter,
            ..self
        }
    }

    /// Sets the hash algorithm.
    pub fn hash_alg(self, hash_alg: HashAlg) -> Self {
        Self { hash_alg, ..self }
    }

    /// Requests DCT preprocessing.
    ///
    /// `to_hasher()` ignores this flag when the algorithm is [`HashAlg::Blockhash`].
    pub fn preproc_dct(self) -> Self {
        Self { dct: true, ..self }
    }

    /// Enables Gaussian-difference preprocessing with the sigmas `5.0` and `10.0`.
    pub fn preproc_diff_gauss(self) -> Self {
        self.preproc_diff_gauss_sigmas(5.0, 10.0)
    }

    /// Enables Gaussian-difference preprocessing with the given pair of blur sigmas.
    ///
    /// During hashing the image is blurred once with each sigma and the two results
    /// are combined with `diff_inplace`.
    pub fn preproc_diff_gauss_sigmas(self, sigma_a: f32, sigma_b: f32) -> Self {
        Self {
            gauss_sigmas: Some([sigma_a, sigma_b]),
            ..self
        }
    }

    /// Builds a [`Hasher`] from the current settings.
    ///
    /// The requested size is first adjusted by `HashAlg::round_hash_size`. A DCT
    /// context (sized by `HashAlg::resize_dimensions`) is created only when DCT
    /// preprocessing was requested and the algorithm is not [`HashAlg::Blockhash`].
    ///
    /// # Panics
    ///
    /// Panics if the adjusted `width * height` exceeds `B::max_bits()`.
    pub fn to_hasher(&self) -> Hasher<B> {
        let Self {
            hash_alg,
            width,
            height,
            gauss_sigmas,
            resize_filter,
            dct,
            ..
        } = *self;

        let (width, height) = hash_alg.round_hash_size(width, height);

        // Widen before multiplying: a `u32 * u32` product can exceed `u32::MAX`, which
        // panics in debug builds and wraps in release builds, letting an oversized hash
        // slip past this capacity check. `usize` is at most 64 bits wide, so the cast
        // of `max_bits()` is lossless.
        let hash_bits = u64::from(width) * u64::from(height);
        assert!(
            hash_bits <= B::max_bits() as u64,
            "hash size too large for container: {} x {}",
            width,
            height
        );

        let dct_coeffs = if dct && hash_alg != HashAlg::Blockhash {
            let (dct_width, dct_height) = hash_alg.resize_dimensions(width, height);
            Some(DctCtxt::new(dct_width, dct_height))
        } else {
            None
        };

        Hasher {
            ctxt: HashCtxt {
                gauss_sigmas,
                dct_ctxt: dct_coeffs,
                width,
                height,
                resize_filter,
            },
            hash_alg,
            bytes_type: PhantomData,
        }
    }
}
impl<B> fmt::Debug for HasherConfig<B> {
    /// Writes the configuration as a `HasherConfig { .. }` debug struct.
    ///
    /// The marker field is left out, the resize filter is shown by variant name, and
    /// the DCT flag appears under the key `use_dct`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let filter_name = debug_filter_type(&self.resize_filter);
        let mut out = f.debug_struct("HasherConfig");
        out.field("width", &self.width);
        out.field("height", &self.height);
        out.field("hash_alg", &self.hash_alg);
        out.field("resize_filter", &filter_name);
        out.field("gauss_sigmas", &self.gauss_sigmas);
        out.field("use_dct", &self.dct);
        out.finish()
    }
}
impl Default for HasherConfig {
fn default() -> Self {
HasherConfig::new()
}
}
/// Computes image hashes using settings fixed at construction time.
///
/// Instances are produced by `HasherConfig::to_hasher()`. The type parameter `B` is the
/// container type of the hashes returned by `hash_image`.
pub struct Hasher<B = Box<[u8]>> {
// Precomputed hashing state: dimensions, resize filter, optional blur sigmas and DCT context.
ctxt: HashCtxt,
// Algorithm that `hash_image` delegates to.
hash_alg: HashAlg,
// Zero-sized marker for the hash container type `B`.
bytes_type: PhantomData<B>,
}
impl<B: HashBytes> Hasher<B> {
    /// Hashes `img` with the configured algorithm and returns the result as an [`ImageHash`].
    ///
    /// The work is delegated to `HashAlg::hash_image`, which receives this hasher's
    /// internal context together with the image.
    pub fn hash_image<I: Image>(&self, img: &I) -> ImageHash<B> {
        ImageHash {
            hash: self.hash_alg.hash_image(&self.ctxt, img),
        }
    }
}
/// Either a borrowed input image or an owned buffer of the image's associated `Buf` type.
///
/// `HashCtxt::gauss_preproc` returns `Owned` when it had to build a new blurred-and-diffed
/// buffer and `Borrowed` when the input is passed through untouched.
enum CowImage<'a, I: Image> {
/// The caller's image, unchanged.
Borrowed(&'a I),
/// A buffer created by preprocessing.
Owned(I::Buf),
}
impl<'a, I: Image> CowImage<'a, I> {
    /// Returns the grayscale form of whichever image is held.
    ///
    /// Both variants simply forward to the `to_grayscale` method of the wrapped value.
    fn to_grayscale(&self) -> Cow<'_, GrayImage> {
        match self {
            Self::Owned(buffer) => buffer.to_grayscale(),
            Self::Borrowed(source) => source.to_grayscale(),
        }
    }
}
/// Values extracted from a resized image, as returned by `HashCtxt::calc_hash_vals`.
enum HashVals {
/// Output of the DCT path: the cropped result of `DctCtxt::crop_2d`.
Floats(Vec<f32>),
/// Output of the plain path: the raw bytes of the resized grayscale image.
Bytes(Vec<u8>),
}
/// Per-hasher state built once by `HasherConfig::to_hasher()` and shared by every hash call.
struct HashCtxt {
// Blur sigmas for the Gaussian-difference step; `None` skips the step.
gauss_sigmas: Option<[f32; 2]>,
// Present only when DCT preprocessing is enabled and the algorithm is not `Blockhash`.
dct_ctxt: Option<DctCtxt>,
// Filter passed to `imageops::resize`.
resize_filter: FilterType,
// Hash dimensions after `HashAlg::round_hash_size` has been applied.
width: u32,
height: u32,
}
impl HashCtxt {
    /// Applies the optional Gaussian-difference preprocessing step.
    ///
    /// With sigmas configured, the image is blurred once per sigma, the first blur is
    /// combined in place with the second via `diff_inplace`, and the result is returned
    /// as an owned buffer. Without sigmas the input is returned borrowed.
    fn gauss_preproc<'a, I: Image>(&self, image: &'a I) -> CowImage<'a, I> {
        match self.gauss_sigmas {
            None => CowImage::Borrowed(image),
            Some([sigma_a, sigma_b]) => {
                let mut diffed = image.blur(sigma_a);
                let second_blur = image.blur(sigma_b);
                diffed.diff_inplace(&second_blur);
                CowImage::Owned(diffed)
            }
        }
    }

    /// Resizes `img` and extracts the values the hash is computed from.
    ///
    /// When a DCT context exists, the image is resized to the context's own dimensions
    /// (the `width`/`height` arguments are not used), converted to `f32`, transformed
    /// with `dct_2d`, and cropped with `crop_2d`. Otherwise the image is resized to
    /// `width` x `height` and its raw bytes are returned.
    fn calc_hash_vals(&self, img: &GrayImage, width: u32, height: u32) -> HashVals {
        let filter = self.resize_filter;
        match self.dct_ctxt {
            Some(ref dct_ctxt) => {
                let resized = imageops::resize(img, dct_ctxt.width(), dct_ctxt.height(), filter);
                let mut samples: Vec<f32> =
                    resized.into_vec().into_iter().map(f32::from).collect();
                dct_ctxt.dct_2d(&mut samples);
                HashVals::Floats(dct_ctxt.crop_2d(samples))
            }
            None => {
                let resized = imageops::resize(img, width, height, filter);
                HashVals::Bytes(resized.into_vec())
            }
        }
    }
}
/// A hash value stored in a container of type `B`.
///
/// Values are produced by `Hasher::hash_image` or reconstructed with the `from_*`
/// constructors.
#[derive(PartialEq, Eq, Hash, Debug, Clone)]
pub struct ImageHash<B = Box<[u8]>> {
    // The hash bytes themselves.
    hash: B,
}

impl<B: AsRef<[u8]>> ImageHash<B> {
    /// Encodes the hash bytes as a lowercase hexadecimal string.
    ///
    /// Every byte becomes exactly two characters (high nibble first), so the result
    /// is twice as long as the hash; an empty hash yields an empty string.
    pub fn to_hex(&self) -> String {
        const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";

        let bytes = self.hash.as_ref();
        let mut hex = String::with_capacity(bytes.len() * 2);
        for &byte in bytes {
            // A nibble is always in 0..16, so the table lookups cannot go out of bounds,
            // and every table entry is ASCII, so no `unsafe` UTF-8 conversion is needed.
            hex.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
            hex.push(char::from(HEX_DIGITS[usize::from(byte & 0x0f)]));
        }
        hex
    }
}
/// Error returned when an [`ImageHash`] cannot be reconstructed from encoded input.
#[derive(Debug, PartialEq)]
pub enum InvalidBytesError {
/// The input holds more bytes than the target hash container accepts.
BytesWrongLength {
/// Byte capacity of the target container (`max_bits() / 8` as computed by `ImageHash::from_bytes`).
expected: usize,
/// Number of bytes that were actually supplied.
found: usize,
},
/// The input could not be decoded as base64; wraps the decoder's error.
Base64(base64::DecodeError),
}
impl<B: HashBytes> ImageHash<B> {
    /// Returns the hash bytes as a slice.
    pub fn as_bytes(&self) -> &[u8] {
        self.hash.as_slice()
    }

    /// Builds a hash from raw bytes.
    ///
    /// # Errors
    ///
    /// Returns [`InvalidBytesError::BytesWrongLength`] when `bytes` contains more bits
    /// than `B::max_bits()`. Shorter inputs are not rejected by this check.
    pub fn from_bytes(bytes: &[u8]) -> Result<ImageHash<B>, InvalidBytesError> {
        let max_bits = B::max_bits();

        // Saturate instead of using `len * 8`: that product can overflow `usize` (on
        // 32-bit targets from 512 MiB of input), which would panic in debug builds and
        // wrap in release builds, letting an oversized input pass the capacity check.
        if bytes.len().saturating_mul(8) > max_bits {
            return Err(InvalidBytesError::BytesWrongLength {
                expected: max_bits / 8,
                found: bytes.len(),
            });
        }

        Ok(ImageHash {
            hash: B::from_iter(bytes.iter().copied()),
        })
    }

    /// Returns the distance between this hash and `other`, as computed by
    /// `BitSet::hamming` over the two hash containers.
    pub fn dist(&self, other: &Self) -> u32 {
        BitSet::hamming(&self.hash, &other.hash)
    }

    /// Decodes a base64 string and builds a hash from the resulting bytes.
    ///
    /// # Errors
    ///
    /// Returns [`InvalidBytesError::Base64`] when decoding fails, or any error
    /// `from_bytes` reports for the decoded bytes.
    pub fn from_base64(encoded_hash: &str) -> Result<ImageHash<B>, InvalidBytesError> {
        let bytes = base64::decode(encoded_hash).map_err(InvalidBytesError::Base64)?;
        Self::from_bytes(&bytes)
    }

    /// Encodes the hash bytes as a base64 string using `base64::encode`.
    pub fn to_base64(&self) -> String {
        base64::encode(self.hash.as_slice())
    }
}
/// Local mirror of the `image` crate's `FilterType`, used only for (de)serialization.
///
/// `#[serde(remote = "FilterType")]` makes the derive generate the serde code for
/// `FilterType` itself; `HasherConfig::resize_filter` opts in through
/// `#[serde(with = "SerdeFilterType")]`. The variants here must stay in step with
/// those of `FilterType`.
#[derive(Serialize, Deserialize)]
#[serde(remote = "FilterType")]
enum SerdeFilterType {
Nearest,
Triangle,
CatmullRom,
Gaussian,
Lanczos3,
}
/// Maps a `FilterType` to its variant name, for use in `Debug` output.
fn debug_filter_type(ft: &FilterType) -> &'static str {
    match *ft {
        FilterType::Nearest => "Nearest",
        FilterType::Triangle => "Triangle",
        FilterType::CatmullRom => "CatmullRom",
        FilterType::Gaussian => "Gaussian",
        FilterType::Lanczos3 => "Lanczos3",
    }
}