extern crate image;
use std::cmp::Ordering;
use image::{GenericImageView, DynamicImage};
use image::imageops::FilterType;
use image::ImageReader;
use std::fs;
use crate::image_error::MyImageError;
/// Location of an image on disk plus the flags that influence how the image
/// is ranked against others in the same duplicate group.
#[derive(Clone)]
pub struct ImagePath {
    /// Path handed to the image loader and to `fs::metadata`.
    pub fpath: String,
    /// Marks a file as belonging to the comparison directory
    /// (NOTE(review): presumably `ConfigOptions::compare_dir` — confirm against the caller).
    pub is_compare_dir: bool,
    /// Consulted together with `is_compare_dir` by the `Ord` implementation of
    /// `ImageHashAV` to push this image behind images that do not carry the flag.
    pub always_mark_dupe_compare: bool,
}
/// Perceptual fingerprint of one image, filled in by `ImageHashAV::calc_image_hash`.
pub struct ImageHashAV {
    /// Primary sort key; initialised to the same value as `grey_hash`.
    pub dupe_group: u64,
    /// 64-bit average hash: one bit per pixel of the 8x8 greyscale thumbnail,
    /// set when that pixel is at or above the thumbnail's mean brightness.
    pub grey_hash: u64,
    /// The 8x8 thumbnail as 64 consecutive (red, green, blue) byte triples.
    pub low_res: [u8; 192],
    /// Width of the decoded image in pixels.
    pub width: u32,
    /// Height of the decoded image in pixels.
    pub height: u32,
    /// Size of the file in bytes, as reported by `fs::metadata`.
    pub file_size: u64,
    /// `width * height`, widened to `u64`.
    pub num_pixels: u64,
    /// Mean of the per-channel standard deviations of `low_res`.
    pub std_dev: f32,
    /// Copy of the path descriptor the fingerprint was computed from.
    pub image_path: ImagePath,
}
/// Run-time settings for the de-duplication run.
///
/// Only `colour_difference_threshold` and `std_dev_threshold` are read by the
/// code next to this struct; the notes on the other fields are inferred from
/// their names and should be confirmed against the code that consumes them.
pub struct ConfigOptions {
    /// Largest `ImageHashAV::diff_colour` distance at which `is_dupe` still reports a match.
    pub colour_difference_threshold: u64,
    /// Both images must have a `std_dev` above this value for `is_dupe` to report a match.
    pub std_dev_threshold: f32,
    /// NOTE(review): presumably a group-size cut-off for switching algorithm — confirm.
    pub alg_flip_threshold: u64,
    /// NOTE(review): presumably forces the colour-difference-only algorithm — confirm.
    pub alg_colour_diff_only: bool,
    /// NOTE(review): presumably restricts scanning to known image extensions — confirm.
    pub only_known_file_extensions: bool,
    /// NOTE(review): presumably limits output to duplicates — confirm.
    pub only_list_duplicates: bool,
    /// NOTE(review): presumably limits output to unique images — confirm.
    pub only_list_uniques: bool,
    /// NOTE(review): presumably lists every image — confirm.
    pub list_all: bool,
    /// NOTE(review): presumably the worker thread count — confirm.
    pub num_threads: u32,
    /// NOTE(review): presumably the directory whose images get `is_compare_dir` — confirm.
    pub compare_dir: String,
    /// NOTE(review): presumably true when a compare directory is in use — confirm.
    pub am_comparing: bool,
    /// NOTE(review): presumably the source of `ImagePath::always_mark_dupe_compare` — confirm.
    pub always_mark_duplicates: bool,
    /// NOTE(review): presumably passed as `min_width` to `ImageHashAV::new` — confirm.
    pub min_width: u32,
    /// NOTE(review): presumably passed as `min_height` to `ImageHashAV::new` — confirm.
    pub min_height: u32,
}
/// Total ordering used to sort image fingerprints.
///
/// Keys, in order of precedence:
/// 1. `dupe_group`, ascending.
/// 2. "Pinned" images — those whose path has both `always_mark_dupe_compare`
///    and `is_compare_dir` set — sort after images that are not pinned.
/// 3. `num_pixels`, descending (more pixels first).
/// 4. `file_size`, descending (larger file first).
/// 5. `is_compare_dir`: compare-directory images sort after the others.
///
/// Each key is computed from a single image, so the comparison is a plain
/// lexicographic one and therefore transitive. Testing the pinned state of one
/// side against only the `always_mark_dupe_compare` flag of the other side is
/// not transitive when a non-compare-directory image also carries that flag.
impl Ord for ImageHashAV {
    fn cmp(&self, other: &Self) -> Ordering {
        // Derive the same key for both operands so the relation stays symmetric.
        let pinned = |img: &Self| {
            img.image_path.always_mark_dupe_compare && img.image_path.is_compare_dir
        };

        self.dupe_group
            .cmp(&other.dupe_group)
            // `false < true`, so pinned images end up last within their group.
            .then_with(|| pinned(self).cmp(&pinned(other)))
            // Operands are swapped on purpose: bigger values sort first.
            .then_with(|| other.num_pixels.cmp(&self.num_pixels))
            .then_with(|| other.file_size.cmp(&self.file_size))
            .then_with(|| {
                self.image_path
                    .is_compare_dir
                    .cmp(&other.image_path.is_compare_dir)
            })
    }
}
// Marker impl with no methods; `Ord` requires `Eq` as a supertrait.
impl Eq for ImageHashAV {}
/// Partial ordering that simply defers to the total ordering from `Ord`,
/// so it never returns `None`.
impl PartialOrd for ImageHashAV {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        let total = Ord::cmp(self, other);
        Some(total)
    }
}
/// Two fingerprints are equal when their `dupe_group`, `num_pixels` and
/// `file_size` all match; no other field is looked at.
///
/// NOTE(review): `Ord::cmp` for this type also consults the `image_path`
/// flags, so `a == b` does not guarantee `a.cmp(&b) == Ordering::Equal`.
/// Confirm whether callers depend on this looser equality before aligning them.
impl PartialEq for ImageHashAV {
    fn eq(&self, other: &Self) -> bool {
        let lhs = (self.dupe_group, self.num_pixels, self.file_size);
        let rhs = (other.dupe_group, other.num_pixels, other.file_size);
        lhs == rhs
    }
}
fn load_image_from_file( image_path: &str ) -> std::result::Result<DynamicImage, MyImageError> {
let img = match ImageReader::open(image_path) {
Ok(image) => image,
Err(_) => {
return Err(MyImageError::FileError(format!("Error: Failed to read image file: {}", image_path).to_string()));
},
};
let format_guessed = match img.with_guessed_format() {
Ok( format_guessed ) => format_guessed,
Err(_) => {
return Err(MyImageError::DecodeFail(format!("Error: Failed to identify image file format {}", image_path).to_string()));
}
};
let decoded_img = match format_guessed.decode() {
Ok( decoded_img ) => decoded_img,
Err(_) => {
return Err( MyImageError::DecodeFail(format!("Error: Failed to correctly decode image: {}", image_path).to_string()) );
}
};
return Ok(decoded_img);
}
impl ImageHashAV {
    /// Colour-distance ceiling (see `diff_colour`) under which two thumbnails
    /// are treated as similar; the unit tests use it as that bound.
    pub const DEFAULT_COLOUR_DIFF_THRESHOLD: u64 = 256;
    /// NOTE(review): presumably the default for `ConfigOptions::std_dev_threshold` — confirm.
    pub const DEFAULT_STD_DEV_THRESHOLD: f32 = 3.0;
    /// NOTE(review): presumably the default for `ConfigOptions::alg_flip_threshold` — confirm.
    pub const DEFAULT_ALG_FLIP_THRESHOLD: u64 = 50000;

    /// Loads the image described by `fpath` and computes its fingerprint.
    ///
    /// `min_width` / `min_height` are minimum dimensions in pixels; a value of
    /// 0 disables that minimum.
    ///
    /// # Errors
    /// Returns whatever error `calc_image_hash` reports.
    pub fn new(fpath: &ImagePath, min_width: u32, min_height: u32) -> Result<ImageHashAV, MyImageError> {
        let mut object = ImageHashAV {
            dupe_group: 0,
            grey_hash: 0,
            low_res: [0; 192],
            width: 0,
            height: 0,
            num_pixels: 0,
            std_dev: 0f32,
            file_size: 0,
            image_path: ImagePath {
                fpath: "".to_string(),
                is_compare_dir: false,
                always_mark_dupe_compare: false,
            },
        };
        match object.calc_image_hash(fpath, min_width, min_height) {
            Some(e) => Err(e),
            None => Ok(object),
        }
    }

    /// Returns true when `comp`'s width/height ratio lies within ±2 % of this
    /// image's ratio.
    ///
    /// The tolerance band is derived from `self` only, so the check is not
    /// exactly symmetric for ratios right at the edge of the band.
    pub fn has_similar_aspect_ratio(&self, comp: &ImageHashAV) -> bool {
        let aspect_ratio_a: f32 = self.width as f32 / self.height as f32;
        let aspect_ratio_b: f32 = comp.width as f32 / comp.height as f32;
        let aspect_ratio_a_high = aspect_ratio_a * 1.02;
        let aspect_ratio_a_low = aspect_ratio_a - (aspect_ratio_a * 0.02);
        aspect_ratio_b <= aspect_ratio_a_high && aspect_ratio_b >= aspect_ratio_a_low
    }

    /// Sum of absolute per-byte differences between the two 8x8 RGB
    /// thumbnails (0 for identical thumbnails, at most 192 * 255).
    pub fn diff_colour(&self, comp: &ImageHashAV) -> u64 {
        self.low_res
            .iter()
            .zip(comp.low_res.iter())
            .map(|(&ours, &theirs)| u64::from(ours.abs_diff(theirs)))
            .sum()
    }

    /// Decides whether `other` is a duplicate of this image.
    ///
    /// All of the following must hold: both images have a `std_dev` above
    /// `config.std_dev_threshold`, their aspect ratios are similar, and their
    /// colour distance is at most `config.colour_difference_threshold`.
    pub fn is_dupe(&self, other: &ImageHashAV, config: &ConfigOptions) -> bool {
        self.std_dev > config.std_dev_threshold
            && other.std_dev > config.std_dev_threshold
            && self.has_similar_aspect_ratio(other)
            && self.diff_colour(other) <= config.colour_difference_threshold
    }

    /// Recomputes `std_dev` as the mean of the red, green and blue standard
    /// deviations of the `low_res` thumbnail.
    pub fn calc_std_dev_colour_hash(&mut self) {
        let red = self.channel_std_dev(0);
        let green = self.channel_std_dev(1);
        let blue = self.channel_std_dev(2);
        self.std_dev = (red + green + blue) / 3.0;
    }

    /// Population standard deviation of one colour channel of `low_res`
    /// (`channel` is 0 = red, 1 = green, 2 = blue).
    fn channel_std_dev(&self, channel: usize) -> f32 {
        // `low_res` interleaves the channels, so every third byte belongs to
        // `channel`; that yields exactly 64 samples.
        let samples = || {
            self.low_res
                .iter()
                .skip(channel)
                .step_by(3)
                .map(|&value| f32::from(value))
        };
        let mean = samples().fold(0.0f32, |acc, value| acc + value) / 64.0;
        let variance = samples().fold(0.0f32, |acc, value| acc + (value - mean).powf(2f32)) / 64.0;
        variance.sqrt()
    }

    /// Loads `im_path` and fills in every field of `self`.
    ///
    /// Steps: decode the file, reject images that are too small, record the
    /// dimensions / pixel count / path / file size, shrink the image to 8x8,
    /// build the 64-bit average hash from the greyscale thumbnail, store the
    /// RGB thumbnail in `low_res` and finally compute `std_dev`.
    ///
    /// `min_width` and `min_height` are enforced independently of each other;
    /// 0 disables the respective minimum.
    ///
    /// Returns `None` on success and `Some(error)` on failure. On failure
    /// `self` may already have been partially updated.
    pub fn calc_image_hash(&mut self, im_path: &ImagePath, min_width: u32, min_height: u32) -> Option<MyImageError> {
        let img = match load_image_from_file(&im_path.fpath) {
            Ok(img) => img,
            Err(e) => return Some(e),
        };

        let (width, height) = img.dimensions();
        if width < 16 || height < 16 {
            return Some(MyImageError::ImageTooSmall(format!(
                "Warning: Image too small to deduplicate: {}",
                im_path.fpath
            )));
        }
        // A `u32` is never below 0, so a minimum of 0 can never reject; this
        // lets a caller set only one of the two minimums.
        if width < min_width || height < min_height {
            return Some(MyImageError::ImageTooSmall(format!(
                "Warning: Ignored image because dimensions ({},{}) are below minimum: {}",
                width, height, im_path.fpath
            )));
        }

        self.width = width;
        self.height = height;
        self.num_pixels = u64::from(width) * u64::from(height);
        self.image_path = im_path.clone();

        match fs::metadata(&im_path.fpath) {
            Ok(md) => self.file_size = md.len(),
            Err(_) => {
                return Some(MyImageError::FileError(format!(
                    "Error: Failed to get size of: {}",
                    im_path.fpath
                )));
            }
        }

        let scaled = img.resize_exact(8, 8, FilterType::Gaussian);
        // Defensive: everything below assumes exactly 64 pixels.
        if scaled.dimensions() != (8, 8) {
            return Some(MyImageError::DecodeFail(format!(
                "Error: Failed to resize image correctly: {}",
                im_path.fpath
            )));
        }

        // Average hash: bit N is set when greyscale pixel N (in iteration
        // order) is at least as bright as the thumbnail's mean.
        let gs = scaled.grayscale();
        let grey_levels: Vec<u8> = gs.pixels().map(|(_, _, px)| px.0[0]).collect();
        let total: u64 = grey_levels.iter().map(|&level| u64::from(level)).sum();
        let average: f32 = (total as f32) / (grey_levels.len() as f32);
        let hash_val = grey_levels
            .iter()
            .enumerate()
            .fold(0u64, |hash, (bit, &level)| {
                if f32::from(level) >= average {
                    hash | (1u64 << bit)
                } else {
                    hash
                }
            });

        // Keep the first three channels (red, green, blue) of every thumbnail pixel.
        for (slot, (_, _, px)) in self.low_res.chunks_exact_mut(3).zip(scaled.pixels()) {
            slot.copy_from_slice(&px.0[..3]);
        }

        self.dupe_group = hash_val;
        self.grey_hash = hash_val;
        self.calc_std_dev_colour_hash();
        None
    }
}
#[cfg(test)]
mod tests {
    extern crate glob;
    use super::*;
    use glob::glob;

    /// Number of bit positions (0..=64) at which `a` and `b` hold the same
    /// value, i.e. 64 minus their Hamming distance.
    fn count_matching_bits(a: u64, b: u64) -> u8 {
        (!(a ^ b)).count_ones() as u8
    }

    /// Fingerprints `path` as a regular (non compare-directory) image with no
    /// minimum dimensions; panics when the image cannot be processed.
    fn hash_file(path: &str) -> ImageHashAV {
        let image_path = ImagePath {
            fpath: path.to_string(),
            is_compare_dir: false,
            always_mark_dupe_compare: false,
        };
        ImageHashAV::new(&image_path, 0, 0).unwrap()
    }

    /// Paths under `unit_test_images/` accepted by `wanted`, in glob order.
    fn fixture_paths(wanted: impl Fn(&str) -> bool) -> Vec<String> {
        glob("unit_test_images/*")
            .expect("Failed to read glob pattern")
            .map(|entry| entry.unwrap().display().to_string())
            .filter(|path| wanted(path.as_str()))
            .collect()
    }

    /// Dimensions of a known fixture are read correctly.
    #[test]
    fn test_image_read() {
        let result = hash_file("unit_test_images/bridge1_best.jpg");
        assert_eq!(768, result.width, "Width OK");
        assert_eq!(576, result.height, "Height OK");
        assert_eq!(576 * 768, result.num_pixels, "NUm pixels OK");
    }

    /// Each `*_best.*` fixture must match its two `*_duplicate_*` variants on
    /// both the grey hash (at most one differing bit) and the colour distance.
    #[test]
    fn test_image_duplicates() {
        let mut image_paths =
            fixture_paths(|path| path.contains("_best.") || path.contains("_duplicate_"));
        // Sorting relies on the fixture names placing each "best" image
        // directly before its two duplicates.
        image_paths.sort();
        assert!(image_paths.len() >= 3, "3 or more image paths");
        assert!(
            image_paths.len() % 3 == 0,
            "Image paths divides by 3 (best + 2 duplicates per image)"
        );

        for trio in image_paths.chunks_exact(3) {
            let result = hash_file(&trio[0]);
            let dupe1 = hash_file(&trio[1]);
            let dupe2 = hash_file(&trio[2]);
            assert!(
                count_matching_bits(result.dupe_group, dupe1.dupe_group) >= 63,
                "First duplicate grey hash matches"
            );
            assert!(
                count_matching_bits(result.dupe_group, dupe2.dupe_group) >= 63,
                "Second duplicate grey hash matches"
            );
            assert!(
                result.diff_colour(&dupe1) <= ImageHashAV::DEFAULT_COLOUR_DIFF_THRESHOLD,
                "First duplicate colours are similar"
            );
            assert!(
                result.diff_colour(&dupe2) <= ImageHashAV::DEFAULT_COLOUR_DIFF_THRESHOLD,
                "Second duplicate colours are similar"
            );
        }
    }

    /// No two distinct `*_best.*` fixtures may look like duplicates: every
    /// pair must differ in at least one hash bit or exceed the colour threshold.
    #[test]
    fn test_image_uniques() {
        let image_paths = fixture_paths(|path| path.contains("_best."));
        let image_hashes: Vec<ImageHashAV> =
            image_paths.iter().map(|path| hash_file(path)).collect();

        for (i, first) in image_hashes.iter().enumerate() {
            for second in &image_hashes[i + 1..] {
                let hashes_differ =
                    count_matching_bits(second.dupe_group, first.dupe_group) <= 63;
                assert!(
                    hashes_differ
                        || second.diff_colour(first) > ImageHashAV::DEFAULT_COLOUR_DIFF_THRESHOLD,
                    "Images that should be unique don't match"
                );
            }
        }
    }
}