use crate::{DedupError, DedupResult};
/// An 8-bit raster image stored as one contiguous, row-major byte buffer.
///
/// Samples are interleaved: channel `c` of pixel `(x, y)` lives at
/// `data[(y * width + x) * channels + c]`.
#[derive(Debug, Clone)]
pub struct Image {
    /// Width in pixels.
    pub width: usize,
    /// Height in pixels.
    pub height: usize,
    /// Interleaved samples; expected length is `width * height * channels`.
    pub data: Vec<u8>,
    /// Number of samples per pixel.
    pub channels: usize,
}

impl Image {
    /// Creates a zero-filled image with the given dimensions.
    #[must_use]
    pub fn new(width: usize, height: usize, channels: usize) -> Self {
        Self {
            width,
            height,
            data: vec![0u8; width * height * channels],
            channels,
        }
    }

    /// Wraps an existing sample buffer after validating its length.
    ///
    /// # Errors
    ///
    /// Returns `DedupError::Visual` when `data.len()` differs from
    /// `width * height * channels`, or when that product overflows `usize`.
    pub fn from_data(
        width: usize,
        height: usize,
        channels: usize,
        data: Vec<u8>,
    ) -> DedupResult<Self> {
        // Checked arithmetic: a wrapped product must never "match" a short buffer.
        let expected = width
            .checked_mul(height)
            .and_then(|pixels| pixels.checked_mul(channels));
        match expected {
            Some(expected) if expected == data.len() => Ok(Self {
                width,
                height,
                data,
                channels,
            }),
            Some(expected) => Err(DedupError::Visual(format!(
                "Invalid data size: expected {}, got {}",
                expected,
                data.len()
            ))),
            None => Err(DedupError::Visual(format!(
                "Invalid data size: {}x{}x{} overflows usize, got {}",
                width,
                height,
                channels,
                data.len()
            ))),
        }
    }

    /// Returns a single-channel copy of the image.
    ///
    /// * 1 channel: returned unchanged (cloned).
    /// * 2 channels: the first sample of each pixel is used
    ///   (presumably gray + alpha — confirm against the producers of such images).
    /// * 3 or more channels: the first three samples are combined with the
    ///   weights 0.299 / 0.587 / 0.114 and truncated to `u8`.
    /// * 0 channels, or a buffer shorter than the dimensions imply: missing
    ///   pixels are filled with 0 instead of panicking.
    #[must_use]
    pub fn to_grayscale(&self) -> Self {
        let pixel_count = self.width * self.height;
        let mut gray: Vec<u8> = match self.channels {
            1 => return self.clone(),
            0 => Vec::new(),
            2 => self
                .data
                .chunks_exact(2)
                .take(pixel_count)
                .map(|px| px[0])
                .collect(),
            n => self
                .data
                .chunks_exact(n)
                .take(pixel_count)
                .map(|px| {
                    let r = f64::from(px[0]);
                    let g = f64::from(px[1]);
                    let b = f64::from(px[2]);
                    (0.299 * r + 0.587 * g + 0.114 * b) as u8
                })
                .collect(),
        };
        // Keep the `width * height` invariant even for inconsistent inputs.
        gray.resize(pixel_count, 0);
        Self {
            width: self.width,
            height: self.height,
            data: gray,
            channels: 1,
        }
    }

    /// Resamples the image to `new_width` x `new_height` with nearest-neighbour
    /// lookup (source coordinate = destination coordinate * size ratio, truncated).
    ///
    /// An empty source, or a zero-sized target, yields a zero-filled image of the
    /// requested size instead of indexing out of bounds.
    ///
    /// # Panics
    ///
    /// Panics if `data` is shorter than `width * height * channels`.
    #[must_use]
    pub fn resize(&self, new_width: usize, new_height: usize) -> Self {
        let mut resized = Image::new(new_width, new_height, self.channels);
        if resized.data.is_empty() || self.width == 0 || self.height == 0 {
            return resized;
        }

        let channels = self.channels;
        let x_ratio = self.width as f64 / new_width as f64;
        let y_ratio = self.height as f64 / new_height as f64;

        // The source column depends only on x, so compute it once per column.
        // The `min` guards against float rounding landing one past the edge.
        let src_cols: Vec<usize> = (0..new_width)
            .map(|x| ((x as f64 * x_ratio) as usize).min(self.width - 1))
            .collect();

        let src_stride = self.width * channels;
        let dst_stride = new_width * channels;
        for (y, dst_row) in resized.data.chunks_exact_mut(dst_stride).enumerate() {
            let src_y = ((y as f64 * y_ratio) as usize).min(self.height - 1);
            let row_start = src_y * src_stride;
            let src_row = &self.data[row_start..row_start + src_stride];
            for (dst_px, &src_x) in dst_row.chunks_exact_mut(channels).zip(&src_cols) {
                let px_start = src_x * channels;
                dst_px.copy_from_slice(&src_row[px_start..px_start + channels]);
            }
        }
        resized
    }

    /// Returns the samples of pixel `(x, y)`, or `None` when the coordinates are
    /// outside the image or the buffer is too short to contain the pixel.
    #[must_use]
    pub fn get_pixel(&self, x: usize, y: usize) -> Option<&[u8]> {
        if x >= self.width || y >= self.height {
            return None;
        }
        let idx = (y * self.width + x) * self.channels;
        self.data.get(idx..idx + self.channels)
    }

    /// Returns the arithmetic mean of all samples (every channel), or `0.0`
    /// for an image without samples.
    #[must_use]
    pub fn mean(&self) -> f64 {
        if self.data.is_empty() {
            return 0.0;
        }
        let sum: u64 = self.data.iter().map(|&v| u64::from(v)).sum();
        sum as f64 / self.data.len() as f64
    }
}
/// A perceptual hash of up to 64 bits together with its nominal bit length.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PerceptualHash {
    /// The hash bits.
    hash: u64,
    /// Number of meaningful bits; used to normalise the Hamming distance.
    bits: usize,
}

impl PerceptualHash {
    /// Creates a hash from raw bits and the number of meaningful bits.
    #[must_use]
    pub fn new(hash: u64, bits: usize) -> Self {
        Self { hash, bits }
    }

    /// Returns the raw hash bits.
    #[must_use]
    pub fn hash(&self) -> u64 {
        self.hash
    }

    /// Returns the number of bit positions in which the two hashes differ.
    #[must_use]
    pub fn hamming_distance(&self, other: &Self) -> u32 {
        (self.hash ^ other.hash).count_ones()
    }

    /// Returns a similarity in `[0.0, 1.0]`: `1 - hamming_distance / bits`.
    ///
    /// The result is clamped so that a distance larger than `self.bits` cannot
    /// produce a negative value. A hash declared with zero bits compares as
    /// `1.0` to an identical hash and `0.0` otherwise (instead of NaN / -inf).
    #[must_use]
    pub fn similarity(&self, other: &Self) -> f64 {
        let distance = self.hamming_distance(other);
        if self.bits == 0 {
            return if distance == 0 { 1.0 } else { 0.0 };
        }
        (1.0 - f64::from(distance) / self.bits as f64).clamp(0.0, 1.0)
    }

    /// Formats the hash as 16 lowercase, zero-padded hexadecimal digits.
    #[must_use]
    pub fn to_hex(&self) -> String {
        format!("{:016x}", self.hash)
    }
}
/// Computes a 64-bit difference hash (dHash).
///
/// The image is converted to grayscale and resized to 9x8. Each bit records
/// whether a pixel is strictly darker than its right-hand neighbour; bits are
/// emitted row by row, least-significant bit first.
#[must_use]
pub fn compute_dhash(image: &Image) -> PerceptualHash {
    const HASH_SIZE: usize = 8;
    // One extra column so that every row yields HASH_SIZE neighbour pairs.
    const ROW_STRIDE: usize = HASH_SIZE + 1;

    let thumbnail = image.to_grayscale().resize(ROW_STRIDE, HASH_SIZE);
    let bits = thumbnail
        .data
        .chunks_exact(ROW_STRIDE)
        .flat_map(|row| row.windows(2))
        .enumerate()
        .filter(|(_, pair)| pair[1] > pair[0])
        .fold(0u64, |acc, (bit, _)| acc | (1u64 << bit));
    PerceptualHash::new(bits, 64)
}
/// Computes a 64-bit average hash (aHash).
///
/// The image is converted to grayscale and resized to 8x8; bit `i` is set when
/// pixel `i` (row-major) is strictly brighter than the thumbnail's mean.
#[must_use]
pub fn compute_ahash(image: &Image) -> PerceptualHash {
    const HASH_SIZE: usize = 8;

    let thumbnail = image.to_grayscale().resize(HASH_SIZE, HASH_SIZE);
    let average = thumbnail.mean();
    let bits = thumbnail
        .data
        .iter()
        .enumerate()
        .filter(|&(_, &sample)| f64::from(sample) > average)
        .fold(0u64, |acc, (index, _)| acc | (1u64 << index));
    PerceptualHash::new(bits, 64)
}
/// Computes the orthonormal 2-D DCT-II of a row-major `rows` x `cols` matrix.
///
/// `output[u * cols + v]` is the coefficient for vertical frequency `u` and
/// horizontal frequency `v`. The cosine basis values depend only on
/// `(frequency, position)`, so they are tabulated once up front instead of
/// being re-evaluated inside the innermost loop; the summation order is
/// unchanged, so the results are identical to the direct evaluation.
///
/// # Panics
///
/// Panics if `input` holds fewer than `rows * cols` values.
fn dct_2d(input: &[f64], rows: usize, cols: usize) -> Vec<f64> {
    use std::f64::consts::PI;

    // table[k * n + i] = cos((2i + 1) * k * PI / (2n))
    let cosine_table = |n: usize| -> Vec<f64> {
        let mut table = Vec::with_capacity(n * n);
        for k in 0..n {
            for i in 0..n {
                table.push(((2 * i + 1) as f64 * k as f64 * PI / (2.0 * n as f64)).cos());
            }
        }
        table
    };
    // Orthonormal scaling factor for frequency `k` along an axis of length `n`.
    let scale = |k: usize, n: usize| -> f64 {
        if k == 0 {
            (1.0 / n as f64).sqrt()
        } else {
            (2.0 / n as f64).sqrt()
        }
    };

    let row_basis = cosine_table(rows);
    let col_basis = cosine_table(cols);
    let mut output = vec![0.0; rows * cols];

    for u in 0..rows {
        let row_cos = &row_basis[u * rows..(u + 1) * rows];
        for v in 0..cols {
            let col_cos = &col_basis[v * cols..(v + 1) * cols];
            let mut sum = 0.0;
            for (i, &cos_i) in row_cos.iter().enumerate() {
                let input_row = &input[i * cols..(i + 1) * cols];
                for (&val, &cos_j) in input_row.iter().zip(col_cos) {
                    sum += val * cos_i * cos_j;
                }
            }
            output[u * cols + v] = scale(u, rows) * scale(v, cols) * sum;
        }
    }
    output
}
/// Computes a 64-bit DCT-based perceptual hash (pHash).
///
/// The image is converted to grayscale, resized to 32x32 and transformed with
/// `dct_2d`. The top-left 8x8 block of coefficients is compared against its
/// own median (the element at index `len / 2` of the sorted block); bit `i`
/// is set when coefficient `i` (row-major) is strictly greater.
#[must_use]
pub fn compute_phash(image: &Image) -> PerceptualHash {
    const HASH_SIZE: usize = 8;
    const DCT_SIZE: usize = 32;

    let thumbnail = image.to_grayscale().resize(DCT_SIZE, DCT_SIZE);
    let samples: Vec<f64> = thumbnail.data[..DCT_SIZE * DCT_SIZE]
        .iter()
        .map(|&sample| f64::from(sample))
        .collect();
    let coefficients = dct_2d(&samples, DCT_SIZE, DCT_SIZE);

    // Keep only the HASH_SIZE x HASH_SIZE lowest frequencies.
    let low_freq: Vec<f64> = coefficients
        .chunks(DCT_SIZE)
        .take(HASH_SIZE)
        .flat_map(|row| row[..HASH_SIZE].iter().copied())
        .collect();

    let mut ordered = low_freq.clone();
    ordered.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
    let median = ordered[ordered.len() / 2];

    let bits = low_freq
        .iter()
        .enumerate()
        .filter(|&(_, &coefficient)| coefficient > median)
        .fold(0u64, |acc, (index, _)| acc | (1u64 << index));
    PerceptualHash::new(bits, 64)
}
/// Builds one 256-bin histogram per channel.
///
/// `result[c][v]` is the number of samples of channel `c` whose value is `v`.
/// Samples are assigned to channels by their position in the interleaved
/// buffer (`index % channels`). An image with zero channels yields an empty
/// result instead of dividing by zero.
#[must_use]
pub fn compute_histogram(image: &Image) -> Vec<Vec<u32>> {
    let mut histograms = vec![vec![0u32; 256]; image.channels];
    if image.channels == 0 {
        return histograms;
    }
    for (index, &value) in image.data.iter().enumerate() {
        histograms[index % image.channels][usize::from(value)] += 1;
    }
    histograms
}
/// Returns the mean per-channel correlation between two sets of histograms.
///
/// Channels are paired by index and scored with `histogram_correlation`.
/// Returns `0.0` when the channel counts differ or when there are no channels
/// at all (the latter would otherwise be 0/0 = NaN).
#[must_use]
pub fn compare_histograms(hist1: &[Vec<u32>], hist2: &[Vec<u32>]) -> f64 {
    if hist1.len() != hist2.len() || hist1.is_empty() {
        return 0.0;
    }
    let total: f64 = hist1
        .iter()
        .zip(hist2)
        .map(|(h1, h2)| histogram_correlation(h1, h2))
        .sum();
    total / hist1.len() as f64
}
/// Returns the Pearson correlation coefficient between two histograms.
///
/// Each histogram is centred on its own mean. Bins are paired by index; if
/// the lengths differ, only the common prefix is compared (rather than
/// indexing past the end of the shorter one). Returns `0.0` when either
/// histogram has zero variance over the compared bins, which includes empty
/// input.
fn histogram_correlation(hist1: &[u32], hist2: &[u32]) -> f64 {
    let mean_of =
        |hist: &[u32]| hist.iter().map(|&v| f64::from(v)).sum::<f64>() / hist.len() as f64;
    let mean1 = mean_of(hist1);
    let mean2 = mean_of(hist2);

    let mut numerator = 0.0;
    let mut denom1 = 0.0;
    let mut denom2 = 0.0;
    for (&bin1, &bin2) in hist1.iter().zip(hist2) {
        let d1 = f64::from(bin1) - mean1;
        let d2 = f64::from(bin2) - mean2;
        numerator += d1 * d2;
        denom1 += d1 * d1;
        denom2 += d2 * d2;
    }

    if denom1 == 0.0 || denom2 == 0.0 {
        return 0.0;
    }
    numerator / (denom1 * denom2).sqrt()
}
/// Tunable parameters for [`compute_ssim`].
#[derive(Debug, Clone, PartialEq)]
pub struct SsimParams {
    /// Side length, in pixels, of the square window slid over the images.
    pub window_size: usize,
    /// Factor of the first stabilising constant: `c1 = (k1 * l)^2`.
    pub k1: f64,
    /// Factor of the second stabilising constant: `c2 = (k2 * l)^2`.
    pub k2: f64,
    /// Value the `k1`/`k2` factors are scaled by (255 by default, the
    /// largest 8-bit sample value).
    pub l: f64,
}

impl Default for SsimParams {
    /// Returns an 11-pixel window with `k1 = 0.01`, `k2 = 0.03`, `l = 255`.
    fn default() -> Self {
        Self {
            window_size: 11,
            k1: 0.01,
            k2: 0.03,
            l: 255.0,
        }
    }
}
/// Computes the mean structural similarity (SSIM) of two images.
///
/// Both images are converted to grayscale. If their sizes differ, each is
/// resized to the smaller width and the smaller height of the pair. A square
/// window is then centred on every pixel far enough from the border for the
/// window to fit, and the per-window SSIM values are averaged.
///
/// When the images are smaller than `params.window_size`, the window is shrunk
/// to the largest odd size that fits. Without this, no window position would
/// exist and every small image pair (e.g. 8x8 thumbnails with the default
/// 11-pixel window) would score `0.0` regardless of content.
///
/// Returns `0.0` if either image has no pixels or no window could be evaluated.
#[must_use]
pub fn compute_ssim(image1: &Image, image2: &Image, params: &SsimParams) -> f64 {
    let gray1 = image1.to_grayscale();
    let gray2 = image2.to_grayscale();

    let width = gray1.width.min(gray2.width);
    let height = gray1.height.min(gray2.height);
    if width == 0 || height == 0 {
        return 0.0;
    }

    // Bring both images to the common size; skip the copy when already there.
    let fit = |gray: Image| -> Image {
        if gray.width == width && gray.height == height {
            gray
        } else {
            gray.resize(width, height)
        }
    };
    let img1 = fit(gray1);
    let img2 = fit(gray2);

    let c1 = (params.k1 * params.l).powi(2);
    let c2 = (params.k2 * params.l).powi(2);

    // Largest half-window that still leaves at least one valid centre pixel.
    let half_window = (params.window_size / 2).min((width.min(height) - 1) / 2);
    let window_size = 2 * half_window + 1;

    let mut ssim_sum = 0.0;
    let mut count = 0usize;
    for y in half_window..height.saturating_sub(half_window) {
        for x in half_window..width.saturating_sub(half_window) {
            let window1 = extract_window(&img1, x, y, window_size);
            let window2 = extract_window(&img2, x, y, window_size);
            let mean1 = window_mean(&window1);
            let mean2 = window_mean(&window2);
            let var1 = window_variance(&window1, mean1);
            let var2 = window_variance(&window2, mean2);
            let covar = window_covariance(&window1, &window2, mean1, mean2);

            let numerator = (2.0 * mean1 * mean2 + c1) * (2.0 * covar + c2);
            let denominator = (mean1 * mean1 + mean2 * mean2 + c1) * (var1 + var2 + c2);
            // Only reachable with zero constants on flat black windows.
            if denominator != 0.0 {
                ssim_sum += numerator / denominator;
                count += 1;
            }
        }
    }

    if count == 0 {
        return 0.0;
    }
    ssim_sum / count as f64
}
/// Collects the samples of the square window centred on `(cx, cy)`, row by
/// row, as `f64`.
///
/// The window extends `window_size / 2` pixels in each direction and is
/// clipped to the image bounds. An empty image, or a centre so far outside
/// the image that nothing remains after clipping, yields an empty vector.
/// The image is indexed as single-channel (`data[y * width + x]`).
fn extract_window(image: &Image, cx: usize, cy: usize, window_size: usize) -> Vec<f64> {
    if image.width == 0 || image.height == 0 {
        return Vec::new();
    }
    let half = window_size / 2;
    let x_first = cx.saturating_sub(half);
    let x_last = cx.saturating_add(half).min(image.width - 1);
    let y_first = cy.saturating_sub(half);
    let y_last = cy.saturating_add(half).min(image.height - 1);
    if x_first > x_last || y_first > y_last {
        return Vec::new();
    }

    let mut window = Vec::with_capacity((x_last - x_first + 1) * (y_last - y_first + 1));
    for y in y_first..=y_last {
        let row_start = y * image.width;
        window.extend(
            image.data[row_start + x_first..=row_start + x_last]
                .iter()
                .map(|&sample| f64::from(sample)),
        );
    }
    window
}
/// Returns the arithmetic mean of the window samples (NaN for an empty window).
fn window_mean(window: &[f64]) -> f64 {
    let total: f64 = window.iter().sum();
    let sample_count = window.len() as f64;
    total / sample_count
}
/// Returns the population variance of the window samples around `mean`
/// (the sum of squared deviations divided by the sample count).
fn window_variance(window: &[f64], mean: f64) -> f64 {
    let squared_deviations = window.iter().map(|sample| (*sample - mean).powi(2));
    squared_deviations.sum::<f64>() / window.len() as f64
}
/// Returns the population covariance of two windows around their given means.
///
/// Samples are paired by index (extra samples in the longer window are
/// ignored) and the sum of products is divided by the length of `window1`.
fn window_covariance(window1: &[f64], window2: &[f64], mean1: f64, mean2: f64) -> f64 {
    let cross_products = window1
        .iter()
        .zip(window2)
        .map(|(a, b)| (a - mean1) * (b - mean2));
    cross_products.sum::<f64>() / window1.len() as f64
}
/// An interest point detected by [`extract_features`], together with the
/// descriptor used to match it against points from another image.
#[derive(Debug, Clone)]
pub struct FeaturePoint {
/// Horizontal pixel coordinate of the point.
pub x: f64,
/// Vertical pixel coordinate of the point.
pub y: f64,
/// Descriptor vector; `extract_features` fills it from `compute_descriptor`.
pub descriptor: Vec<f64>,
}
/// Detects interest points in the grayscale version of `image`.
///
/// Every pixel at least two pixels away from each border is scored with
/// `compute_corner_response`; pixels scoring above a fixed threshold become
/// [`FeaturePoint`]s carrying a descriptor from `compute_descriptor`. Points
/// are returned in row-major scan order.
///
/// Images narrower or shorter than five pixels have no such interior pixels
/// and yield an empty list (including images under two pixels per side, for
/// which the border arithmetic would otherwise underflow).
#[must_use]
pub fn extract_features(image: &Image) -> Vec<FeaturePoint> {
    /// Number of pixels skipped along each border.
    const BORDER: usize = 2;
    /// Minimum corner response for a pixel to count as a feature.
    const THRESHOLD: f64 = 100.0;

    let gray = image.to_grayscale();
    let mut features = Vec::new();
    for y in BORDER..gray.height.saturating_sub(BORDER) {
        for x in BORDER..gray.width.saturating_sub(BORDER) {
            if compute_corner_response(&gray, x, y) > THRESHOLD {
                features.push(FeaturePoint {
                    x: x as f64,
                    y: y as f64,
                    descriptor: compute_descriptor(&gray, x, y),
                });
            }
        }
    }
    features
}
/// Returns the sum of squared intensity differences between pixel `(x, y)`
/// and each of its up-to-eight neighbours that lie inside the image.
///
/// Neighbour ranges are clipped with unsigned arithmetic, so no signed casts
/// (which would truncate very large coordinates) are involved. The image is
/// indexed as single-channel (`data[y * width + x]`).
///
/// # Panics
///
/// Panics if `(x, y)` does not address a sample inside `image.data`.
fn compute_corner_response(image: &Image, x: usize, y: usize) -> f64 {
    let center = f64::from(image.data[y * image.width + x]);

    // Half-open ranges covering the 3x3 neighbourhood, clipped to the image.
    let rows = y.saturating_sub(1)..y.saturating_add(2).min(image.height);
    let cols = x.saturating_sub(1)..x.saturating_add(2).min(image.width);

    let mut sum = 0.0;
    for ny in rows {
        for nx in cols.clone() {
            if nx == x && ny == y {
                continue;
            }
            let diff = center - f64::from(image.data[ny * image.width + nx]);
            sum += diff * diff;
        }
    }
    sum
}
/// Builds an L2-normalised patch descriptor centred on `(cx, cy)`.
///
/// The patch spans `DESC_SIZE / 2` pixels in every direction (9x9 = 81
/// samples), scanned row by row. Coordinates falling outside the image are
/// clamped to the nearest edge pixel. An all-zero patch is returned as is,
/// since it cannot be normalised.
fn compute_descriptor(image: &Image, cx: usize, cy: usize) -> Vec<f64> {
    const DESC_SIZE: usize = 8;
    const RADIUS: i32 = DESC_SIZE as i32 / 2;

    let last_col = image.width as i32 - 1;
    let last_row = image.height as i32 - 1;
    let side = (2 * RADIUS + 1) as usize;

    let mut descriptor = Vec::with_capacity(side * side);
    for dy in -RADIUS..=RADIUS {
        let ny = (cy as i32 + dy).clamp(0, last_row) as usize;
        for dx in -RADIUS..=RADIUS {
            let nx = (cx as i32 + dx).clamp(0, last_col) as usize;
            descriptor.push(f64::from(image.data[ny * image.width + nx]));
        }
    }

    let norm: f64 = descriptor.iter().map(|&v| v * v).sum::<f64>().sqrt();
    if norm > 0.0 {
        for value in &mut descriptor {
            *value /= norm;
        }
    }
    descriptor
}
/// Counts the features of `features1` that have a distinctive match in
/// `features2`.
///
/// For each query feature the two smallest descriptor distances to the
/// candidates are found. The feature counts as matched when the best distance
/// is strictly below `MATCH_THRESHOLD` times the second best (a ratio test).
/// With a single candidate the second-best distance stays at `f64::MAX`; with
/// no candidates nothing matches.
#[must_use]
pub fn match_features(features1: &[FeaturePoint], features2: &[FeaturePoint]) -> usize {
    const MATCH_THRESHOLD: f64 = 0.8;

    features1
        .iter()
        .filter(|query| {
            let (best, runner_up) = features2
                .iter()
                .map(|candidate| descriptor_distance(&query.descriptor, &candidate.descriptor))
                .fold((f64::MAX, f64::MAX), |(best, second), distance| {
                    if distance < best {
                        (distance, best)
                    } else if distance < second {
                        (best, distance)
                    } else {
                        (best, second)
                    }
                });
            best < MATCH_THRESHOLD * runner_up
        })
        .count()
}
/// Returns the Euclidean distance between two descriptors.
///
/// Components are paired by index; extra components of the longer descriptor
/// are ignored.
fn descriptor_distance(desc1: &[f64], desc2: &[f64]) -> f64 {
    let squared_sum: f64 = desc1
        .iter()
        .zip(desc2)
        .map(|(lhs, rhs)| (lhs - rhs).powi(2))
        .sum();
    squared_sum.sqrt()
}
/// Computes a 64-bit wavelet hash (wHash) from one level of a 2-D Haar
/// transform.
///
/// The image is converted to grayscale and resized to 8x8. Each row is split
/// into pairwise averages (left half) and pairwise half-differences (right
/// half); the same split is then applied down each column. Bit `i` is set
/// when coefficient `i` (row-major) is strictly greater than the median,
/// taken as the element at index `len / 2` of the sorted coefficients.
#[must_use]
pub fn compute_whash(image: &Image) -> PerceptualHash {
    const HASH_SIZE: usize = 8;
    const HALF: usize = HASH_SIZE / 2;

    let thumbnail = image.to_grayscale().resize(HASH_SIZE, HASH_SIZE);

    // Horizontal pass: averages go to the left half, details to the right half.
    let mut horizontal = vec![0.0f64; HASH_SIZE * HASH_SIZE];
    for row in 0..HASH_SIZE {
        let base = row * HASH_SIZE;
        for pair in 0..HALF {
            let left = f64::from(thumbnail.data[base + 2 * pair]);
            let right = f64::from(thumbnail.data[base + 2 * pair + 1]);
            horizontal[base + pair] = (left + right) / 2.0;
            horizontal[base + HALF + pair] = (left - right) / 2.0;
        }
    }

    // Vertical pass: averages go to the top half, details to the bottom half.
    let mut coefficients = vec![0.0f64; HASH_SIZE * HASH_SIZE];
    for col in 0..HASH_SIZE {
        for pair in 0..HALF {
            let upper = horizontal[(2 * pair) * HASH_SIZE + col];
            let lower = horizontal[(2 * pair + 1) * HASH_SIZE + col];
            coefficients[pair * HASH_SIZE + col] = (upper + lower) / 2.0;
            coefficients[(HALF + pair) * HASH_SIZE + col] = (upper - lower) / 2.0;
        }
    }

    let mut ordered = coefficients.clone();
    ordered.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
    let median = ordered[ordered.len() / 2];

    let bits = coefficients
        .iter()
        .enumerate()
        .filter(|&(_, &coefficient)| coefficient > median)
        .fold(0u64, |acc, (index, _)| acc | (1u64 << index));
    PerceptualHash::new(bits, 64)
}
pub fn compare_images(image1: &Image, image2: &Image) -> DedupResult<VisualSimilarity> {
let dhash1 = compute_dhash(image1);
let dhash2 = compute_dhash(image2);
let dhash_similarity = dhash1.similarity(&dhash2);
let ahash1 = compute_ahash(image1);
let ahash2 = compute_ahash(image2);
let ahash_similarity = ahash1.similarity(&ahash2);
let phash1 = compute_phash(image1);
let phash2 = compute_phash(image2);
let phash_similarity = phash1.similarity(&phash2);
let whash1 = compute_whash(image1);
let whash2 = compute_whash(image2);
let whash_similarity = whash1.similarity(&whash2);
let hist1 = compute_histogram(image1);
let hist2 = compute_histogram(image2);
let histogram_similarity = compare_histograms(&hist1, &hist2);
let ssim_params = SsimParams::default();
let ssim = compute_ssim(image1, image2, &ssim_params);
let features1 = extract_features(image1);
let features2 = extract_features(image2);
let feature_matches = match_features(&features1, &features2);
Ok(VisualSimilarity {
dhash_similarity,
ahash_similarity,
phash_similarity,
whash_similarity,
histogram_similarity,
ssim,
feature_matches,
})
}
/// Thumbnail dimensions used by [`find_ssim_duplicates_with_config`].
///
/// That function raises either dimension to at least 4 before using it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SsimConfig {
/// Thumbnail width in pixels.
pub thumbnail_width: u32,
/// Thumbnail height in pixels.
pub thumbnail_height: u32,
}
impl Default for SsimConfig {
/// Returns the default 8x8 thumbnail size.
fn default() -> Self {
Self {
thumbnail_width: 8,
thumbnail_height: 8,
}
}
}
/// Groups files whose byte-derived thumbnails have an SSIM of at least
/// `threshold`.
///
/// Files are not decoded. Each file's leading bytes are interpreted as the
/// pixels of a single-channel `thumbnail_width` x `thumbnail_height` image
/// (both dimensions raised to at least 4); files shorter than the thumbnail
/// are tiled to fill it, and empty files become all-zero thumbnails. Only the
/// first `width * height` bytes can influence the thumbnail, so only those
/// are read from disk.
///
/// Grouping is greedy: each not-yet-grouped file, in input order, collects
/// every later ungrouped file that is similar enough to it. A group's score is
/// the highest SSIM seen between its first file and any other member.
///
/// Files that cannot be read are skipped silently (best effort). Fewer than
/// two readable files yield an empty result.
///
/// # Errors
///
/// The current implementation always returns `Ok`.
pub fn find_ssim_duplicates_with_config(
    files: &[std::path::PathBuf],
    threshold: f64,
    config: &SsimConfig,
) -> crate::DedupResult<Vec<crate::report::DuplicateGroup>> {
    use std::io::Read;

    let tw = config.thumbnail_width.max(4) as usize;
    let th = config.thumbnail_height.max(4) as usize;
    let pixel_count = tw * th;

    let mut images: Vec<(&std::path::PathBuf, Image)> = Vec::new();
    for path in files {
        // Read at most `pixel_count` bytes; later bytes never reach the thumbnail.
        let mut bytes = Vec::with_capacity(pixel_count);
        let read_result = std::fs::File::open(path)
            .and_then(|file| file.take(pixel_count as u64).read_to_end(&mut bytes));
        if read_result.is_err() {
            continue;
        }

        let pixel_data: Vec<u8> = if bytes.is_empty() {
            vec![0u8; pixel_count]
        } else {
            bytes.iter().copied().cycle().take(pixel_count).collect()
        };
        if let Ok(img) = Image::from_data(tw, th, 1, pixel_data) {
            images.push((path, img));
        }
    }

    if images.len() < 2 {
        return Ok(Vec::new());
    }

    let ssim_params = SsimParams::default();
    let mut groups: Vec<crate::report::DuplicateGroup> = Vec::new();
    let mut assigned = vec![false; images.len()];

    for i in 0..images.len() {
        if assigned[i] {
            continue;
        }
        let (anchor_path, anchor_image) = &images[i];
        let mut group_files: Vec<String> = vec![anchor_path.to_string_lossy().into_owned()];
        let mut best_score = 0.0f64;

        for j in (i + 1)..images.len() {
            if assigned[j] {
                continue;
            }
            let (candidate_path, candidate_image) = &images[j];
            let ssim = compute_ssim(anchor_image, candidate_image, &ssim_params);
            if ssim >= threshold {
                group_files.push(candidate_path.to_string_lossy().into_owned());
                assigned[j] = true;
                if ssim > best_score {
                    best_score = ssim;
                }
            }
        }

        if group_files.len() > 1 {
            assigned[i] = true;
            groups.push(crate::report::DuplicateGroup {
                files: group_files,
                scores: vec![crate::report::SimilarityScore {
                    method: "ssim".to_string(),
                    score: best_score,
                    metadata: Vec::new(),
                }],
            });
        }
    }
    Ok(groups)
}
/// Groups similar files using the default [`SsimConfig`].
///
/// Forwards `files` and `threshold` unchanged to
/// [`find_ssim_duplicates_with_config`] and returns its result.
pub fn find_ssim_duplicates(
files: &[std::path::PathBuf],
threshold: f64,
) -> crate::DedupResult<Vec<crate::report::DuplicateGroup>> {
find_ssim_duplicates_with_config(files, threshold, &SsimConfig::default())
}
/// The individual similarity metrics produced by [`compare_images`].
#[derive(Debug, Clone)]
pub struct VisualSimilarity {
    /// Similarity of the two difference hashes.
    pub dhash_similarity: f64,
    /// Similarity of the two average hashes.
    pub ahash_similarity: f64,
    /// Similarity of the two DCT-based perceptual hashes.
    pub phash_similarity: f64,
    /// Similarity of the two wavelet hashes.
    pub whash_similarity: f64,
    /// Mean per-channel histogram correlation.
    pub histogram_similarity: f64,
    /// Mean structural similarity.
    pub ssim: f64,
    /// Number of matched feature points.
    pub feature_matches: usize,
}

impl VisualSimilarity {
    /// Combines all metrics into one weighted score.
    ///
    /// Weights: mean of the four hash similarities 0.3, histogram 0.2,
    /// SSIM 0.3, features 0.2. The feature term is `feature_matches / 100`,
    /// capped at 1.0.
    #[must_use]
    pub fn overall_score(&self) -> f64 {
        const HASH_WEIGHT: f64 = 0.3;
        const HISTOGRAM_WEIGHT: f64 = 0.2;
        const SSIM_WEIGHT: f64 = 0.3;
        const FEATURE_WEIGHT: f64 = 0.2;
        const FEATURE_SATURATION: f64 = 100.0;

        let hash_total = self.dhash_similarity
            + self.ahash_similarity
            + self.phash_similarity
            + self.whash_similarity;
        let hash_score = hash_total / 4.0;
        let feature_score = (self.feature_matches as f64 / FEATURE_SATURATION).min(1.0);

        let hash_part = hash_score * HASH_WEIGHT;
        let histogram_part = self.histogram_similarity * HISTOGRAM_WEIGHT;
        let ssim_part = self.ssim * SSIM_WEIGHT;
        let feature_part = feature_score * FEATURE_WEIGHT;
        hash_part + histogram_part + ssim_part + feature_part
    }

    /// Returns `true` when [`Self::overall_score`] is at least `threshold`.
    #[must_use]
    pub fn is_similar(&self, threshold: f64) -> bool {
        let score = self.overall_score();
        score >= threshold
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a single-channel image whose samples follow the repeating
    /// ramp `0, 1, ..., 255, 0, ...` in row-major order.
    fn create_test_image(width: usize, height: usize) -> Image {
        let data = (0..width * height).map(|i| (i % 256) as u8).collect();
        Image {
            width,
            height,
            data,
            channels: 1,
        }
    }

    #[test]
    fn test_image_creation() {
        let img = Image::new(100, 100, 3);
        assert_eq!(img.width, 100);
        assert_eq!(img.height, 100);
        assert_eq!(img.channels, 3);
        assert_eq!(img.data.len(), 100 * 100 * 3);
    }

    #[test]
    fn test_grayscale_conversion() {
        let img = create_test_image(10, 10);
        let gray = img.to_grayscale();
        assert_eq!(gray.channels, 1);
        assert_eq!(gray.width, 10);
        assert_eq!(gray.height, 10);
    }

    #[test]
    fn test_image_resize() {
        let img = create_test_image(100, 100);
        let resized = img.resize(50, 50);
        assert_eq!(resized.width, 50);
        assert_eq!(resized.height, 50);
    }

    #[test]
    fn test_dhash() {
        let img = create_test_image(64, 64);
        let hash = compute_dhash(&img);
        assert!(hash.hash() != 0);
    }

    #[test]
    fn test_ahash() {
        let img = create_test_image(64, 64);
        let hash = compute_ahash(&img);
        assert!(hash.hash() != 0);
    }

    #[test]
    fn test_phash() {
        let img = create_test_image(64, 64);
        let hash = compute_phash(&img);
        assert!(hash.hash() != 0);
    }

    #[test]
    fn test_hash_similarity() {
        let img1 = create_test_image(64, 64);
        let img2 = create_test_image(64, 64);
        let hash1 = compute_dhash(&img1);
        let hash2 = compute_dhash(&img2);
        assert_eq!(hash1.similarity(&hash2), 1.0);
    }

    #[test]
    fn test_histogram() {
        let img = create_test_image(10, 10);
        let hist = compute_histogram(&img);
        assert_eq!(hist.len(), 1);
        assert_eq!(hist[0].len(), 256);
    }

    #[test]
    fn test_histogram_comparison() {
        let img1 = create_test_image(10, 10);
        let img2 = create_test_image(10, 10);
        let hist1 = compute_histogram(&img1);
        let hist2 = compute_histogram(&img2);
        let similarity = compare_histograms(&hist1, &hist2);
        assert!(similarity >= 0.0 && similarity <= 1.0);
    }

    #[test]
    fn test_ssim() {
        let img1 = create_test_image(64, 64);
        let img2 = create_test_image(64, 64);
        let params = SsimParams::default();
        let ssim = compute_ssim(&img1, &img2, &params);
        assert!(ssim > 0.9);
    }

    #[test]
    fn test_feature_extraction() {
        let img = create_test_image(64, 64);
        let features = extract_features(&img);
        assert!(!features.is_empty());
        for feature in &features {
            assert!(!feature.descriptor.is_empty());
        }
    }

    #[test]
    fn test_feature_matching() {
        let img = create_test_image(16, 16);
        let features1 = extract_features(&img);
        let features2 = extract_features(&img);
        let matches = match_features(&features1, &features2);
        assert!(matches > 0);
    }

    #[test]
    fn test_whash() {
        let img = create_test_image(64, 64);
        let hash = compute_whash(&img);
        assert!(hash.hash() != 0);
    }

    #[test]
    fn test_whash_identical() {
        let img = create_test_image(64, 64);
        let h1 = compute_whash(&img);
        let h2 = compute_whash(&img);
        assert_eq!(h1.similarity(&h2), 1.0);
    }

    #[test]
    fn test_whash_different() {
        let img1 = create_test_image(64, 64);
        let data: Vec<u8> = (0..64 * 64)
            .map(|i| (255u16.saturating_sub((i * 3 % 256) as u16)) as u8)
            .collect();
        let img2 = Image {
            width: 64,
            height: 64,
            data,
            channels: 1,
        };
        let h1 = compute_whash(&img1);
        let h2 = compute_whash(&img2);
        // NOTE(review): `similarity` never exceeds 1.0, so the second operand is
        // always true; this test effectively only checks that hashing both
        // images completes without panicking.
        assert!(
            h1.hash() != h2.hash() || h1.similarity(&h2) <= 1.0,
            "Clearly different images should produce distinct wHash values"
        );
    }

    #[test]
    fn test_whash_deterministic() {
        let img = create_test_image(32, 32);
        let h1 = compute_whash(&img);
        let h2 = compute_whash(&img);
        assert_eq!(h1.hash(), h2.hash());
    }

    #[test]
    fn test_compare_images_includes_whash() {
        let img = create_test_image(64, 64);
        let result = compare_images(&img, &img).expect("should succeed");
        assert!(result.whash_similarity > 0.9);
    }

    #[test]
    fn test_ssim_config_default_is_8x8() {
        let cfg = SsimConfig::default();
        assert_eq!(cfg.thumbnail_width, 8);
        assert_eq!(cfg.thumbnail_height, 8);
    }

    #[test]
    fn test_ssim_config_custom_16x16() {
        let config = SsimConfig {
            thumbnail_width: 16,
            thumbnail_height: 16,
        };
        let dir = std::env::temp_dir().join("oximedia_ssim_16x16");
        let _ = std::fs::create_dir_all(&dir);
        let f1 = dir.join("a.bin");
        let f2 = dir.join("b.bin");
        std::fs::write(&f1, &[128u8; 256]).expect("write f1");
        std::fs::write(&f2, &[200u8; 256]).expect("write f2");
        let result = find_ssim_duplicates_with_config(&[f1, f2], 0.5, &config);
        assert!(result.is_ok(), "16x16 config should run without error");
        let _ = std::fs::remove_dir_all(&dir);
    }

    #[test]
    fn test_ssim_config_default_matches_legacy() {
        let dir = std::env::temp_dir().join("oximedia_ssim_legacy");
        let _ = std::fs::create_dir_all(&dir);
        let f1 = dir.join("a.bin");
        let f2 = dir.join("b.bin");
        std::fs::write(&f1, &[64u8; 64]).expect("write f1");
        std::fs::write(&f2, &[64u8; 64]).expect("write f2");
        let r1 =
            find_ssim_duplicates(&[f1.clone(), f2.clone()], 0.5).expect("legacy should succeed");
        let r2 = find_ssim_duplicates_with_config(&[f1, f2], 0.5, &SsimConfig::default())
            .expect("config should succeed");
        assert_eq!(r1.len(), r2.len(), "default config should match legacy");
        let _ = std::fs::remove_dir_all(&dir);
    }

    #[test]
    fn test_ssim_duplicates_identical_files_grouped() {
        let dir = std::env::temp_dir().join("oximedia_ssim_identical");
        let _ = std::fs::create_dir_all(&dir);
        let f1 = dir.join("same_a.bin");
        let f2 = dir.join("same_b.bin");
        let config = SsimConfig {
            thumbnail_width: 32,
            thumbnail_height: 32,
        };
        let payload = vec![42u8; 1024];
        std::fs::write(&f1, &payload).expect("write f1");
        std::fs::write(&f2, &payload).expect("write f2");
        let groups =
            find_ssim_duplicates_with_config(&[f1, f2], 0.9, &config).expect("should succeed");
        assert_eq!(groups.len(), 1, "identical files should form one group");
        let _ = std::fs::remove_dir_all(&dir);
    }

    #[test]
    fn test_ssim_single_file_returns_empty() {
        let dir = std::env::temp_dir().join("oximedia_ssim_single");
        let _ = std::fs::create_dir_all(&dir);
        let f1 = dir.join("solo.bin");
        std::fs::write(&f1, &[0u8; 32]).expect("write");
        let groups = find_ssim_duplicates(&[f1], 0.5).expect("should succeed");
        assert!(groups.is_empty(), "single file cannot form a group");
        let _ = std::fs::remove_dir_all(&dir);
    }
}