1use crate::{DedupError, DedupResult};
11
12#[derive(Debug, Clone)]
14pub struct Image {
15 pub width: usize,
17
18 pub height: usize,
20
21 pub data: Vec<u8>,
23
24 pub channels: usize,
26}
27
28impl Image {
29 #[must_use]
31 pub fn new(width: usize, height: usize, channels: usize) -> Self {
32 let data = vec![0u8; width * height * channels];
33 Self {
34 width,
35 height,
36 data,
37 channels,
38 }
39 }
40
41 pub fn from_data(
47 width: usize,
48 height: usize,
49 channels: usize,
50 data: Vec<u8>,
51 ) -> DedupResult<Self> {
52 if data.len() != width * height * channels {
53 return Err(DedupError::Visual(format!(
54 "Invalid data size: expected {}, got {}",
55 width * height * channels,
56 data.len()
57 )));
58 }
59 Ok(Self {
60 width,
61 height,
62 data,
63 channels,
64 })
65 }
66
67 #[must_use]
69 pub fn to_grayscale(&self) -> Self {
70 if self.channels == 1 {
71 return self.clone();
72 }
73
74 let mut gray = Vec::with_capacity(self.width * self.height);
75
76 for y in 0..self.height {
77 for x in 0..self.width {
78 let idx = (y * self.width + x) * self.channels;
79 let r = f64::from(self.data[idx]);
80 let g = f64::from(self.data[idx + 1]);
81 let b = f64::from(self.data[idx + 2]);
82
83 let gray_value = (0.299 * r + 0.587 * g + 0.114 * b) as u8;
85 gray.push(gray_value);
86 }
87 }
88
89 Self {
90 width: self.width,
91 height: self.height,
92 data: gray,
93 channels: 1,
94 }
95 }
96
97 #[must_use]
99 pub fn resize(&self, new_width: usize, new_height: usize) -> Self {
100 let mut resized = Image::new(new_width, new_height, self.channels);
101
102 let x_ratio = self.width as f64 / new_width as f64;
103 let y_ratio = self.height as f64 / new_height as f64;
104
105 for y in 0..new_height {
106 for x in 0..new_width {
107 let src_x = (x as f64 * x_ratio) as usize;
108 let src_y = (y as f64 * y_ratio) as usize;
109
110 let src_idx = (src_y * self.width + src_x) * self.channels;
111 let dst_idx = (y * new_width + x) * self.channels;
112
113 for c in 0..self.channels {
114 resized.data[dst_idx + c] = self.data[src_idx + c];
115 }
116 }
117 }
118
119 resized
120 }
121
122 #[must_use]
124 pub fn get_pixel(&self, x: usize, y: usize) -> Option<&[u8]> {
125 if x >= self.width || y >= self.height {
126 return None;
127 }
128 let idx = (y * self.width + x) * self.channels;
129 Some(&self.data[idx..idx + self.channels])
130 }
131
132 #[must_use]
134 pub fn mean(&self) -> f64 {
135 let sum: u64 = self.data.iter().map(|&v| u64::from(v)).sum();
136 sum as f64 / self.data.len() as f64
137 }
138}
139
/// A perceptual hash value together with the number of meaningful bits it carries.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PerceptualHash {
    hash: u64,
    bits: usize,
}

impl PerceptualHash {
    /// Creates a hash from its raw value and its width in bits.
    #[must_use]
    pub fn new(hash: u64, bits: usize) -> Self {
        Self { hash, bits }
    }

    /// Returns the raw 64-bit hash value.
    #[must_use]
    pub fn hash(&self) -> u64 {
        self.hash
    }

    /// Returns the number of bit positions in which the two raw hash values differ.
    #[must_use]
    pub fn hamming_distance(&self, other: &Self) -> u32 {
        (self.hash ^ other.hash).count_ones()
    }

    /// Returns a similarity score in `0.0..=1.0`, where `1.0` means identical hashes.
    ///
    /// The score is `1 - hamming_distance / bits`, using this hash's `bits`.
    /// A zero-width hash carries no distance scale, so it scores `1.0` only
    /// when the raw values are equal and `0.0` otherwise. Distances larger
    /// than `bits` are floored at `0.0` instead of going negative.
    #[must_use]
    pub fn similarity(&self, other: &Self) -> f64 {
        let distance = self.hamming_distance(other);
        if self.bits == 0 {
            // Avoid 0/0 (NaN) and x/0 (-inf).
            return if distance == 0 { 1.0 } else { 0.0 };
        }
        (1.0 - f64::from(distance) / self.bits as f64).max(0.0)
    }

    /// Formats the raw hash as 16 lowercase, zero-padded hexadecimal digits.
    #[must_use]
    pub fn to_hex(&self) -> String {
        format!("{:016x}", self.hash)
    }
}
179
180#[must_use]
184pub fn compute_dhash(image: &Image) -> PerceptualHash {
185 const HASH_SIZE: usize = 8;
186
187 let gray = image.to_grayscale();
189 let resized = gray.resize(HASH_SIZE + 1, HASH_SIZE);
190
191 let mut hash = 0u64;
192 let mut bit = 0;
193
194 for y in 0..HASH_SIZE {
195 for x in 0..HASH_SIZE {
196 let idx1 = y * (HASH_SIZE + 1) + x;
197 let idx2 = y * (HASH_SIZE + 1) + x + 1;
198
199 if resized.data[idx2] > resized.data[idx1] {
200 hash |= 1u64 << bit;
201 }
202 bit += 1;
203 }
204 }
205
206 PerceptualHash::new(hash, 64)
207}
208
209#[must_use]
213pub fn compute_ahash(image: &Image) -> PerceptualHash {
214 const HASH_SIZE: usize = 8;
215
216 let gray = image.to_grayscale();
218 let resized = gray.resize(HASH_SIZE, HASH_SIZE);
219
220 let mean = resized.mean();
221 let mut hash = 0u64;
222
223 for (i, &pixel) in resized.data.iter().enumerate() {
224 if f64::from(pixel) > mean {
225 hash |= 1u64 << i;
226 }
227 }
228
229 PerceptualHash::new(hash, 64)
230}
231
/// Computes the orthonormal 2-D DCT-II of a `rows` x `cols` row-major matrix.
///
/// `input` must hold at least `rows * cols` values. The returned vector has
/// `rows * cols` coefficients in row-major order, with the DC term at index 0.
///
/// The cosine basis depends only on `(frequency, position)`, so it is
/// tabulated once per axis instead of being recomputed inside the innermost
/// loop. The per-term expression and the summation order are unchanged, so
/// the coefficients are bit-identical to the naive evaluation.
///
/// # Panics
///
/// Panics if `input` has fewer than `rows * cols` elements.
fn dct_2d(input: &[f64], rows: usize, cols: usize) -> Vec<f64> {
    /// Builds `table[k * n + i] = cos((2i + 1) * k * PI / (2n))`.
    fn cosine_table(n: usize) -> Vec<f64> {
        let mut table = Vec::with_capacity(n * n);
        for k in 0..n {
            for i in 0..n {
                table.push(
                    ((2 * i + 1) as f64 * k as f64 * std::f64::consts::PI / (2.0 * n as f64))
                        .cos(),
                );
            }
        }
        table
    }

    let row_cos = cosine_table(rows);
    let col_cos = cosine_table(cols);
    let mut output = vec![0.0; rows * cols];

    for u in 0..rows {
        let cos_u = &row_cos[u * rows..(u + 1) * rows];
        // Normalisation factor for the vertical frequency.
        let cu = if u == 0 {
            (1.0 / rows as f64).sqrt()
        } else {
            (2.0 / rows as f64).sqrt()
        };

        for v in 0..cols {
            let cos_v = &col_cos[v * cols..(v + 1) * cols];
            // Normalisation factor for the horizontal frequency.
            let cv = if v == 0 {
                (1.0 / cols as f64).sqrt()
            } else {
                (2.0 / cols as f64).sqrt()
            };

            let mut sum = 0.0;
            for i in 0..rows {
                for j in 0..cols {
                    sum += input[i * cols + j] * cos_u[i] * cos_v[j];
                }
            }

            output[u * cols + v] = cu * cv * sum;
        }
    }

    output
}
272
273#[must_use]
275pub fn compute_phash(image: &Image) -> PerceptualHash {
276 const HASH_SIZE: usize = 8;
277 const DCT_SIZE: usize = 32;
278
279 let gray = image.to_grayscale();
281 let resized = gray.resize(DCT_SIZE, DCT_SIZE);
282
283 let mut input = vec![0.0f64; DCT_SIZE * DCT_SIZE];
285 for y in 0..DCT_SIZE {
286 for x in 0..DCT_SIZE {
287 let idx = y * DCT_SIZE + x;
288 input[idx] = f64::from(resized.data[idx]);
289 }
290 }
291
292 let dct = dct_2d(&input, DCT_SIZE, DCT_SIZE);
294
295 let mut low_freq = Vec::new();
297 for y in 0..HASH_SIZE {
298 for x in 0..HASH_SIZE {
299 low_freq.push(dct[y * DCT_SIZE + x]);
300 }
301 }
302
303 let mut sorted = low_freq.clone();
305 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
306 let median = sorted[sorted.len() / 2];
307
308 let mut hash = 0u64;
310 for (i, &val) in low_freq.iter().enumerate() {
311 if val > median {
312 hash |= 1u64 << i;
313 }
314 }
315
316 PerceptualHash::new(hash, 64)
317}
318
319#[must_use]
321pub fn compute_histogram(image: &Image) -> Vec<Vec<u32>> {
322 let mut histograms = vec![vec![0u32; 256]; image.channels];
323
324 for i in 0..image.data.len() {
325 let channel = i % image.channels;
326 let value = image.data[i] as usize;
327 histograms[channel][value] += 1;
328 }
329
330 histograms
331}
332
/// Compares two sets of per-channel histograms.
///
/// Returns the mean Pearson correlation over all channel pairs, a value in
/// `-1.0..=1.0` where `1.0` means the histograms have the same shape.
/// Returns `0.0` when the channel counts differ or when there are no
/// channels at all (the latter used to evaluate `0.0 / 0.0`, i.e. NaN).
#[must_use]
pub fn compare_histograms(hist1: &[Vec<u32>], hist2: &[Vec<u32>]) -> f64 {
    if hist1.len() != hist2.len() || hist1.is_empty() {
        return 0.0;
    }

    let total: f64 = hist1
        .iter()
        .zip(hist2)
        .map(|(h1, h2)| histogram_correlation(h1, h2))
        .sum();

    total / hist1.len() as f64
}

/// Pearson correlation coefficient between two histograms.
///
/// Returns `0.0` when the histograms have different bin counts, when they are
/// empty, or when either one is constant (zero variance), because the
/// coefficient is undefined in those cases.
fn histogram_correlation(hist1: &[u32], hist2: &[u32]) -> f64 {
    // Bins must line up one-to-one; previously a shorter `hist2` caused an
    // out-of-bounds panic and a longer one skewed its mean.
    if hist1.len() != hist2.len() || hist1.is_empty() {
        return 0.0;
    }

    let bins = hist1.len() as f64;
    let mean1: f64 = hist1.iter().map(|&v| f64::from(v)).sum::<f64>() / bins;
    let mean2: f64 = hist2.iter().map(|&v| f64::from(v)).sum::<f64>() / bins;

    let mut numerator = 0.0_f64;
    let mut denom1 = 0.0_f64;
    let mut denom2 = 0.0_f64;

    for (&a, &b) in hist1.iter().zip(hist2) {
        let d1 = f64::from(a) - mean1;
        let d2 = f64::from(b) - mean2;

        numerator += d1 * d2;
        denom1 += d1 * d1;
        denom2 += d2 * d2;
    }

    if denom1 == 0.0 || denom2 == 0.0 {
        return 0.0;
    }

    numerator / (denom1 * denom2).sqrt()
}
375
/// Tunable parameters of the SSIM computation.
///
/// Derives `Debug`, `Clone` and `PartialEq` like the other public types in
/// this module, so parameters can be logged, copied and compared.
#[derive(Debug, Clone, PartialEq)]
pub struct SsimParams {
    /// Side length, in pixels, of the square window evaluated around each pixel.
    pub window_size: usize,

    /// Factor used for the first stabilising constant, `c1 = (k1 * l)^2`.
    pub k1: f64,

    /// Factor used for the second stabilising constant, `c2 = (k2 * l)^2`.
    pub k2: f64,

    /// Scale multiplied with `k1` and `k2`; the default of 255 matches the
    /// largest value of an 8-bit sample.
    pub l: f64,
}

impl Default for SsimParams {
    /// Returns an 11-pixel window with `k1 = 0.01`, `k2 = 0.03` and `l = 255`.
    fn default() -> Self {
        Self {
            window_size: 11,
            k1: 0.01,
            k2: 0.03,
            l: 255.0,
        }
    }
}
401
402#[must_use]
404pub fn compute_ssim(image1: &Image, image2: &Image, params: &SsimParams) -> f64 {
405 let gray1 = image1.to_grayscale();
407 let gray2 = image2.to_grayscale();
408
409 let (width, height) = if gray1.width == gray2.width && gray1.height == gray2.height {
411 (gray1.width, gray1.height)
412 } else {
413 let min_width = gray1.width.min(gray2.width);
414 let min_height = gray1.height.min(gray2.height);
415 (min_width, min_height)
416 };
417
418 let img1 = if gray1.width != width || gray1.height != height {
419 gray1.resize(width, height)
420 } else {
421 gray1
422 };
423
424 let img2 = if gray2.width != width || gray2.height != height {
425 gray2.resize(width, height)
426 } else {
427 gray2
428 };
429
430 let c1 = (params.k1 * params.l).powi(2);
432 let c2 = (params.k2 * params.l).powi(2);
433
434 let mut ssim_sum = 0.0;
435 let mut count = 0;
436
437 let half_window = params.window_size / 2;
438
439 for y in half_window..height.saturating_sub(half_window) {
440 for x in half_window..width.saturating_sub(half_window) {
441 let window1 = extract_window(&img1, x, y, params.window_size);
442 let window2 = extract_window(&img2, x, y, params.window_size);
443
444 let mean1 = window_mean(&window1);
445 let mean2 = window_mean(&window2);
446 let var1 = window_variance(&window1, mean1);
447 let var2 = window_variance(&window2, mean2);
448 let covar = window_covariance(&window1, &window2, mean1, mean2);
449
450 let numerator = (2.0 * mean1 * mean2 + c1) * (2.0 * covar + c2);
451 let denominator = (mean1 * mean1 + mean2 * mean2 + c1) * (var1 + var2 + c2);
452
453 if denominator != 0.0 {
454 ssim_sum += numerator / denominator;
455 count += 1;
456 }
457 }
458 }
459
460 if count == 0 {
461 return 0.0;
462 }
463
464 ssim_sum / count as f64
465}
466
467fn extract_window(image: &Image, cx: usize, cy: usize, window_size: usize) -> Vec<f64> {
469 let half = window_size / 2;
470 let mut window = Vec::new();
471
472 for y in cy.saturating_sub(half)..=(cy + half).min(image.height - 1) {
473 for x in cx.saturating_sub(half)..=(cx + half).min(image.width - 1) {
474 let idx = y * image.width + x;
475 window.push(f64::from(image.data[idx]));
476 }
477 }
478
479 window
480}
481
/// Arithmetic mean of the samples in `window` (NaN for an empty window).
fn window_mean(window: &[f64]) -> f64 {
    let total: f64 = window.iter().copied().sum();
    let sample_count = window.len() as f64;
    total / sample_count
}
486
/// Population variance of `window` around the supplied `mean`.
fn window_variance(window: &[f64], mean: f64) -> f64 {
    let squared_deviations: f64 = window
        .iter()
        .map(|&sample| {
            let deviation = sample - mean;
            deviation.powi(2)
        })
        .sum();
    let sample_count = window.len() as f64;
    squared_deviations / sample_count
}
492
/// Population covariance of two windows around their supplied means.
///
/// Samples are paired positionally; the sum of products is divided by the
/// length of `window1`.
fn window_covariance(window1: &[f64], window2: &[f64], mean1: f64, mean2: f64) -> f64 {
    let paired_samples = window1.iter().zip(window2.iter());
    let product_sum: f64 = paired_samples
        .map(|(&lhs, &rhs)| (lhs - mean1) * (rhs - mean2))
        .sum();
    let sample_count = window1.len() as f64;
    product_sum / sample_count
}
502
/// A detected image feature: a position plus a descriptor of its neighbourhood.
#[derive(Debug, Clone)]
pub struct FeaturePoint {
    /// Horizontal position of the feature, in pixels.
    pub x: f64,

    /// Vertical position of the feature, in pixels.
    pub y: f64,

    /// Descriptor vector used to compare this feature with others.
    pub descriptor: Vec<f64>,
}
515
516#[must_use]
518pub fn extract_features(image: &Image) -> Vec<FeaturePoint> {
519 let gray = image.to_grayscale();
520 let mut features = Vec::new();
521
522 let threshold = 100.0;
524
525 for y in 2..gray.height - 2 {
526 for x in 2..gray.width - 2 {
527 let score = compute_corner_response(&gray, x, y);
528
529 if score > threshold {
530 let descriptor = compute_descriptor(&gray, x, y);
531 features.push(FeaturePoint {
532 x: x as f64,
533 y: y as f64,
534 descriptor,
535 });
536 }
537 }
538 }
539
540 features
541}
542
543fn compute_corner_response(image: &Image, x: usize, y: usize) -> f64 {
545 let idx = y * image.width + x;
546 let center = f64::from(image.data[idx]);
547
548 let mut sum = 0.0;
549 for dy in -1..=1 {
550 for dx in -1..=1 {
551 if dx == 0 && dy == 0 {
552 continue;
553 }
554
555 let nx = (x as i32 + dx) as usize;
556 let ny = (y as i32 + dy) as usize;
557
558 if nx < image.width && ny < image.height {
559 let nidx = ny * image.width + nx;
560 let diff = center - f64::from(image.data[nidx]);
561 sum += diff * diff;
562 }
563 }
564 }
565
566 sum
567}
568
569fn compute_descriptor(image: &Image, cx: usize, cy: usize) -> Vec<f64> {
571 const DESC_SIZE: usize = 8;
572 let mut descriptor = Vec::new();
573
574 for dy in -(DESC_SIZE as i32 / 2)..=(DESC_SIZE as i32 / 2) {
575 for dx in -(DESC_SIZE as i32 / 2)..=(DESC_SIZE as i32 / 2) {
576 let nx = (cx as i32 + dx).clamp(0, image.width as i32 - 1) as usize;
577 let ny = (cy as i32 + dy).clamp(0, image.height as i32 - 1) as usize;
578 let idx = ny * image.width + nx;
579 descriptor.push(f64::from(image.data[idx]));
580 }
581 }
582
583 let norm: f64 = descriptor.iter().map(|&v| v * v).sum::<f64>().sqrt();
585 if norm > 0.0 {
586 descriptor.iter_mut().for_each(|v| *v /= norm);
587 }
588
589 descriptor
590}
591
592#[must_use]
594pub fn match_features(features1: &[FeaturePoint], features2: &[FeaturePoint]) -> usize {
595 let mut matches = 0;
596 const MATCH_THRESHOLD: f64 = 0.8;
597
598 for f1 in features1 {
599 let mut best_distance = f64::MAX;
600 let mut second_best = f64::MAX;
601
602 for f2 in features2 {
603 let distance = descriptor_distance(&f1.descriptor, &f2.descriptor);
604
605 if distance < best_distance {
606 second_best = best_distance;
607 best_distance = distance;
608 } else if distance < second_best {
609 second_best = distance;
610 }
611 }
612
613 if best_distance < MATCH_THRESHOLD * second_best {
615 matches += 1;
616 }
617 }
618
619 matches
620}
621
/// Euclidean distance between two descriptors.
///
/// Components are paired positionally; surplus components of the longer
/// descriptor are ignored.
fn descriptor_distance(desc1: &[f64], desc2: &[f64]) -> f64 {
    let squared_sum: f64 = desc1
        .iter()
        .zip(desc2.iter())
        .map(|(lhs, rhs)| {
            let delta = lhs - rhs;
            delta.powi(2)
        })
        .sum();
    squared_sum.sqrt()
}
631
632#[must_use]
644pub fn compute_whash(image: &Image) -> PerceptualHash {
645 const HASH_SIZE: usize = 8;
646
647 let gray = image.to_grayscale();
648 let resized = gray.resize(HASH_SIZE, HASH_SIZE);
649
650 let mut row_transform = vec![0.0f64; HASH_SIZE * HASH_SIZE];
653 for y in 0..HASH_SIZE {
654 for x in 0..HASH_SIZE / 2 {
655 let idx1 = y * HASH_SIZE + 2 * x;
656 let idx2 = y * HASH_SIZE + 2 * x + 1;
657 let a = f64::from(resized.data[idx1]);
658 let b = f64::from(resized.data[idx2]);
659 row_transform[y * HASH_SIZE + x] = (a + b) / 2.0;
661 row_transform[y * HASH_SIZE + HASH_SIZE / 2 + x] = (a - b) / 2.0;
663 }
664 }
665
666 let mut wavelet = vec![0.0f64; HASH_SIZE * HASH_SIZE];
668 for x in 0..HASH_SIZE {
669 for y in 0..HASH_SIZE / 2 {
670 let idx1 = (2 * y) * HASH_SIZE + x;
671 let idx2 = (2 * y + 1) * HASH_SIZE + x;
672 let a = row_transform[idx1];
673 let b = row_transform[idx2];
674 wavelet[y * HASH_SIZE + x] = (a + b) / 2.0;
675 wavelet[(HASH_SIZE / 2 + y) * HASH_SIZE + x] = (a - b) / 2.0;
676 }
677 }
678
679 let mut sorted = wavelet.clone();
681 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
682 let median = sorted[sorted.len() / 2];
683
684 let mut hash = 0u64;
685 for (i, &val) in wavelet.iter().enumerate() {
686 if val > median {
687 hash |= 1u64 << i;
688 }
689 }
690
691 PerceptualHash::new(hash, 64)
692}
693
694pub fn compare_images(image1: &Image, image2: &Image) -> DedupResult<VisualSimilarity> {
700 let dhash1 = compute_dhash(image1);
702 let dhash2 = compute_dhash(image2);
703 let dhash_similarity = dhash1.similarity(&dhash2);
704
705 let ahash1 = compute_ahash(image1);
706 let ahash2 = compute_ahash(image2);
707 let ahash_similarity = ahash1.similarity(&ahash2);
708
709 let phash1 = compute_phash(image1);
710 let phash2 = compute_phash(image2);
711 let phash_similarity = phash1.similarity(&phash2);
712
713 let whash1 = compute_whash(image1);
714 let whash2 = compute_whash(image2);
715 let whash_similarity = whash1.similarity(&whash2);
716
717 let hist1 = compute_histogram(image1);
718 let hist2 = compute_histogram(image2);
719 let histogram_similarity = compare_histograms(&hist1, &hist2);
720
721 let ssim_params = SsimParams::default();
722 let ssim = compute_ssim(image1, image2, &ssim_params);
723
724 let features1 = extract_features(image1);
725 let features2 = extract_features(image2);
726 let feature_matches = match_features(&features1, &features2);
727
728 Ok(VisualSimilarity {
729 dhash_similarity,
730 ahash_similarity,
731 phash_similarity,
732 whash_similarity,
733 histogram_similarity,
734 ssim,
735 feature_matches,
736 })
737}
738
/// Thumbnail dimensions used by the SSIM-based duplicate search.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SsimConfig {
    /// Thumbnail width in pixels.
    pub thumbnail_width: u32,
    /// Thumbnail height in pixels.
    pub thumbnail_height: u32,
}

impl Default for SsimConfig {
    /// Returns the default 8x8 thumbnail configuration.
    fn default() -> Self {
        const DEFAULT_SIDE: u32 = 8;

        SsimConfig {
            thumbnail_width: DEFAULT_SIDE,
            thumbnail_height: DEFAULT_SIDE,
        }
    }
}
756
757pub fn find_ssim_duplicates_with_config(
767 files: &[std::path::PathBuf],
768 threshold: f64,
769 config: &SsimConfig,
770) -> crate::DedupResult<Vec<crate::report::DuplicateGroup>> {
771 let tw = (config.thumbnail_width.max(4)) as usize;
772 let th = (config.thumbnail_height.max(4)) as usize;
773 let pixel_count = tw * th;
774
775 let mut images: Vec<(std::path::PathBuf, Image)> = Vec::new();
777 for path in files {
778 let bytes = match std::fs::read(path) {
779 Ok(b) => b,
780 Err(_) => continue,
781 };
782 let mut pixel_data = vec![0u8; pixel_count];
785 for (i, px) in pixel_data.iter_mut().enumerate() {
786 *px = if bytes.is_empty() {
787 0u8
788 } else {
789 bytes[i % bytes.len()]
790 };
791 }
792 if let Ok(img) = Image::from_data(tw, th, 1, pixel_data) {
793 images.push((path.clone(), img));
794 }
795 }
796
797 if images.len() < 2 {
798 return Ok(Vec::new());
799 }
800
801 let ssim_params = SsimParams::default();
802 let mut groups: Vec<crate::report::DuplicateGroup> = Vec::new();
803 let mut assigned = vec![false; images.len()];
804
805 for i in 0..images.len() {
806 if assigned[i] {
807 continue;
808 }
809 let mut group_files: Vec<String> = vec![images[i].0.to_string_lossy().to_string()];
810 let mut best_score = 0.0f64;
811
812 for j in (i + 1)..images.len() {
813 if assigned[j] {
814 continue;
815 }
816 let ssim = compute_ssim(&images[i].1, &images[j].1, &ssim_params);
817 if ssim >= threshold {
818 group_files.push(images[j].0.to_string_lossy().to_string());
819 assigned[j] = true;
820 if ssim > best_score {
821 best_score = ssim;
822 }
823 }
824 }
825
826 if group_files.len() > 1 {
827 assigned[i] = true;
828 groups.push(crate::report::DuplicateGroup {
829 files: group_files,
830 scores: vec![crate::report::SimilarityScore {
831 method: "ssim".to_string(),
832 score: best_score,
833 metadata: Vec::new(),
834 }],
835 });
836 }
837 }
838
839 Ok(groups)
840}
841
842pub fn find_ssim_duplicates(
851 files: &[std::path::PathBuf],
852 threshold: f64,
853) -> crate::DedupResult<Vec<crate::report::DuplicateGroup>> {
854 find_ssim_duplicates_with_config(files, threshold, &SsimConfig::default())
855}
856
/// The individual similarity metrics computed for one pair of images.
#[derive(Debug, Clone)]
pub struct VisualSimilarity {
    /// Similarity of the two difference hashes (dHash).
    pub dhash_similarity: f64,

    /// Similarity of the two average hashes (aHash).
    pub ahash_similarity: f64,

    /// Similarity of the two DCT-based perceptual hashes (pHash).
    pub phash_similarity: f64,

    /// Similarity of the two wavelet hashes (wHash).
    pub whash_similarity: f64,

    /// Mean correlation of the per-channel histograms.
    pub histogram_similarity: f64,

    /// Structural similarity index (SSIM).
    pub ssim: f64,

    /// Number of feature points matched between the two images.
    pub feature_matches: usize,
}

impl VisualSimilarity {
    /// Combines all metrics into one weighted score.
    ///
    /// The four hash similarities are averaged and weighted 0.3, the histogram
    /// similarity 0.2, the SSIM 0.3 and the feature score 0.2. The feature
    /// score is `feature_matches / 100`, capped at `1.0`.
    #[must_use]
    pub fn overall_score(&self) -> f64 {
        const HASH_WEIGHT: f64 = 0.3;
        const HISTOGRAM_WEIGHT: f64 = 0.2;
        const SSIM_WEIGHT: f64 = 0.3;
        const FEATURE_WEIGHT: f64 = 0.2;

        let hash_total = self.dhash_similarity
            + self.ahash_similarity
            + self.phash_similarity
            + self.whash_similarity;
        let hash_score = hash_total / 4.0;

        // 100 or more matches saturate the feature contribution.
        let feature_score = (self.feature_matches as f64 / 100.0).min(1.0);

        hash_score * HASH_WEIGHT
            + self.histogram_similarity * HISTOGRAM_WEIGHT
            + self.ssim * SSIM_WEIGHT
            + feature_score * FEATURE_WEIGHT
    }

    /// Returns `true` when [`Self::overall_score`] is at least `threshold`.
    #[must_use]
    pub fn is_similar(&self, threshold: f64) -> bool {
        let score = self.overall_score();
        score >= threshold
    }
}
903
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a single-channel image whose samples count up from 0 and wrap at 256.
    fn create_test_image(width: usize, height: usize) -> Image {
        let data = (0..width * height).map(|i| (i % 256) as u8).collect();
        Image {
            width,
            height,
            data,
            channels: 1,
        }
    }

    #[test]
    fn test_image_creation() {
        let img = Image::new(100, 100, 3);
        assert_eq!(img.width, 100);
        assert_eq!(img.height, 100);
        assert_eq!(img.channels, 3);
        assert_eq!(img.data.len(), 100 * 100 * 3);
    }

    #[test]
    fn test_grayscale_conversion() {
        let img = create_test_image(10, 10);
        let gray = img.to_grayscale();
        assert_eq!(gray.channels, 1);
        assert_eq!(gray.width, 10);
        assert_eq!(gray.height, 10);
    }

    #[test]
    fn test_image_resize() {
        let img = create_test_image(100, 100);
        let resized = img.resize(50, 50);
        assert_eq!(resized.width, 50);
        assert_eq!(resized.height, 50);
    }

    #[test]
    fn test_dhash() {
        let img = create_test_image(64, 64);
        let hash = compute_dhash(&img);
        assert!(hash.hash() != 0);
    }

    #[test]
    fn test_ahash() {
        let img = create_test_image(64, 64);
        let hash = compute_ahash(&img);
        assert!(hash.hash() != 0);
    }

    #[test]
    fn test_phash() {
        let img = create_test_image(64, 64);
        let hash = compute_phash(&img);
        assert!(hash.hash() != 0);
    }

    #[test]
    fn test_hash_similarity() {
        let img1 = create_test_image(64, 64);
        let img2 = create_test_image(64, 64);

        let hash1 = compute_dhash(&img1);
        let hash2 = compute_dhash(&img2);

        assert_eq!(hash1.similarity(&hash2), 1.0);
    }

    #[test]
    fn test_histogram() {
        let img = create_test_image(10, 10);
        let hist = compute_histogram(&img);
        assert_eq!(hist.len(), 1);
        assert_eq!(hist[0].len(), 256);
    }

    #[test]
    fn test_histogram_comparison() {
        let img1 = create_test_image(10, 10);
        let img2 = create_test_image(10, 10);

        let hist1 = compute_histogram(&img1);
        let hist2 = compute_histogram(&img2);

        let similarity = compare_histograms(&hist1, &hist2);
        assert!(similarity >= 0.0 && similarity <= 1.0);
    }

    #[test]
    fn test_ssim() {
        let img1 = create_test_image(64, 64);
        let img2 = create_test_image(64, 64);

        let params = SsimParams::default();
        // The source had the mis-encoded token `¶ms` here; restored to `&params`.
        let ssim = compute_ssim(&img1, &img2, &params);

        assert!(ssim > 0.9);
    }

    #[test]
    fn test_feature_extraction() {
        let img = create_test_image(64, 64);
        let features = extract_features(&img);
        assert!(!features.is_empty());

        for feature in &features {
            assert!(!feature.descriptor.is_empty());
        }
    }

    #[test]
    fn test_feature_matching() {
        let img = create_test_image(16, 16);
        let features1 = extract_features(&img);
        let features2 = extract_features(&img);

        let matches = match_features(&features1, &features2);
        assert!(matches > 0);
    }

    #[test]
    fn test_whash() {
        let img = create_test_image(64, 64);
        let hash = compute_whash(&img);
        assert!(hash.hash() != 0);
    }

    #[test]
    fn test_whash_identical() {
        let img = create_test_image(64, 64);
        let h1 = compute_whash(&img);
        let h2 = compute_whash(&img);
        assert_eq!(h1.similarity(&h2), 1.0);
    }

    #[test]
    fn test_whash_different() {
        // Column brightness that falls off unevenly, so the horizontal detail
        // coefficients differ from those of the linear ramp in `img1`. The
        // previous fixture (a reversed linear ramp) hashed to the same value
        // as `img1`, and the old assertion was true for any pair of hashes.
        const LEVELS: [u8; 8] = [255, 200, 150, 50, 40, 35, 33, 32];

        let img1 = create_test_image(64, 64);
        let data: Vec<u8> = (0..64 * 64).map(|i| LEVELS[(i % 64) / 8]).collect();
        let img2 = Image {
            width: 64,
            height: 64,
            data,
            channels: 1,
        };
        let h1 = compute_whash(&img1);
        let h2 = compute_whash(&img2);
        assert!(
            h1.hash() != h2.hash(),
            "Clearly different images should produce distinct wHash values"
        );
        assert!(h1.similarity(&h2) < 1.0);
    }

    #[test]
    fn test_whash_deterministic() {
        let img = create_test_image(32, 32);
        let h1 = compute_whash(&img);
        let h2 = compute_whash(&img);
        assert_eq!(h1.hash(), h2.hash());
    }

    #[test]
    fn test_compare_images_includes_whash() {
        let img = create_test_image(64, 64);
        let result = compare_images(&img, &img).expect("should succeed");
        assert!(result.whash_similarity > 0.9);
    }

    #[test]
    fn test_ssim_config_default_is_8x8() {
        let cfg = SsimConfig::default();
        assert_eq!(cfg.thumbnail_width, 8);
        assert_eq!(cfg.thumbnail_height, 8);
    }

    #[test]
    fn test_ssim_config_custom_16x16() {
        let config = SsimConfig {
            thumbnail_width: 16,
            thumbnail_height: 16,
        };
        let dir = std::env::temp_dir().join("oximedia_ssim_16x16");
        let _ = std::fs::create_dir_all(&dir);
        let f1 = dir.join("a.bin");
        let f2 = dir.join("b.bin");
        std::fs::write(&f1, &[128u8; 256]).expect("write f1");
        std::fs::write(&f2, &[200u8; 256]).expect("write f2");
        let result = find_ssim_duplicates_with_config(&[f1, f2], 0.5, &config);
        assert!(result.is_ok(), "16x16 config should run without error");
        let _ = std::fs::remove_dir_all(&dir);
    }

    #[test]
    fn test_ssim_config_default_matches_legacy() {
        let dir = std::env::temp_dir().join("oximedia_ssim_legacy");
        let _ = std::fs::create_dir_all(&dir);
        let f1 = dir.join("a.bin");
        let f2 = dir.join("b.bin");
        std::fs::write(&f1, &[64u8; 64]).expect("write f1");
        std::fs::write(&f2, &[64u8; 64]).expect("write f2");
        let r1 =
            find_ssim_duplicates(&[f1.clone(), f2.clone()], 0.5).expect("legacy should succeed");
        let r2 = find_ssim_duplicates_with_config(&[f1, f2], 0.5, &SsimConfig::default())
            .expect("config should succeed");
        assert_eq!(r1.len(), r2.len(), "default config should match legacy");
        let _ = std::fs::remove_dir_all(&dir);
    }

    #[test]
    fn test_ssim_duplicates_identical_files_grouped() {
        let dir = std::env::temp_dir().join("oximedia_ssim_identical");
        let _ = std::fs::create_dir_all(&dir);
        let f1 = dir.join("same_a.bin");
        let f2 = dir.join("same_b.bin");
        // 32x32 thumbnails are larger than the default 11-pixel SSIM window.
        let config = SsimConfig {
            thumbnail_width: 32,
            thumbnail_height: 32,
        };
        let payload = vec![42u8; 1024];
        std::fs::write(&f1, &payload).expect("write f1");
        std::fs::write(&f2, &payload).expect("write f2");
        let groups =
            find_ssim_duplicates_with_config(&[f1, f2], 0.9, &config).expect("should succeed");
        assert_eq!(groups.len(), 1, "identical files should form one group");
        let _ = std::fs::remove_dir_all(&dir);
    }

    #[test]
    fn test_ssim_single_file_returns_empty() {
        let dir = std::env::temp_dir().join("oximedia_ssim_single");
        let _ = std::fs::create_dir_all(&dir);
        let f1 = dir.join("solo.bin");
        std::fs::write(&f1, &[0u8; 32]).expect("write");
        let groups = find_ssim_duplicates(&[f1], 0.5).expect("should succeed");
        assert!(groups.is_empty(), "single file cannot form a group");
        let _ = std::fs::remove_dir_all(&dir);
    }
}