dedcore 0.1.0

A high-performance deduplication tool
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
//! Module for file similarity detection and comparison.
//! 
//! This module provides functionality for comparing text and image files using various algorithms.
//! It includes implementations of:
//! - Levenshtein distance for text similarity
//! - Perceptual hashing for image similarity
//! - Grouping of similar files based on configurable thresholds

use std::path::Path;
use anyhow::{Result, Context};
use image::{GenericImageView, DynamicImage, imageops::FilterType, Pixel};
use std::collections::HashMap;
use std::f32;

use crate::cli::ImageHashAlgorithm;

/// Calculate the Levenshtein distance between two strings.
///
/// The distance is measured in Unicode scalar values (`char`s), not bytes, so
/// replacing one multi-byte character with another counts as a single edit.
///
/// # Arguments
/// * `a` - First string
/// * `b` - Second string
///
/// # Returns
/// The minimum number of single-character insertions, deletions and
/// substitutions needed to turn `a` into `b`.
///
/// # Note
/// Runs in `O(chars(a) * chars(b))` time and keeps one row of
/// `chars(b) + 1` costs in memory, so it is not suited to very large strings.
pub fn levenshtein(a: &str, b: &str) -> usize {
    // Decode `b` once: the row below is indexed by character position, and
    // `b.len()` would count bytes instead.
    let b_chars: Vec<char> = b.chars().collect();

    // costs[j] = distance between the processed prefix of `a` and the first
    // `j` characters of `b`. Before any character of `a` is consumed that is
    // simply `j` insertions.
    let mut costs: Vec<usize> = (0..=b_chars.len()).collect();

    for (i, ca) in a.chars().enumerate() {
        // `last` carries the diagonal (previous row, previous column) value.
        let mut last = i;
        costs[0] = i + 1;
        for (j, &cb) in b_chars.iter().enumerate() {
            let old = costs[j + 1];
            costs[j + 1] = if ca == cb {
                last
            } else {
                // substitution (last), insertion (costs[j]), deletion (old)
                1 + last.min(costs[j]).min(old)
            };
            last = old;
        }
    }

    costs[b_chars.len()]
}

/// Compare two text files and return their Levenshtein distance.
///
/// # Arguments
/// * `path1` - Path to the first text file
/// * `path2` - Path to the second text file
///
/// # Returns
/// * `Result<usize>` - The Levenshtein distance between the two files
///
/// # Note
/// This function loads both files entirely into memory, so it's not suitable for very large files.
pub fn compare_text_files(path1: &Path, path2: &Path) -> Result<usize> {
    let text1 = std::fs::read_to_string(path1)?;
    let text2 = std::fs::read_to_string(path2)?;
    Ok(levenshtein(&text1, &text2))
}

/// Calculate a normalized similarity score between two text files.
///
/// # Arguments
/// * `path1` - Path to the first text file
/// * `path2` - Path to the second text file
///
/// # Returns
/// * `Result<f32>` - A value between 0.0 (completely different) and 1.0 (identical)
///
/// # Note
/// This is based on the normalized Levenshtein distance between the file contents.
pub fn text_similarity(path1: &Path, path2: &Path) -> Result<f32> {
    let distance = compare_text_files(path1, path2)? as f32;
    let max_len = std::cmp::max(
        std::fs::read_to_string(path1)?.len(),
        std::fs::read_to_string(path2)?.len(),
    ) as f32;
    
    if max_len == 0.0 {
        return Ok(1.0); // Both files are empty
    }
    
    Ok(1.0 - (distance / max_len).min(1.0))
}

/// Group text files that are similar to each other based on a threshold.
///
/// # Arguments
/// * `files` - A slice of file paths to analyze
/// * `similarity_threshold` - The minimum similarity score (0.0 to 1.0) for files to be considered similar
///
/// # Returns
/// * `Result<Vec<Vec<String>>>` - A vector of groups, where each group contains paths of similar files.
///   Only groups with at least two members are returned; members keep the order they have in `files`.
///
/// # Note
/// Grouping is greedy: the first file that is not yet in a group becomes the
/// representative of a new group, and every later ungrouped file is compared
/// against that representative only. Files whose metadata cannot be read, and
/// pairs whose contents cannot be compared (unreadable or not UTF-8), are
/// skipped rather than reported as errors.
///
/// Files of different sizes are compared as well; a cheap size check only
/// rules out pairs whose lengths differ too much to ever reach the threshold.
pub fn group_similar_text_files(
    files: &[String],
    similarity_threshold: f32,
) -> Result<Vec<Vec<String>>> {
    // Stat every file once. Files that cannot be stat'ed are left out of the
    // map and therefore never join a group.
    let sizes: HashMap<&str, u64> = files
        .iter()
        .filter_map(|file| {
            std::fs::metadata(file)
                .ok()
                .map(|metadata| (file.as_str(), metadata.len()))
        })
        .collect();

    let threshold = f64::from(similarity_threshold);
    let mut groups: Vec<Vec<String>> = Vec::new();
    let mut processed: std::collections::HashSet<&str> = std::collections::HashSet::new();

    for (index, file) in files.iter().enumerate() {
        let size = match sizes.get(file.as_str()) {
            Some(&size) => size,
            None => continue,
        };
        // `insert` returns false when the path is already grouped (or is a
        // repeated entry in `files`).
        if !processed.insert(file.as_str()) {
            continue;
        }

        let mut current_group = vec![file.clone()];

        // Every earlier entry has already been processed, so only later
        // entries can still join this group.
        for other_file in &files[index + 1..] {
            let other_size = match sizes.get(other_file.as_str()) {
                Some(&other_size) => other_size,
                None => continue,
            };
            if processed.contains(other_file.as_str()) {
                continue;
            }

            // The edit distance is at least the difference in character
            // counts, so similarity <= shorter_chars / longer_chars. A UTF-8
            // character occupies 1 to 4 bytes, hence that ratio is at most
            // 4 * smaller_bytes / larger_bytes. Pairs that cannot reach the
            // threshold are skipped without reading their contents.
            let (smaller, larger) = if size <= other_size {
                (size, other_size)
            } else {
                (other_size, size)
            };
            if 4.0 * (smaller as f64) < threshold * (larger as f64) {
                continue;
            }

            // Best effort: a pair that cannot be compared is simply not grouped.
            if let Ok(similarity) = text_similarity(Path::new(file), Path::new(other_file)) {
                if similarity >= similarity_threshold {
                    current_group.push(other_file.clone());
                    processed.insert(other_file.as_str());
                }
            }
        }

        if current_group.len() > 1 {
            groups.push(current_group);
        }
    }

    Ok(groups)
}

/// Represents a perceptual hash signature for image comparison.
///
/// This signature contains multiple hash values computed using different algorithms
/// to improve the accuracy of image similarity detection. Two signatures compare
/// equal only when all four hashes are bit-for-bit identical; use
/// `compare_image_signatures` for a graded similarity score.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ImageSignature {
    /// Average hash (aHash) of the image.
    pub avg_hash: u64,
    /// DCT-based perceptual hash (pHash) of the image.
    pub phash: u64,
    /// Difference hash (dHash) of the image.
    pub dhash: u64,
    /// Colour-based hash of the image.
    pub color_hash: u64,
}

/// Generate a comprehensive perceptual hash signature for an image.
///
/// This function computes multiple hash values using different algorithms
/// to create a robust signature for image comparison. The average, perceptual
/// and difference hashes are computed on a grayscale copy of the image; the
/// colour hash is computed on the image as loaded.
///
/// # Arguments
/// * `path` - Path to the image file
///
/// # Returns
/// * `Result<ImageSignature>` - A structure containing multiple hash values
///
/// # Errors
/// Returns an error if the image cannot be loaded, or if any of the individual
/// hashes cannot be computed. A failed hash is never replaced by a placeholder
/// value, because a zero hash would make unrelated images look alike.
pub fn generate_image_signature(path: &Path) -> Result<ImageSignature> {
    let img = image::open(path)
        .with_context(|| format!("Failed to open image: {}", path.display()))?;

    // Convert to grayscale for most hashes
    let gray_img = img.grayscale();

    // Generate the four hashes in parallel using nested joins. Each closure
    // returns its `Result` so failures can be propagated after the join.
    let ((avg_hash, phash), (dhash, color_hash)) = rayon::join(
        || {
            rayon::join(
                || average_hash(&gray_img),
                || perceptual_hash(&gray_img),
            )
        },
        || {
            rayon::join(
                || difference_hash(&gray_img),
                || color_hash(&img),
            )
        },
    );

    Ok(ImageSignature {
        avg_hash: avg_hash.context("Failed to compute average hash")?,
        phash: phash.context("Failed to compute perceptual hash")?,
        dhash: dhash.context("Failed to compute difference hash")?,
        color_hash: color_hash.context("Failed to compute color hash")?,
    })
}

/// Score how alike two image signatures are.
///
/// Each of the four hashes is compared with `hamming_similarity` and the
/// results are combined as a weighted sum: 0.3 for the average hash, 0.4 for
/// the perceptual hash, 0.2 for the difference hash and 0.1 for the colour hash.
///
/// # Arguments
/// * `sig1` - First image signature
/// * `sig2` - Second image signature
///
/// # Returns
/// * `f32` - A value between 0.0 (completely different) and 1.0 (identical)
pub fn compare_image_signatures(sig1: &ImageSignature, sig2: &ImageSignature) -> f32 {
    // (weight, hash from sig1, hash from sig2), in summation order.
    let weighted_pairs: [(f32, u64, u64); 4] = [
        (0.3, sig1.avg_hash, sig2.avg_hash),
        (0.4, sig1.phash, sig2.phash),
        (0.2, sig1.dhash, sig2.dhash),
        (0.1, sig1.color_hash, sig2.color_hash),
    ];

    weighted_pairs
        .iter()
        .map(|&(weight, left, right)| hamming_similarity(left, right) * weight)
        .sum()
}

/// Calculate the average hash (aHash) of an image.
///
/// This is a simple but effective perceptual hash that works by:
/// 1. Resizing to 8x8 pixels
/// 2. Converting the thumbnail to 8-bit grayscale (luma)
/// 3. Calculating the average luma value (integer division, rounded down)
/// 4. Creating a 64-bit hash with bit `i` set when pixel `i` (row-major) is
///    greater than or equal to the average
///
/// # Arguments
/// * `img` - Reference to a DynamicImage to hash; colour images are accepted
///   and reduced to luma, so callers do not need to pre-convert
///
/// # Returns
/// * `Result<u64>` - A 64-bit hash value, or an error if the hashing fails
///
/// # Note
/// Fast but less accurate than other methods for some types of images.
pub fn average_hash(img: &DynamicImage) -> Result<u64> {
    // Use real luma rather than the first (red) channel so that colour input
    // is hashed on brightness, as the algorithm intends.
    let luma: Vec<u8> = img
        .resize_exact(8, 8, FilterType::Lanczos3)
        .to_luma8()
        .into_raw();

    let total: u32 = luma.iter().map(|&value| u32::from(value)).sum();
    let avg = total / 64;

    let hash = luma
        .iter()
        .enumerate()
        .filter(|&(_, &value)| u32::from(value) >= avg)
        .fold(0u64, |bits, (i, _)| bits | (1u64 << i));

    Ok(hash)
}

/// Calculate the perceptual hash (pHash) of an image using DCT.
///
/// This implementation follows the standard pHash algorithm:
/// 1. Resize to 32x32 and convert to grayscale
/// 2. Scale pixel values to `[0, 1]`
/// 3. Apply a 2D DCT
/// 4. Take the top-left 8x8 block of DCT coefficients, skipping the DC term
/// 5. Compute the median of those 63 coefficients
/// 6. Set bit `i` when the `i`-th coefficient (in block order) exceeds the median
///
/// # Arguments
/// * `img` - Reference to a DynamicImage to hash
///
/// # Returns
/// * `Result<u64>` - A 64-bit hash value. Only bits 0..=62 are used because the
///   DC coefficient is excluded; bit 63 is always zero.
///
/// # Errors
/// Returns an error if no coefficients are available to take a median of.
///
/// # Note
/// More accurate than average hash but slower and more memory intensive.
pub fn perceptual_hash(img: &DynamicImage) -> Result<u64> {
    // 1. Resize to 32x32 and convert to grayscale
    let gray = img.resize_exact(32, 32, FilterType::Lanczos3).to_luma8();

    // 2. Convert to f64 and scale to [0,1]
    let pixels: Vec<f64> = gray
        .enumerate_pixels()
        .map(|(_, _, pixel)| pixel[0] as f64 / 255.0)
        .collect();

    // 3. Apply DCT
    let dct = apply_dct_2d(&pixels, 32);

    // 4. Take top-left 8x8 DCT coefficients (excluding the DC component)
    let mut coefficients = Vec::with_capacity(63);
    for i in 0..8 {
        for j in 0..8 {
            if i == 0 && j == 0 {
                continue; // Skip DC component
            }
            coefficients.push(dct[i * 32 + j]);
        }
    }

    // 5. Calculate the median on a sorted COPY. The hash bits must follow the
    //    coefficients' frequency positions; sorting in place would make the
    //    bit pattern depend only on rank and erase the image's structure.
    let mut sorted = coefficients.clone();
    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
    let median = *sorted
        .get(sorted.len() / 2)
        .ok_or_else(|| anyhow::anyhow!("Failed to calculate median for perceptual hash"))?;

    // 6. Create hash from the coefficients in their original block order
    let hash = coefficients
        .iter()
        .enumerate()
        .filter(|&(_, &value)| value > median)
        .fold(0u64, |bits, (i, _)| bits | (1u64 << i));

    Ok(hash)
}

/// Compute the difference hash (dHash) of an image.
///
/// The image is resized to 9x8 pixels, giving eight horizontal neighbour
/// pairs in each of the eight rows. Bit `y * 8 + x` of the result is set when
/// the luma of pixel `(x, y)` is strictly greater than that of `(x + 1, y)`.
///
/// # Arguments
/// * `img` - Reference to a DynamicImage to hash
///
/// # Returns
/// * `Result<u64>` - A 64-bit hash value
///
/// # Note
/// Good balance between speed and accuracy.
fn difference_hash(img: &DynamicImage) -> Result<u64> {
    let thumbnail = img.resize_exact(9, 8, FilterType::Lanczos3);
    let luma_at = |x: u32, y: u32| thumbnail.get_pixel(x, y).to_luma()[0];

    // Walk the 64 bit positions directly; each one maps to a pixel pair.
    let hash = (0..64u32)
        .filter(|&bit| {
            let (x, y) = (bit % 8, bit / 8);
            luma_at(x, y) > luma_at(x + 1, y)
        })
        .fold(0u64, |bits, bit| bits | (1u64 << bit));

    Ok(hash)
}

/// Calculate a color hash describing which colours occur in the image.
///
/// The image is resized to 8x8 pixels and every pixel is quantised to two
/// bits per RGB channel, which yields one of 64 colour bins
/// (`r << 4 | g << 2 | b`). Bit `k` of the result is set when at least one
/// thumbnail pixel falls into bin `k`.
///
/// Because each bit stands for one colour bin, the Hamming distance between
/// two hashes counts the bins present in one image but not the other. That
/// makes the value meaningful for `hamming_similarity`, unlike a multiplicative
/// rolling hash, where a single changed pixel scrambles unrelated bits.
///
/// # Arguments
/// * `img` - Reference to a DynamicImage to hash
///
/// # Returns
/// * `Result<u64>` - A 64-bit colour-occupancy bitmap
///
/// # Note
/// Captures the colour palette only; the position of colours in the image is
/// ignored.
fn color_hash(img: &DynamicImage) -> Result<u64> {
    let thumbnail = img.resize_exact(8, 8, FilterType::Lanczos3);

    let hash = thumbnail.pixels().fold(0u64, |bits, (_x, _y, pixel)| {
        // Simple color quantization: keep the top two bits of each channel.
        let rq = u32::from(pixel[0] >> 6);
        let gq = u32::from(pixel[1] >> 6);
        let bq = u32::from(pixel[2] >> 6);

        // Six bits in total, so the bin index is always in 0..64.
        let bin = (rq << 4) | (gq << 2) | bq;
        bits | (1u64 << bin)
    });

    Ok(hash)
}

/// Apply 2D Discrete Cosine Transform (DCT) to a matrix.
///
/// This is a helper function used by the perceptual hash algorithm.
///
/// For every output index `(u, v)` it computes
/// `0.25 * c(u) * c(v) * sum_{x,y} m[x][y] * cos(pi/size * (x + 0.5) * u) * cos(pi/size * (y + 0.5) * v)`
/// with `c(0) = 1/sqrt(2)` and `c(k) = 1` otherwise. The `0.25` factor is fixed
/// and does not depend on `size`.
///
/// # Arguments
/// * `matrix` - Input matrix of f64 values, stored so that element `(x, y)` is at `x * size + y`
/// * `size` - Size of the matrix (assumed to be square)
///
/// # Returns
/// * `Vec<f64>` - Transformed matrix with coefficient `(u, v)` at `u * size + v`
///
/// # Panics
/// Panics if `matrix` holds fewer than `size * size` elements.
///
/// # Note
/// This is a direct `O(size^4)` evaluation and may be slow for large matrices.
/// The cosine factors are tabulated once up front, so only `size^2` cosines
/// are evaluated instead of two per innermost iteration.
fn apply_dct_2d(matrix: &[f64], size: usize) -> Vec<f64> {
    let step = std::f64::consts::PI / (size as f64);

    // cos_table[k * size + n] = cos(step * (n + 0.5) * k); the same table
    // serves both the row factor (k = u, n = x) and the column factor
    // (k = v, n = y).
    let mut cos_table = vec![0.0f64; size * size];
    for k in 0..size {
        for n in 0..size {
            cos_table[k * size + n] = (step * (n as f64 + 0.5) * k as f64).cos();
        }
    }

    let scale = |k: usize| if k == 0 { 1.0 / 2.0f64.sqrt() } else { 1.0 };

    let mut output = vec![0.0; size * size];
    for u in 0..size {
        let cu = scale(u);
        let cos_u = &cos_table[u * size..(u + 1) * size];

        for v in 0..size {
            let cv = scale(v);
            let cos_v = &cos_table[v * size..(v + 1) * size];

            let mut sum = 0.0;
            for x in 0..size {
                for y in 0..size {
                    sum += matrix[x * size + y] * cos_u[x] * cos_v[y];
                }
            }

            output[u * size + v] = 0.25 * cu * cv * sum;
        }
    }

    output
}

/// Load two images from disk and score how similar they are.
///
/// A full signature is generated for each path and the two signatures are
/// then compared with `compare_image_signatures`.
///
/// # Arguments
/// * `path1` - Path to the first image
/// * `path2` - Path to the second image
///
/// # Returns
/// * `Result<f32>` - A value between 0.0 (completely different) and 1.0 (identical)
///
/// # Errors
/// Returns the error from signature generation if either image cannot be
/// processed; the first image is handled before the second.
#[allow(dead_code)]
pub fn compare_images(path1: &Path, path2: &Path) -> Result<f32> {
    let first = generate_image_signature(path1)?;
    generate_image_signature(path2).map(|second| compare_image_signatures(&first, &second))
}

/// Count the bit positions in which two 64-bit hashes disagree.
///
/// # Arguments
/// * `a` - First hash value
/// * `b` - Second hash value
///
/// # Returns
/// * `u32` - Number of differing bits, from 0 (equal) to 64 (bitwise complements)
pub fn hamming_distance(a: u64, b: u64) -> u32 {
    // XOR leaves a 1 exactly where the inputs differ.
    let differing_bits = a ^ b;
    differing_bits.count_ones()
}

/// Express how many bits two 64-bit hashes share as a fraction of 64.
///
/// # Arguments
/// * `a` - First hash value
/// * `b` - Second hash value
///
/// # Returns
/// * `f32` - A value between 0.0 (every bit differs) and 1.0 (identical)
pub fn hamming_similarity(a: u64, b: u64) -> f32 {
    // The distance never exceeds 64, so the subtraction cannot underflow.
    let matching_bits = 64 - hamming_distance(a, b);
    matching_bits as f32 / 64.0
}

/// Compare two images using the specified hashing algorithm.
///
/// # Arguments
/// * `path1` - Path to the first image
/// * `path2` - Path to the second image
/// * `algorithm` - Hashing algorithm to use for comparison
///
/// # Returns
/// * `Option<f32>` - Similarity score between 0.0 and 1.0, or None if comparison fails
///
/// # Note
/// Different algorithms may be better suited for different types of images.
pub fn compare_images_with_algorithm(
    path1: &str,
    path2: &str,
    algorithm: ImageHashAlgorithm,
) -> Option<f32> {
    let img1 = match image::open(path1) {
        Ok(img) => img,
        Err(_) => return None,
    };
    
    let img2 = match image::open(path2) {
        Ok(img) => img,
        Err(_) => return None,
    };
    
    match algorithm {
        ImageHashAlgorithm::Avg => {
            let hash1 = average_hash(&img1).ok()?;
            let hash2 = average_hash(&img2).ok()?;
            Some(hamming_similarity(hash1, hash2))
        }
        ImageHashAlgorithm::Phash => {
            let hash1 = perceptual_hash(&img1).ok()?;
            let hash2 = perceptual_hash(&img2).ok()?;
            Some(hamming_similarity(hash1, hash2))
        }
        ImageHashAlgorithm::Dhash => {
            let hash1 = difference_hash(&img1).ok()?;
            let hash2 = difference_hash(&img2).ok()?;
            Some(hamming_similarity(hash1, hash2))
        }
        ImageHashAlgorithm::Color => {
            let hash1 = color_hash(&img1).ok()?;
            let hash2 = color_hash(&img2).ok()?;
            Some(hamming_similarity(hash1, hash2))
        }
        ImageHashAlgorithm::Combined => {
            let sig1 = generate_image_signature(Path::new(path1)).ok()?;
            let sig2 = generate_image_signature(Path::new(path2)).ok()?;
            Some(compare_image_signatures(&sig1, &sig2))
        }
    }
}



#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Write `content` to a fresh temporary file. The file is removed when the
    /// returned handle is dropped, so callers must keep it alive.
    fn create_test_file(content: &str) -> NamedTempFile {
        let mut file = NamedTempFile::new().unwrap();
        file.write_all(content.as_bytes()).unwrap();
        file
    }

    /// Create an empty temporary file whose name ends in `.png`.
    ///
    /// The `image` crate picks the encoder from the file extension when
    /// saving, so a temp file without an extension cannot be saved to.
    fn png_temp_file() -> Result<NamedTempFile> {
        Ok(tempfile::Builder::new().suffix(".png").tempfile()?)
    }

    /// Collect the paths of temp files as owned strings.
    fn paths_of(files: &[&NamedTempFile]) -> Vec<String> {
        files
            .iter()
            .map(|file| file.path().to_str().unwrap().to_string())
            .collect()
    }

    #[test]
    fn test_text_similarity_identical() {
        let file1 = create_test_file("This is a test file with some content.");
        let file2 = create_test_file("This is a test file with some content.");

        let similarity = text_similarity(file1.path(), file2.path()).unwrap();
        assert!((similarity - 1.0).abs() < f32::EPSILON);
    }

    #[test]
    fn test_text_similarity_different() {
        // The two texts share no characters, so every position must be edited.
        let file1 = create_test_file("This is a test file with some content.");
        let file2 = create_test_file("0123456789012345678901234567890123456789");

        let similarity = text_similarity(file1.path(), file2.path()).unwrap();
        assert!(similarity < 0.5);
    }

    #[test]
    fn test_text_similarity_empty() {
        let file1 = create_test_file("");
        let file2 = create_test_file("");

        let similarity = text_similarity(file1.path(), file2.path()).unwrap();
        assert!((similarity - 1.0).abs() < f32::EPSILON);
    }

    #[test]
    fn test_group_similar_text_files() {
        // All four files are 44 ASCII characters long.
        let file1 = create_test_file("The quick brown fox jumps over the lazy dog.");
        let file2 = create_test_file("The quick brown fox jumps over the lazy dog."); // Identical to file1
        let file3 = create_test_file("The quick brown fox jumps over the lazy cat."); // 3 substitutions
        let file4 = create_test_file("0123456789012345678901234567890123456789ABCD"); // Nothing in common

        let files = paths_of(&[&file1, &file2, &file3, &file4]);

        // Very high threshold - only identical files should be grouped
        let groups = group_similar_text_files(&files, 0.99).unwrap();
        assert_eq!(groups.len(), 1);
        assert_eq!(groups[0], vec![files[0].clone(), files[1].clone()]);

        // High threshold - file3 (similarity 41/44) joins the group as well
        let groups = group_similar_text_files(&files, 0.9).unwrap();
        assert_eq!(groups.len(), 1);
        assert_eq!(
            groups[0],
            vec![files[0].clone(), files[1].clone(), files[2].clone()]
        );

        // No input, no groups
        assert!(group_similar_text_files(&[], 0.9).unwrap().is_empty());
    }

    #[test]
    fn test_levenshtein_distance() {
        assert_eq!(levenshtein("kitten", "sitting"), 3);
        assert_eq!(levenshtein("book", "back"), 2);
        assert_eq!(levenshtein("flaw", "lawn"), 2);
    }

    #[test]
    fn test_levenshtein() {
        // Empty and identical inputs
        assert_eq!(levenshtein("", ""), 0);
        assert_eq!(levenshtein("a", "a"), 0);
        assert_eq!(levenshtein("abc", ""), 3);
        assert_eq!(levenshtein("", "abc"), 3);
        assert_eq!(levenshtein("", "test"), 4);
        assert_eq!(levenshtein("test", ""), 4);
    }

    #[test]
    fn test_hamming_distance() {
        assert_eq!(hamming_distance(0b1010, 0b1111), 2);
        assert_eq!(hamming_distance(0, 0), 0);
        assert_eq!(hamming_distance(u64::MAX, 0), 64);
    }

    #[test]
    fn test_hamming_similarity() {
        assert!((hamming_similarity(0b1010, 0b1111) - 0.96875).abs() < f32::EPSILON);
        assert!((hamming_similarity(0, 0) - 1.0).abs() < f32::EPSILON);
        assert!((hamming_similarity(u64::MAX, 0) - 0.0).abs() < f32::EPSILON);
    }

    #[test]
    fn test_compare_images_same() -> Result<()> {
        // Create a test image
        let img = image::GrayImage::from_fn(100, 100, |x, y| image::Luma([(x + y) as u8]));

        // Save to a temp file
        let file = png_temp_file()?;
        img.save(file.path())?;

        // Compare with itself
        let similarity = compare_images(file.path(), file.path())?;
        assert!(
            (similarity - 1.0).abs() < f32::EPSILON,
            "Image should be identical to itself"
        );

        Ok(())
    }

    #[test]
    fn test_compare_images_different() -> Result<()> {
        // Left half black / right half white versus top half black / bottom
        // half white: the average hashes disagree on half of their bits.
        let vertical_split = image::GrayImage::from_fn(100, 100, |x, _y| {
            image::Luma([if x < 50 { 0u8 } else { 255u8 }])
        });
        let horizontal_split = image::GrayImage::from_fn(100, 100, |_x, y| {
            image::Luma([if y < 50 { 0u8 } else { 255u8 }])
        });

        // Save to temp files
        let file1 = png_temp_file()?;
        let file2 = png_temp_file()?;
        vertical_split.save(file1.path())?;
        horizontal_split.save(file2.path())?;

        // Compare different images
        let similarity = compare_images(file1.path(), file2.path())?;
        assert!(
            similarity < 0.9,
            "Different images should not be reported as near-identical"
        );

        Ok(())
    }

    #[test]
    fn test_average_hash() -> Result<()> {
        let img = image::GrayImage::from_pixel(8, 8, image::Luma([128u8]));
        let hash = average_hash(&DynamicImage::ImageLuma8(img))?;
        // All pixels are the same, so hash should be 0 or u64::MAX
        assert!(hash == 0 || hash == u64::MAX);
        Ok(())
    }

    #[test]
    fn test_perceptual_hash() -> Result<()> {
        let uniform =
            DynamicImage::ImageLuma8(image::GrayImage::from_pixel(32, 32, image::Luma([128u8])));
        let gradient = DynamicImage::ImageLuma8(image::GrayImage::from_fn(32, 32, |x, y| {
            image::Luma([(x + y) as u8])
        }));

        for img in [&uniform, &gradient] {
            let hash = perceptual_hash(img)?;
            // Hashing is deterministic
            assert_eq!(hash, perceptual_hash(img)?);
            // Only 63 coefficients contribute, so the top bit is never set
            assert_eq!(hash >> 63, 0);
        }

        Ok(())
    }

    #[test]
    fn test_difference_hash() -> Result<()> {
        let uniform = image::GrayImage::from_pixel(9, 8, image::Luma([128u8]));
        let hash = difference_hash(&DynamicImage::ImageLuma8(uniform))?;

        // For a uniform image, the difference hash should be all 0s or all 1s
        assert!(hash == 0 || hash == u64::MAX);

        // Brightness falls from left to right: every left pixel is brighter
        let falling =
            image::GrayImage::from_fn(9, 8, |x, _y| image::Luma([(200 - 20 * x) as u8]));
        let falling_hash = difference_hash(&DynamicImage::ImageLuma8(falling))?;
        assert_eq!(falling_hash, u64::MAX);

        // Brightness rises from left to right: no left pixel is brighter
        let rising = image::GrayImage::from_fn(9, 8, |x, _y| image::Luma([(20 * x) as u8]));
        let rising_hash = difference_hash(&DynamicImage::ImageLuma8(rising))?;
        assert_eq!(rising_hash, 0);

        Ok(())
    }

    #[test]
    fn test_color_hash() -> Result<()> {
        // Test with a red image
        let img = image::RgbImage::from_pixel(8, 8, image::Rgb([255, 0, 0]));
        let hash = color_hash(&DynamicImage::ImageRgb8(img))?;

        // Test with a different color
        let img2 = image::RgbImage::from_pixel(8, 8, image::Rgb([0, 255, 0]));
        let hash2 = color_hash(&DynamicImage::ImageRgb8(img2))?;

        // Different colors should produce different hashes
        assert_ne!(hash, hash2);

        // Same color should produce same hash
        let img3 = image::RgbImage::from_pixel(8, 8, image::Rgb([255, 0, 0]));
        let hash3 = color_hash(&DynamicImage::ImageRgb8(img3))?;
        assert_eq!(hash, hash3);

        Ok(())
    }

    #[test]
    fn test_image_signature() -> Result<()> {
        // Create a test image whose brightness mostly follows x
        let img = image::RgbImage::from_fn(32, 32, |x, y| {
            image::Rgb([(x + y) as u8, x as u8, y as u8])
        });

        // Save to temp file
        let file = png_temp_file()?;
        DynamicImage::ImageRgb8(img).save(file.path())?;

        // Generate signature
        let sig = generate_image_signature(file.path())?;

        // Test that the same image produces the same signature
        let sig2 = generate_image_signature(file.path())?;
        assert_eq!(sig.avg_hash, sig2.avg_hash);
        assert_eq!(sig.phash, sig2.phash);
        assert_eq!(sig.dhash, sig2.dhash);
        assert_eq!(sig.color_hash, sig2.color_hash);

        // Test comparison
        let similarity = compare_image_signatures(&sig, &sig2);
        assert!((similarity - 1.0).abs() < f32::EPSILON);

        // Test with a different image whose brightness mostly follows y
        let img2 = image::RgbImage::from_fn(32, 32, |x, y| {
            image::Rgb([(x * 2 + y) as u8, (y * 2) as u8, x as u8])
        });
        let file2 = png_temp_file()?;
        DynamicImage::ImageRgb8(img2).save(file2.path())?;

        let sig3 = generate_image_signature(file2.path())?;
        let similarity = compare_image_signatures(&sig, &sig3);
        assert!(
            similarity < 1.0,
            "Different images should not be reported as identical"
        );

        Ok(())
    }

    #[test]
    fn test_apply_dct_2d() {
        // Test with a simple 4x4 matrix
        let input = vec![
            1.0, 2.0, 3.0, 4.0,
            5.0, 6.0, 7.0, 8.0,
            9.0, 10.0, 11.0, 12.0,
            13.0, 14.0, 15.0, 16.0,
        ];

        let output = apply_dct_2d(&input, 4);

        // Check basic properties of DCT
        assert_eq!(output.len(), 16);

        // The DC coefficient is 0.25 * (1/sqrt(2))^2 * sum = sum / 8
        let sum = input.iter().sum::<f64>();
        assert!((output[0] - sum / 8.0).abs() < 1e-10);

        // Test that applying DCT to a constant input gives all zeros except DC
        let const_input = vec![1.0; 16];
        let const_output = apply_dct_2d(&const_input, 4);
        assert!((const_output[0] - 2.0).abs() < 1e-10);
        for &coeff in &const_output[1..] {
            assert!(coeff.abs() < 1e-10, "AC coefficient should be zero for constant input");
        }
    }
}