1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
use std::io::prelude::*;
use std::hash::{Hash, Hasher};
use std::collections::hash_map::DefaultHasher;
use std::collections::HashMap;
use std::fs::{File, read_dir};
use std::io::Result;
use std::path::PathBuf;


/// Reads the entire contents of the file at `path` into memory.
///
/// `path` may be anything that converts to a filesystem path (`String`,
/// `&str`, `PathBuf`, `&Path`, ...), so a caller that already holds a path
/// value does not have to convert it to a UTF-8 `String` first.
///
/// # Errors
///
/// Returns the underlying I/O error if the file cannot be opened or read.
fn get_file_data<P: AsRef<::std::path::Path>>(path: P) -> Result<Vec<u8>> {
    // `fs::read` sizes its buffer from the file metadata, avoiding the
    // repeated grow-and-copy of reading into an empty `Vec`.
    ::std::fs::read(path)
}

/// Computes a 64-bit digest of `file_data`.
///
/// The bytes are fed through the `Hash` implementation of `Vec<u8>` into a
/// freshly created `DefaultHasher`, and the hasher's final value is returned.
pub fn get_hash(file_data: Vec<u8>) -> u64 {
    let mut state = DefaultHasher::new();
    Hash::hash(&file_data, &mut state);
    Hasher::finish(&state)
}

/// Recursively collects the paths of all files located below `dir`.
///
/// Entries are visited in the order `read_dir` yields them. When a
/// sub-directory is encountered, its files are appended at that point before
/// the remaining entries of the parent are processed. Entries that are
/// neither a file nor a directory are skipped.
///
/// # Errors
///
/// Returns the first I/O error hit while listing `dir` or any nested
/// directory, or while reading one of their entries.
pub fn get_file_paths(dir: String) -> Result<Vec<PathBuf>> {
    let mut files_paths = Vec::new();
    collect_file_paths(::std::path::Path::new(&dir), &mut files_paths)?;
    Ok(files_paths)
}

/// Appends every file found under `dir` to `out`, descending into
/// sub-directories.
///
/// Works on `&Path` so that directory names which are not valid UTF-8 can be
/// traversed, and uses `?` so a failure in a nested directory reaches the
/// caller as an `Err` instead of a panic.
fn collect_file_paths(dir: &::std::path::Path, out: &mut Vec<PathBuf>) -> Result<()> {
    for entry in read_dir(dir)? {
        let path = entry?.path();

        if path.is_file() {
            out.push(path);
        } else if path.is_dir() {
            collect_file_paths(&path, out)?;
        }
    }

    Ok(())
}

/// Prints the given duplicate paths to standard output.
///
/// When `duplicates` is empty a single "not found" notice is printed.
/// Otherwise a header containing the number of entries is printed, followed
/// by each path on its own line.
pub fn print_duplicats(duplicates: Vec<String>) {
    let total = duplicates.len();

    if total == 0 {
        println!("Duplicates not founded");
    } else {
        println!("Duplicates ({}): ", total);
        duplicates.iter().for_each(|entry| println!("{}", entry));
    }
}

/// Returns the paths of files whose content hash equals that of an earlier
/// file in `files`.
///
/// Files are processed in order. The first file seen with a given hash is
/// treated as the original; every later file with the same hash is reported.
/// Equality is decided solely by the 64-bit value returned by `get_hash`;
/// contents are not compared byte-for-byte.
///
/// A file that cannot be examined (its path is not valid UTF-8, or reading it
/// fails) is reported on stderr and skipped, so one bad entry does not abort
/// the whole scan.
pub fn find_duplicates(files: Vec<PathBuf>) -> Vec<String> {
    use std::collections::hash_map::Entry;

    // Maps a content hash to the first path that produced it.
    let mut first_seen: HashMap<u64, &str> = HashMap::new();
    let mut duplicates: Vec<String> = Vec::new();

    for file in &files {
        let file_path = match file.to_str() {
            Some(text) => text,
            None => {
                eprintln!("Skipping {:?}: path is not valid UTF-8", file);
                continue;
            }
        };

        let file_content = match get_file_data(file_path.to_string()) {
            Ok(bytes) => bytes,
            Err(err) => {
                eprintln!("Skipping {}: {}", file_path, err);
                continue;
            }
        };

        // A single lookup both detects a repeat and records a first sighting.
        match first_seen.entry(get_hash(file_content)) {
            Entry::Vacant(slot) => {
                slot.insert(file_path);
            }
            Entry::Occupied(_) => duplicates.push(file_path.to_string()),
        }
    }

    duplicates
}


#[cfg(test)]
mod tests {
    use std::env::temp_dir;
    use std::fs::{
        File,
        create_dir_all,
        remove_dir_all
    };
    use std::io::prelude::*;
    use std::path::PathBuf;
    use std::process;
    use super::{
        get_file_data,
        get_hash
    };


    /// Per-test scratch directory under the system temp dir.
    ///
    /// The directory is removed when the guard is dropped, which also happens
    /// while unwinding from a failed assertion, so a failing test does not
    /// leave fixtures behind that would break the next run.
    struct ScratchDir(PathBuf);

    impl ScratchDir {
        /// Creates an empty scratch directory. `label` keeps tests that run
        /// in parallel within the same process from sharing a directory.
        fn new(label: &str) -> ScratchDir {
            let path = temp_dir().join(format!("duplicates-{}-{}", label, process::id()));
            // Clear anything left over from an earlier run that was killed.
            let _ = remove_dir_all(&path);
            create_dir_all(&path).unwrap();
            ScratchDir(path)
        }

        /// Writes `text` to a file called `name` inside the scratch directory
        /// and returns the file's path as a `String`.
        fn create_file(&self, name: &str, text: &[u8]) -> String {
            let path = self.0.join(name);
            let mut f = File::create(&path).unwrap();
            f.write_all(text).unwrap();
            path.to_str()
                .expect("temporary path should be valid UTF-8")
                .to_string()
        }
    }

    impl Drop for ScratchDir {
        fn drop(&mut self) {
            let _ = remove_dir_all(&self.0);
        }
    }

    #[test]
    fn test_get_file_data() {
        let dir = ScratchDir::new("get-file-data");
        let file = dir.create_file("test.txt", b"some text");

        let content = get_file_data(file).unwrap();

        assert_eq!(content, "some text".to_string().into_bytes());
    }

    #[test]
    fn test_get_hash() {
        let dir = ScratchDir::new("get-hash");
        let file = dir.create_file("test2.txt", b"some text");

        let content = get_file_data(file).unwrap();
        let hash = get_hash(content);

        // The concrete output of `DefaultHasher` is not guaranteed to stay the
        // same across Rust releases, so check properties, not a literal value.
        assert_eq!(hash, get_hash(b"some text".to_vec()));
        assert_ne!(hash, get_hash(b"some texT".to_vec()));
    }
}