Skip to main content

clineup/path/
duplicates_finder.rs

1use crate::errors::ClineupError;
2use sha2::{Digest, Sha256};
3use std::{collections::HashMap, fs::File, io::Read, path::PathBuf};
4
5/// Calculates the SHA256 hash of a given file by using a 1024 bytes buffer.
6///
7/// # Arguments
8///
9/// * `open_file` - The file to calculate the hash of.
10///
11/// # Returns
12///
13/// The SHA256 hash of the file as a hexadecimal string, or an error if the hash calculation fails.
14pub fn get_hash_of_file(mut open_file: &File) -> Result<String, ClineupError> {
15    let mut hasher = Sha256::new();
16    let mut buffer = [0u8; 1024];
17    loop {
18        match open_file.read(&mut buffer) {
19            Ok(0) => break, // End of file
20            Ok(bytes_read) => {
21                hasher.update(&buffer[..bytes_read]);
22            }
23            Err(_) => {
24                return Err(ClineupError::HashError(
25                    "Something went wrong reading buffer of file".to_string(),
26                ))
27            }
28        }
29    }
30    let result = hasher.finalize();
31    Ok(format!("{:x}", result))
32}
33
/// Remembers the content hashes of the files it has been shown so that later
/// files with the same content can be recognised as duplicates.
#[derive(Debug, Default)]
pub struct DuplicatesFinder {
    /// Hashes of the files seen so far, grouped under a `u64` key.
    _duplicates: HashMap<u64, Vec<String>>,
}

impl DuplicatesFinder {
    /// Creates a finder that has not recorded any file yet.
    ///
    /// Equivalent to `DuplicatesFinder::default()`.
    pub fn new() -> Self {
        Self::default()
    }
}
51/// Checks if the given path is a duplicate file by detecting its
52/// precence in the hashmap.Otherwise store the hash of the file in the hashmap
53///
54/// # Arguments
55///
56/// * `path` - A `PathBuf` representing the path to the file.
57///
58/// # Returns
59///
60/// Returns `Ok(true)` if the file is a duplicate, `Ok(false)` otherwise.
61/// Returns an `Err` if there was an error while checking for duplicates.
62impl DuplicatesFinder {
63    pub fn is_duplicate(&mut self, path: &PathBuf) -> Result<bool, ClineupError> {
64        let metadata = std::fs::metadata(path)?;
65
66        if metadata.is_dir() {
67            return Ok(false);
68        }
69
70        if metadata.len() == 0 {
71            return Ok(false);
72        }
73
74        let open_file = File::open(path)?;
75        let hash_of_file = get_hash_of_file(&open_file)?;
76
77        if let std::collections::hash_map::Entry::Vacant(e) = self._duplicates.entry(metadata.len())
78        {
79            e.insert(vec![hash_of_file]);
80            return Ok(false);
81        }
82
83        if let Some(duplicates) = self._duplicates.get_mut(&metadata.len()) {
84            if !duplicates.contains(&hash_of_file) {
85                duplicates.push(hash_of_file);
86                return Ok(false);
87            }
88        } else {
89            return Ok(false);
90        }
91
92        Ok(true)
93    }
94}