// filesystem_hashing/hasher.rs

1use crate::snapshot::FileMetadata;
2use anyhow::{anyhow, Error};
3use serde::{Deserialize, Serialize};
4use sha3::{Digest, Sha3_256};
5use std::collections::HashMap;
6use std::fs::File;
7use std::io::{Read, Write};
8use std::os::unix::fs::MetadataExt;
9use std::path::Path;
10use std::sync::MutexGuard;
11use std::{env, fs};
12
13#[derive(Clone, Copy, Debug, Deserialize, Serialize)]
14pub enum HashType {
15    MD5,
16    SHA3,
17    BLAKE3,
18}
19
/// Digest of a file together with the file-system metadata captured for it.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct HashResult {
    /// Raw digest bytes.
    pub check_sum: Vec<u8>,
    /// File size in bytes.
    pub size: u64,
    /// Inode number.
    pub ino: u64,
    /// Last status-change time.
    pub ctime: i64,
    /// Last modification time.
    pub mtime: i64,
}
27#[allow(unused)]
28pub fn hash_file(
29    path: &Path,
30    file_hashes: &mut MutexGuard<HashMap<String, FileMetadata>>,
31    hash_type: HashType,
32    verbose: bool,
33) -> Result<(), Error> {
34    let mut full_path = String::new();
35    if path.starts_with("./") {
36        if let Ok(cwd) = env::current_dir() {
37            match cwd.to_str() {
38                None => return Err(anyhow!("cannot parse path")),
39                Some(c) => full_path.push_str(c),
40            }
41            full_path.push('/');
42
43            match path.to_str() {
44                None => return Err(anyhow!("cannot parse path")),
45                Some(p) => match p.split("./").last() {
46                    None => return Err(anyhow!("cannot parse path")),
47                    Some(p) => full_path.push_str(p),
48                },
49            }
50        }
51    } else {
52        match path.to_str() {
53            None => return Err(anyhow!("cannot parse path")),
54            Some(p) => {
55                full_path.push_str(p);
56            }
57        }
58    }
59
60    let black_list: Vec<&str> = vec![];
61
62    for entry in black_list {
63        if full_path.starts_with(entry) {
64            return Err(anyhow!("cannot parse path"));
65        }
66    }
67
68    let mut size = 0u64;
69    let mut ino = 0u64;
70    let mut ctime = 0i64;
71    let mut mtime = 0i64;
72
73    if let Ok(metadata) = fs::metadata(full_path) {
74        size = metadata.size();
75        ctime = metadata.ctime();
76        mtime = metadata.mtime();
77        ino = metadata.ino();
78    }
79
80    let mut file_hash: Vec<u8> = Vec::new();
81    let mut file_buffer: Vec<u8> = Vec::new();
82
83    if let Some(p) =path.to_str() {
84        if verbose {
85            println!("{}", p)
86        }
87    }
88    
89    let byte_hash: Result<Vec<u8>, Error> = match hash_type {
90        HashType::MD5 => hash_md5(path),
91        HashType::SHA3 => hash_sha3(path),
92        HashType::BLAKE3 => hash_blake3(path),
93    };
94
95    match path.to_str() {
96        None => return Err(anyhow!("cannot parse path")),
97        Some(p) => {
98            file_hashes.insert(
99                p.to_string(),
100                FileMetadata {
101                    path: p.to_string(),
102                    check_sum: byte_hash?,
103                    size,
104                    ino,
105                    ctime,
106                    mtime,
107                },
108            );
109        }
110    }
111
112    drop(file_hash);
113    Ok(())
114}
115
116fn hash_sha3(bytes: &Path) -> Result<Vec<u8>, Error> {
117    let mut hasher = Sha3_256::new();
118    if let Ok(mut f) = File::open(bytes) {
119        let chunk_size = 0x4000;
120        if let Ok(meta) = f.metadata() {
121            if meta.is_file() {
122                loop {
123                    let mut chunk = Vec::with_capacity(chunk_size);
124                    let n = std::io::Read::by_ref(&mut f)
125                        .take(chunk_size as u64)
126                        .read_to_end(&mut chunk)?;
127                    if n == 0 {
128                        break;
129                    }
130                    sha3::digest::Update::update(&mut hasher, chunk.by_ref());
131                    if n < chunk_size {
132                        break;
133                    }
134                }
135            }
136        }
137    }
138    Ok(hasher.finalize().to_vec())
139}
140
141fn hash_md5(bytes: &Path) -> Result<Vec<u8>, Error> {
142    let mut hasher = md5::Context::new();
143    if let Ok(mut f) = File::open(bytes) {
144        let chunk_size = 0x4000;
145        if let Ok(meta) = f.metadata() {
146            if meta.is_file() {
147                loop {
148                    let mut chunk = Vec::with_capacity(chunk_size);
149                    let n = std::io::Read::by_ref(&mut f)
150                        .take(chunk_size as u64)
151                        .read_to_end(&mut chunk)?;
152                    if n == 0 {
153                        break;
154                    }
155                    hasher.consume(chunk);
156                    if n < chunk_size {
157                        break;
158                    }
159                }
160            }
161        }
162    }
163    Ok(hasher.compute().0.to_vec())
164}
165
166fn hash_blake3(bytes: &Path) -> Result<Vec<u8>, Error> {
167    let mut hasher = blake3::Hasher::new();
168    if let Ok(mut f) = File::open(bytes) {
169        let chunk_size = 0x4000;
170        if let Ok(meta) = f.metadata() {
171            if meta.is_file() {
172                loop {
173                    let mut chunk = Vec::with_capacity(chunk_size);
174                    let n = std::io::Read::by_ref(&mut f)
175                        .take(chunk_size as u64)
176                        .read_to_end(&mut chunk)?;
177                    if n == 0 {
178                        break;
179                    }
180                    hasher.update(chunk.as_ref());
181                    if n < chunk_size {
182                        break;
183                    }
184                }
185            }
186        }
187    }
188    Ok(hasher.finalize().as_bytes().to_vec())
189}
190
#[cfg(test)]
mod tests {
    use sha3::Digest;
    use std::path::PathBuf;

    /// Longer than two 0x4000-byte read chunks, and not a multiple of the
    /// chunk size, so the chunked readers go through full chunks and a short
    /// final chunk.
    const MULTI_CHUNK_LEN: usize = 0x4000 * 2 + 17;

    /// Builds non-uniform test data so that reordered or repeated chunks
    /// would change the digest.
    fn sample_bytes(len: usize) -> Vec<u8> {
        (0..len).map(|i| (i % 251) as u8).collect()
    }

    /// Writes `contents` to a file in the system temp directory whose name is
    /// unique per test (`tag`) and per process.
    fn write_temp_file(tag: &str, contents: &[u8]) -> PathBuf {
        let path =
            std::env::temp_dir().join(format!("hasher_test_{}_{}", tag, std::process::id()));
        std::fs::write(&path, contents).expect("temp directory should be writable");
        path
    }

    #[test]
    fn blake3() {
        let test_string = "aprettylongteststring".as_bytes();
        let hashed = blake3::hash(test_string).as_bytes().to_vec();
        assert_eq!(
            hashed,
            [
                0xFD, 0x5F, 0x22, 0xE8, 0x95, 0x82, 0x18, 0xD6, 0x9A, 0x96, 0xAC, 0x77, 0xCD, 0xCD,
                0xAA, 0xA7, 0x51, 0xCE, 0x81, 0xF3, 0x04, 0x86, 0xC8, 0x49, 0xA6, 0xD7, 0x66, 0x81,
                0x68, 0xDB, 0x22, 0x2D,
            ]
        )
    }

    #[test]
    fn md5() {
        let test_string = "adifferentbutstillprettylongteststring".as_bytes();
        let hashed = md5::compute(test_string).to_vec();
        assert_eq!(
            hashed,
            [
                0x6C, 0x39, 0x5D, 0xC4, 0xC5, 0x81, 0xAE, 0x7A, 0x55, 0x74, 0xC4, 0x5B, 0xE3, 0xFB,
                0x92, 0x58
            ]
        )
    }

    #[test]
    fn sha3() {
        let test_string =
            "adifferentbutstillprettylongteststringwithaslightlydifferentcontent".as_bytes();
        let hashed = sha3::Sha3_256::digest(test_string).to_vec();
        assert_eq!(
            hashed,
            [
                0xA1, 0x55, 0xE2, 0x73, 0x63, 0x51, 0x36, 0xC5, 0x25, 0xFB, 0x36, 0xA8, 0x81, 0xD6,
                0x02, 0x21, 0xCC, 0xC5, 0x48, 0x9B, 0xE7, 0x18, 0xCC, 0x57, 0xCE, 0x66, 0xBA, 0x78,
                0xBA, 0x26, 0x33, 0x7E,
            ]
        )
    }

    #[test]
    fn blake3_file_digest_matches_one_shot() {
        let contents = sample_bytes(MULTI_CHUNK_LEN);
        let path = write_temp_file("blake3", &contents);
        let hashed = super::hash_blake3(&path);
        // Clean up before asserting so a failure does not leave the file behind.
        let _ = std::fs::remove_file(&path);
        assert_eq!(
            hashed.unwrap(),
            blake3::hash(contents.as_slice()).as_bytes().to_vec()
        );
    }

    #[test]
    fn md5_file_digest_matches_one_shot() {
        let contents = sample_bytes(MULTI_CHUNK_LEN);
        let path = write_temp_file("md5", &contents);
        let hashed = super::hash_md5(&path);
        let _ = std::fs::remove_file(&path);
        assert_eq!(hashed.unwrap(), md5::compute(contents.as_slice()).to_vec());
    }

    #[test]
    fn sha3_file_digest_matches_one_shot() {
        let contents = sample_bytes(MULTI_CHUNK_LEN);
        let path = write_temp_file("sha3", &contents);
        let hashed = super::hash_sha3(&path);
        let _ = std::fs::remove_file(&path);
        assert_eq!(
            hashed.unwrap(),
            sha3::Sha3_256::digest(contents.as_slice()).to_vec()
        );
    }
}