use std::fs::File;
use std::io::{BufReader, ErrorKind, Read};
use std::path::Path;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use super::HashError;
/// Default number of leading bytes covered by a prehash (4 KiB); this is the
/// `prehash_size` that `Hasher::new` configures.
pub const PREHASH_SIZE: usize = 4 * 1024;
/// Size in bytes (64 KiB) of the read buffers used while hashing files.
const BUFFER_SIZE: usize = 64 * 1024;
/// A 32-byte hash value, as returned by the BLAKE3-based hashing in this module.
pub type Hash = [u8; 32];
/// File hasher configured with a prehash length and an optional shutdown flag.
#[derive(Debug, Clone)]
pub struct Hasher {
/// Maximum number of leading bytes that `prehash` feeds into the hash.
prehash_size: usize,
/// Optional shared flag. When present and set to `true`, `hash_bytes` stops
/// before its next read and reports an I/O error of kind `Interrupted`.
shutdown_flag: Option<Arc<AtomicBool>>,
}
impl Default for Hasher {
/// Returns the same configuration as `Hasher::new`: the default prehash size
/// and no shutdown flag.
fn default() -> Self {
Self::new()
}
}
impl Hasher {
    /// Creates a hasher with the default prehash size ([`PREHASH_SIZE`]) and no
    /// shutdown flag.
    #[must_use]
    pub fn new() -> Self {
        Self {
            prehash_size: PREHASH_SIZE,
            shutdown_flag: None,
        }
    }

    /// Creates a hasher whose [`prehash`](Self::prehash) covers at most
    /// `prehash_size` leading bytes of a file. No shutdown flag is attached.
    ///
    /// # Panics
    ///
    /// Panics if `prehash_size` is 0.
    #[must_use]
    pub fn with_prehash_size(prehash_size: usize) -> Self {
        assert!(prehash_size > 0, "prehash_size must be greater than 0");
        Self {
            prehash_size,
            shutdown_flag: None,
        }
    }

    /// Attaches a shared shutdown flag and returns the updated hasher.
    ///
    /// While the flag reads `true`, [`prehash`](Self::prehash) and
    /// [`full_hash`](Self::full_hash) stop before their next read and return
    /// [`HashError::Io`] wrapping an error of kind [`ErrorKind::Interrupted`].
    #[must_use]
    pub fn with_shutdown_flag(mut self, flag: Arc<AtomicBool>) -> Self {
        self.shutdown_flag = Some(flag);
        self
    }

    /// Returns `true` when a shutdown flag is attached and currently set.
    fn is_shutdown_requested(&self) -> bool {
        self.shutdown_flag
            .as_ref()
            .is_some_and(|flag| flag.load(Ordering::SeqCst))
    }

    /// Hashes at most the first `prehash_size` bytes of the file at `path`.
    ///
    /// A file shorter than the prehash size is hashed in full, so its prehash
    /// equals its full hash.
    ///
    /// # Errors
    ///
    /// Returns [`HashError::NotFound`], [`HashError::PermissionDenied`] or
    /// [`HashError::Io`] when the file cannot be opened or read, and
    /// [`HashError::Io`] with kind [`ErrorKind::Interrupted`] when the shutdown
    /// flag is set.
    pub fn prehash(&self, path: &Path) -> Result<Hash, HashError> {
        self.hash_bytes(path, Some(self.prehash_size))
    }

    /// Hashes the entire contents of the file at `path`.
    ///
    /// # Errors
    ///
    /// Returns [`HashError::NotFound`], [`HashError::PermissionDenied`] or
    /// [`HashError::Io`] when the file cannot be opened or read, and
    /// [`HashError::Io`] with kind [`ErrorKind::Interrupted`] when the shutdown
    /// flag is set.
    pub fn full_hash(&self, path: &Path) -> Result<Hash, HashError> {
        self.hash_bytes(path, None)
    }

    /// Streams the file at `path` into a BLAKE3 hasher, stopping after
    /// `max_bytes` bytes when a limit is given or at end of file otherwise.
    ///
    /// The shutdown flag is polled before every read.
    fn hash_bytes(&self, path: &Path, max_bytes: Option<usize>) -> Result<Hash, HashError> {
        let mut file = File::open(path).map_err(|e| self.map_io_error(path, e))?;
        let mut hasher = blake3::Hasher::new();

        // Read straight from the file into our own buffer. An extra `BufReader`
        // layer would only make a limited (prehash) read pull a whole
        // BUFFER_SIZE chunk from disk, and is bypassed for full-size reads.
        let buffer_len = max_bytes.map_or(BUFFER_SIZE, |max| max.min(BUFFER_SIZE));
        let mut buffer = vec![0u8; buffer_len];
        let mut total_read: u64 = 0;
        let limit = max_bytes.map(|b| b as u64);

        loop {
            if self.is_shutdown_requested() {
                log::debug!("Hash operation interrupted for: {}", path.display());
                return Err(HashError::Io {
                    path: path.to_path_buf(),
                    source: std::io::Error::new(ErrorKind::Interrupted, "Operation interrupted"),
                });
            }

            let bytes_to_read = match limit {
                Some(max) => {
                    let remaining = max.saturating_sub(total_read);
                    if remaining == 0 {
                        break;
                    }
                    // Capped at `buffer.len()`, so narrowing back to `usize`
                    // cannot truncate.
                    remaining.min(buffer.len() as u64) as usize
                }
                None => buffer.len(),
            };

            let bytes_read = match file.read(&mut buffer[..bytes_to_read]) {
                Ok(n) => n,
                // An interrupted read is transient and must be retried rather
                // than reported. Looping back also re-polls the shutdown flag,
                // so an `Interrupted` error from this function always means a
                // requested shutdown.
                Err(e) if e.kind() == ErrorKind::Interrupted => continue,
                Err(e) => return Err(self.map_io_error(path, e)),
            };
            if bytes_read == 0 {
                // End of file.
                break;
            }

            hasher.update(&buffer[..bytes_read]);
            total_read += bytes_read as u64;

            // Stop as soon as the limit is reached so that a shutdown request
            // arriving after the last needed byte does not discard the result.
            if limit.is_some_and(|max| total_read >= max) {
                break;
            }
        }

        Ok(*hasher.finalize().as_bytes())
    }

    /// Converts an I/O error raised while handling `path` into a [`HashError`],
    /// logging it on the way.
    ///
    /// `NotFound` and `PermissionDenied` get dedicated variants; every other
    /// kind is wrapped in [`HashError::Io`] together with the path.
    fn map_io_error(&self, path: &Path, error: std::io::Error) -> HashError {
        match error.kind() {
            ErrorKind::NotFound => {
                log::debug!("File not found (TOCTOU): {}", path.display());
                HashError::NotFound(path.to_path_buf())
            }
            ErrorKind::PermissionDenied => {
                log::warn!("Permission denied: {}", path.display());
                HashError::PermissionDenied(path.to_path_buf())
            }
            _ => {
                log::warn!("I/O error for {}: {}", path.display(), error);
                HashError::Io {
                    path: path.to_path_buf(),
                    source: error,
                }
            }
        }
    }

    /// Hashes the entire file at `path` by handing a buffered reader to
    /// `blake3::Hasher::update_reader`.
    ///
    /// Unlike [`full_hash`](Self::full_hash), this method never consults the
    /// shutdown flag.
    ///
    /// # Errors
    ///
    /// Returns [`HashError::NotFound`], [`HashError::PermissionDenied`] or
    /// [`HashError::Io`] when the file cannot be opened or read.
    pub fn full_hash_optimized(&self, path: &Path) -> Result<Hash, HashError> {
        let file = File::open(path).map_err(|e| self.map_io_error(path, e))?;
        let mut reader = BufReader::with_capacity(BUFFER_SIZE, file);
        let mut hasher = blake3::Hasher::new();
        hasher
            .update_reader(&mut reader)
            .map_err(|e| self.map_io_error(path, e))?;
        Ok(*hasher.finalize().as_bytes())
    }
}
/// Renders a hash as a 64-character lowercase hexadecimal string, two digits
/// per byte in order.
#[must_use]
pub fn hash_to_hex(hash: &Hash) -> String {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";

    let mut hex = String::with_capacity(hash.len() * 2);
    for &byte in hash {
        // High nibble first, then low nibble.
        hex.push(char::from(DIGITS[usize::from(byte >> 4)]));
        hex.push(char::from(DIGITS[usize::from(byte & 0x0f)]));
    }
    hex
}
/// Parses a 64-character hexadecimal string into a hash.
///
/// Both lowercase and uppercase digits are accepted. Returns `None` when the
/// input is not exactly 64 bytes long or contains anything other than ASCII
/// hex digits. Sign characters such as `+` are rejected; a parser based on
/// `u8::from_str_radix` would silently accept them.
pub fn hex_to_hash(hex: &str) -> Option<Hash> {
    /// Decodes one ASCII hex digit into its 4-bit value.
    fn nibble(digit: u8) -> Option<u8> {
        match digit {
            b'0'..=b'9' => Some(digit - b'0'),
            b'a'..=b'f' => Some(digit - b'a' + 10),
            b'A'..=b'F' => Some(digit - b'A' + 10),
            _ => None,
        }
    }

    let bytes = hex.as_bytes();
    if bytes.len() != 64 {
        return None;
    }

    let mut hash = [0u8; 32];
    for (out, pair) in hash.iter_mut().zip(bytes.chunks_exact(2)) {
        *out = (nibble(pair[0])? << 4) | nibble(pair[1])?;
    }
    Some(hash)
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Creates `name` inside `dir`, fills it with `content`, and returns its path.
    fn create_test_file(dir: &TempDir, name: &str, content: &[u8]) -> std::path::PathBuf {
        let path = dir.path().join(name);
        std::fs::write(&path, content).unwrap();
        path
    }

    /// Builds `len` bytes following the repeating pattern 0, 1, ..., 255.
    fn patterned_bytes(len: usize) -> Vec<u8> {
        (0..len).map(|i| (i % 256) as u8).collect()
    }

    /// Two files with the same bytes must hash identically.
    #[test]
    fn test_hasher_identical_content_same_hash() {
        let dir = TempDir::new().unwrap();
        let content = b"Hello, world!";
        let first = create_test_file(&dir, "file1.txt", content);
        let second = create_test_file(&dir, "file2.txt", content);

        let hasher = Hasher::new();
        let first_hash = hasher.full_hash(&first).unwrap();
        let second_hash = hasher.full_hash(&second).unwrap();

        assert_eq!(
            first_hash, second_hash,
            "Identical content should produce identical hashes"
        );
    }

    /// Files with different bytes must hash differently.
    #[test]
    fn test_hasher_different_content_different_hash() {
        let dir = TempDir::new().unwrap();
        let first = create_test_file(&dir, "file1.txt", b"Hello");
        let second = create_test_file(&dir, "file2.txt", b"World");

        let hasher = Hasher::new();
        let first_hash = hasher.full_hash(&first).unwrap();
        let second_hash = hasher.full_hash(&second).unwrap();

        assert_ne!(
            first_hash, second_hash,
            "Different content should produce different hashes"
        );
    }

    /// A file shorter than the prehash size is covered entirely by the prehash.
    #[test]
    fn test_prehash_small_file_equals_full_hash() {
        let dir = TempDir::new().unwrap();
        let path = create_test_file(&dir, "small.txt", b"Small file content");

        let hasher = Hasher::new();
        let partial = hasher.prehash(&path).unwrap();
        let complete = hasher.full_hash(&path).unwrap();

        assert_eq!(
            partial, complete,
            "Prehash of small file should equal full hash"
        );
    }

    /// An 8 KiB file exceeds the default prehash size, so the two hashes differ.
    #[test]
    fn test_prehash_large_file_differs_from_full_hash() {
        let dir = TempDir::new().unwrap();
        let content = patterned_bytes(8 * 1024);
        let path = create_test_file(&dir, "large.bin", &content);

        let hasher = Hasher::new();
        let partial = hasher.prehash(&path).unwrap();
        let complete = hasher.full_hash(&path).unwrap();

        assert_ne!(
            partial, complete,
            "Prehash of large file should differ from full hash"
        );
    }

    /// An empty file hashes to the BLAKE3 digest of the empty input.
    #[test]
    fn test_empty_file_hash() {
        let dir = TempDir::new().unwrap();
        let path = create_test_file(&dir, "empty.txt", b"");

        let actual = Hasher::new().full_hash(&path).unwrap();
        let expected_empty_hash = blake3::hash(b"");

        assert_eq!(actual, *expected_empty_hash.as_bytes());
    }

    /// Hashing the same file repeatedly yields the same value every time.
    #[test]
    fn test_hash_deterministic() {
        let dir = TempDir::new().unwrap();
        let path = create_test_file(&dir, "det.txt", b"Deterministic content");

        let hasher = Hasher::new();
        let first = hasher.full_hash(&path).unwrap();
        let second = hasher.full_hash(&path).unwrap();
        let third = hasher.full_hash(&path).unwrap();

        assert_eq!(first, second);
        assert_eq!(second, third);
    }

    /// A missing file is reported as `HashError::NotFound` carrying the path.
    #[test]
    fn test_file_not_found_error() {
        let hasher = Hasher::new();
        let result = hasher.full_hash(Path::new("/nonexistent/file/12345.txt"));
        assert!(result.is_err());

        let err = result.unwrap_err();
        if let HashError::NotFound(path) = &err {
            assert!(path.to_string_lossy().contains("12345.txt"));
        } else {
            panic!("Expected NotFound error, got: {:?}", err);
        }
    }

    /// The reader-based full hash agrees with the chunked full hash.
    #[test]
    fn test_full_hash_optimized_matches_regular() {
        let dir = TempDir::new().unwrap();
        let path = create_test_file(&dir, "opt.txt", b"Test content for optimized hash");

        let hasher = Hasher::new();
        let regular = hasher.full_hash(&path).unwrap();
        let optimized = hasher.full_hash_optimized(&path).unwrap();

        assert_eq!(regular, optimized);
    }

    /// Hex encoding is 64 lowercase hex digits.
    #[test]
    fn test_hash_to_hex() {
        let hex = hash_to_hex(&[0xAB; 32]);

        assert_eq!(hex.len(), 64);
        assert!(hex.chars().all(|c| c.is_ascii_hexdigit()));
        assert_eq!(hex, "ab".repeat(32));
    }

    /// Encoding a hash and decoding the result returns the original bytes.
    #[test]
    fn test_hex_to_hash_roundtrip() {
        // 0x01 0x23 ... 0xEF repeated four times.
        let pattern = [0x01u8, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF];
        let mut original: Hash = [0; 32];
        for (slot, byte) in original.iter_mut().zip(pattern.iter().cycle()) {
            *slot = *byte;
        }

        let parsed = hex_to_hash(&hash_to_hex(&original)).unwrap();

        assert_eq!(original, parsed);
    }

    /// Wrong lengths and non-hex characters are rejected.
    #[test]
    fn test_hex_to_hash_invalid() {
        let non_hex = "gg".repeat(32);

        assert!(hex_to_hash("").is_none());
        assert!(hex_to_hash("abc").is_none());
        assert!(hex_to_hash(&non_hex).is_none());
    }

    /// The configured prehash size controls how much of the file is hashed.
    #[test]
    fn test_custom_prehash_size() {
        let dir = TempDir::new().unwrap();
        let content = patterned_bytes(2048);
        let path = create_test_file(&dir, "custom.bin", &content);

        let hash_1k = Hasher::with_prehash_size(1024).prehash(&path).unwrap();
        let hash_2k = Hasher::with_prehash_size(2048).prehash(&path).unwrap();
        assert_ne!(hash_1k, hash_2k);

        // A 2 KiB prehash covers the whole 2 KiB file.
        let full = Hasher::new().full_hash(&path).unwrap();
        assert_eq!(hash_2k, full);
    }

    /// A shutdown flag that is already set aborts hashing with `Interrupted`.
    #[test]
    fn test_shutdown_flag_interrupts_hash() {
        let dir = TempDir::new().unwrap();
        let content = vec![0u8; 256 * 1024];
        let path = create_test_file(&dir, "large.bin", &content);

        let shutdown = Arc::new(AtomicBool::new(true));
        let hasher = Hasher::new().with_shutdown_flag(shutdown);

        let result = hasher.full_hash(&path);
        assert!(result.is_err());

        let err = result.unwrap_err();
        if let HashError::Io { source, .. } = &err {
            assert_eq!(source.kind(), ErrorKind::Interrupted);
        } else {
            panic!("Expected Io error with Interrupted, got: {:?}", err);
        }
    }

    /// `Hasher` can be shared across threads.
    #[test]
    fn test_hasher_is_send_sync() {
        fn assert_send_sync<T>()
        where
            T: Send + Sync,
        {
        }

        assert_send_sync::<Hasher>();
    }

    /// A zero prehash size violates the constructor's precondition.
    #[test]
    #[should_panic(expected = "prehash_size must be greater than 0")]
    fn test_zero_prehash_size_panics() {
        let _ = Hasher::with_prehash_size(0);
    }
}