use crate::core::error::ScanError;
use crate::core::input::FileInput;
use crate::core::types::FileHash;
use std::io::Read;
use std::path::Path;
/// Configurable hasher for files, byte slices and readers.
///
/// A BLAKE3 digest is always produced. SHA-256 and MD5 digests are optional and
/// are produced only when the corresponding flag below is set. The derived
/// `Default` leaves both flags `false`.
#[derive(Debug, Clone, Default)]
pub struct FileHasher {
    /// When `true`, a SHA-256 digest is computed in addition to BLAKE3.
    compute_sha256: bool,
    /// When `true`, an MD5 digest is computed in addition to BLAKE3.
    compute_md5: bool,
}
impl FileHasher {
pub fn new() -> Self {
Self::default()
}
pub fn with_sha256(mut self, enabled: bool) -> Self {
self.compute_sha256 = enabled;
self
}
pub fn with_md5(mut self, enabled: bool) -> Self {
self.compute_md5 = enabled;
self
}
pub fn computes_sha256(&self) -> bool {
self.compute_sha256
}
pub fn computes_md5(&self) -> bool {
self.compute_md5
}
pub fn hash_bytes(&self, data: &[u8]) -> FileHash {
let blake3 = blake3::hash(data).to_hex().to_string();
let sha256 = if self.compute_sha256 {
Some(compute_sha256_sync(data))
} else {
None
};
let md5 = if self.compute_md5 {
Some(compute_md5_sync(data))
} else {
None
};
FileHash { blake3, sha256, md5 }
}
pub fn hash_file(&self, path: &Path) -> Result<FileHash, ScanError> {
let file = std::fs::File::open(path).map_err(|e| {
if e.kind() == std::io::ErrorKind::NotFound {
ScanError::FileNotFound {
path: path.display().to_string(),
}
} else {
ScanError::Io(e)
}
})?;
let mut reader = std::io::BufReader::new(file);
self.hash_reader(&mut reader)
}
pub fn hash_reader<R: Read>(&self, reader: &mut R) -> Result<FileHash, ScanError> {
let mut blake3_hasher = blake3::Hasher::new();
let mut sha256_hasher: Option<Sha256State> = if self.compute_sha256 {
Some(Sha256State::new())
} else {
None
};
let mut md5_hasher: Option<Md5State> = if self.compute_md5 {
Some(Md5State::new())
} else {
None
};
let mut buffer = [0u8; 64 * 1024]; loop {
let bytes_read = reader.read(&mut buffer)?;
if bytes_read == 0 {
break;
}
let chunk = &buffer[..bytes_read];
blake3_hasher.update(chunk);
if let Some(ref mut h) = sha256_hasher {
h.update(chunk);
}
if let Some(ref mut h) = md5_hasher {
h.update(chunk);
}
}
let blake3 = blake3_hasher.finalize().to_hex().to_string();
let sha256 = sha256_hasher.map(|h| h.finalize());
let md5 = md5_hasher.map(|h| h.finalize());
Ok(FileHash { blake3, sha256, md5 })
}
pub fn hash_input(&self, input: &FileInput) -> Result<FileHash, ScanError> {
match input {
FileInput::Path(path) => self.hash_file(path),
FileInput::Bytes { data, .. } => Ok(self.hash_bytes(data)),
FileInput::Stream { .. } => Err(ScanError::internal(
"Cannot hash stream synchronously; use hash_input_async",
)),
}
}
}
struct Sha256State {
buffer: Vec<u8>,
}
impl Sha256State {
fn new() -> Self {
Self {
buffer: Vec::new(),
}
}
fn update(&mut self, data: &[u8]) {
self.buffer.extend_from_slice(data);
}
fn finalize(self) -> String {
compute_sha256_sync(&self.buffer)
}
}
struct Md5State {
buffer: Vec<u8>,
}
impl Md5State {
fn new() -> Self {
Self { buffer: Vec::new() }
}
fn update(&mut self, data: &[u8]) {
self.buffer.extend_from_slice(data);
}
fn finalize(self) -> String {
compute_md5_sync(&self.buffer)
}
}
/// Returns the SHA-256 digest string for `data`.
///
/// With the `sha2` feature enabled this is the lower-case hex SHA-256 digest.
/// Without it, the result is NOT a SHA-256 digest: it is the literal prefix
/// `sha256-compat-` followed by the first 32 hex characters of the BLAKE3
/// digest of `data`.
fn compute_sha256_sync(data: &[u8]) -> String {
    #[cfg(feature = "sha2")]
    {
        use sha2::{Digest, Sha256};
        // One-shot digest over the whole slice.
        format!("{:x}", Sha256::digest(data))
    }
    #[cfg(not(feature = "sha2"))]
    {
        let blake3_hex = blake3::hash(data).to_hex();
        format!("sha256-compat-{}", &blake3_hex[..32])
    }
}
/// Returns the MD5 digest string for `data`.
///
/// With the `md-5` feature enabled this is the lower-case hex MD5 digest.
/// Without it, the result is NOT an MD5 digest: it is the literal prefix
/// `md5-compat-` followed by the first 16 hex characters of the BLAKE3 digest
/// of `data`.
fn compute_md5_sync(data: &[u8]) -> String {
    #[cfg(feature = "md-5")]
    {
        use md5::{Digest, Md5};
        // One-shot digest over the whole slice.
        format!("{:x}", Md5::digest(data))
    }
    #[cfg(not(feature = "md-5"))]
    {
        let blake3_hex = blake3::hash(data).to_hex();
        format!("md5-compat-{}", &blake3_hex[..16])
    }
}
/// Asynchronous hashing entry points, available with the `tokio-runtime` feature.
#[cfg(feature = "tokio-runtime")]
pub mod async_hasher {
    use super::*;

    impl FileHasher {
        /// Reads the whole file at `path` into memory asynchronously and hashes it.
        ///
        /// # Errors
        ///
        /// Returns [`ScanError::FileNotFound`] when the read fails with
        /// `ErrorKind::NotFound` and [`ScanError::Io`] for any other failure.
        pub async fn hash_file_async(&self, path: &Path) -> Result<FileHash, ScanError> {
            let data = tokio::fs::read(path).await.map_err(|e| {
                if e.kind() == std::io::ErrorKind::NotFound {
                    ScanError::FileNotFound {
                        path: path.display().to_string(),
                    }
                } else {
                    ScanError::Io(e)
                }
            })?;
            Ok(self.hash_bytes(&data))
        }

        /// Hashes any [`FileInput`], including streams.
        ///
        /// Paths go through [`FileHasher::hash_file_async`] and in-memory bytes
        /// through [`FileHasher::hash_bytes`]. Streams are read in 64 KiB chunks
        /// while the reader lock is held, and each chunk is fed straight to the
        /// enabled digests instead of being collected into one buffer first.
        ///
        /// # Errors
        ///
        /// Propagates errors from `hash_file_async`; stream read failures are
        /// returned as [`ScanError::Io`].
        pub async fn hash_input_async(&self, input: &FileInput) -> Result<FileHash, ScanError> {
            match input {
                FileInput::Path(path) => self.hash_file_async(path).await,
                FileInput::Bytes { data, .. } => Ok(self.hash_bytes(data)),
                FileInput::Stream { reader, .. } => {
                    let mut guard = reader.lock().await;

                    let mut blake3_hasher = blake3::Hasher::new();
                    let mut sha256_hasher = self.compute_sha256.then(Sha256State::new);
                    let mut md5_hasher = self.compute_md5.then(Md5State::new);

                    let mut buf = [0u8; 64 * 1024];
                    loop {
                        let n = futures::AsyncReadExt::read(guard.as_mut(), &mut buf)
                            .await
                            .map_err(ScanError::Io)?;
                        // A zero-length read signals end-of-stream.
                        if n == 0 {
                            break;
                        }
                        let chunk = &buf[..n];
                        blake3_hasher.update(chunk);
                        if let Some(h) = sha256_hasher.as_mut() {
                            h.update(chunk);
                        }
                        if let Some(h) = md5_hasher.as_mut() {
                            h.update(chunk);
                        }
                    }

                    Ok(FileHash {
                        blake3: blake3_hasher.finalize().to_hex().to_string(),
                        sha256: sha256_hasher.map(Sha256State::finalize),
                        md5: md5_hasher.map(Md5State::finalize),
                    })
                }
            }
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Hashes `data` through `hash_reader`, panicking if the in-memory read fails.
    fn hash_via_reader(hasher: &FileHasher, data: &[u8]) -> FileHash {
        let mut reader = std::io::Cursor::new(data);
        match hasher.hash_reader(&mut reader) {
            Ok(hash) => hash,
            Err(_) => panic!("hashing an in-memory reader must not fail"),
        }
    }

    #[test]
    fn test_hash_bytes_blake3_only() {
        let hasher = FileHasher::new();
        let hash = hasher.hash_bytes(b"hello world");
        assert!(!hash.blake3.is_empty());
        assert_eq!(hash.sha256, None);
        assert_eq!(hash.md5, None);
    }

    #[test]
    fn test_hash_bytes_with_sha256() {
        let hasher = FileHasher::new().with_sha256(true);
        let hash = hasher.hash_bytes(b"hello world");
        assert!(!hash.blake3.is_empty());
        assert!(hash.sha256.is_some());
        assert_eq!(hash.md5, None);
    }

    #[test]
    fn test_hash_bytes_all() {
        let hasher = FileHasher::new().with_sha256(true).with_md5(true);
        let hash = hasher.hash_bytes(b"hello world");
        assert!(!hash.blake3.is_empty());
        assert!(hash.sha256.is_some());
        assert!(hash.md5.is_some());
    }

    #[test]
    fn test_hash_deterministic() {
        let hasher = FileHasher::new();
        let data = b"test data for hashing";
        let hash1 = hasher.hash_bytes(data);
        let hash2 = hasher.hash_bytes(data);
        assert_eq!(hash1, hash2);
    }

    #[test]
    fn test_hash_different_data() {
        let hasher = FileHasher::new();
        let hash1 = hasher.hash_bytes(b"data1");
        let hash2 = hasher.hash_bytes(b"data2");
        assert_ne!(hash1.blake3, hash2.blake3);
    }

    #[test]
    fn test_builder_flags_round_trip() {
        let hasher = FileHasher::new();
        assert!(!hasher.computes_sha256());
        assert!(!hasher.computes_md5());

        let hasher = hasher.with_sha256(true).with_md5(true);
        assert!(hasher.computes_sha256());
        assert!(hasher.computes_md5());

        let hasher = hasher.with_sha256(false);
        assert!(!hasher.computes_sha256());
        assert!(hasher.computes_md5());
    }

    #[test]
    fn test_hash_reader_matches_hash_bytes_across_chunks() {
        let hasher = FileHasher::new().with_sha256(true).with_md5(true);
        // Larger than the 64 KiB read buffer so the read loop runs several times.
        let data: Vec<u8> = (0..=250u8).cycle().take(200_000).collect();
        assert_eq!(hash_via_reader(&hasher, &data), hasher.hash_bytes(&data));
    }

    #[test]
    fn test_hash_reader_empty_input() {
        let hasher = FileHasher::new().with_sha256(true).with_md5(true);
        assert_eq!(hash_via_reader(&hasher, b""), hasher.hash_bytes(b""));
    }
}