use crate::error::OxenError;
use crate::model::metadata::generic_metadata::GenericMetadata;
use crate::model::{ContentHashable, NewCommit};
use crate::util;
use sha2::{Digest, Sha256};
use std::fs::File;
use std::io::BufReader;
use std::io::prelude::*;
use std::path::Path;
use xxhash_rust::xxh3::{Xxh3, xxh3_128};
/// Hashes `buffer` with 128-bit xxh3 and renders the digest as a hex string.
///
/// The digest is formatted with `{:x}`, i.e. lowercase hexadecimal without
/// zero padding, so the returned string can be shorter than 32 characters
/// when the digest has leading zero nibbles.
pub fn hash_buffer(buffer: &[u8]) -> String {
    format!("{:x}", xxh3_128(buffer))
}
/// Hashes the UTF-8 bytes of a string-like value via [`hash_buffer`].
///
/// Returns the same lowercase hex string `hash_buffer` would produce for
/// `buffer.as_ref().as_bytes()`.
pub fn hash_str<S: AsRef<str>>(buffer: S) -> String {
    hash_buffer(buffer.as_ref().as_bytes())
}
/// Computes the SHA-256 digest of a string's UTF-8 bytes.
///
/// Returns the digest formatted with `{:x}` (lowercase hexadecimal).
pub fn hash_str_sha256<S: AsRef<str>>(str: S) -> String {
    // One-shot digest: equivalent to new() + update() + finalize().
    let digest = Sha256::digest(str.as_ref().as_bytes());
    format!("{digest:x}")
}
/// Returns the raw 128-bit xxh3 digest of `buffer`.
///
/// Unlike [`hash_buffer`], the digest is returned as a `u128` rather than
/// being rendered as a hex string.
pub fn hash_buffer_128bit(buffer: &[u8]) -> u128 {
    let digest: u128 = xxh3_128(buffer);
    digest
}
pub fn compute_commit_hash<E>(commit_data: &NewCommit, entries: &[E]) -> String
where
E: ContentHashable + std::fmt::Debug,
{
let mut commit_hasher = xxhash_rust::xxh3::Xxh3::new();
log::debug!("Hashing {} entries", entries.len());
for entry in entries.iter() {
let hash = entry.content_hash();
let input = hash.as_bytes();
commit_hasher.update(input);
}
log::debug!("Hashing commit data {commit_data:?}");
let commit_str = format!("{commit_data:?}");
commit_hasher.update(commit_str.as_bytes());
let val = commit_hasher.digest();
format!("{val:x}")
}
/// Hashes a file's contents, retrying with exponential backoff on failure.
///
/// Calls [`hash_file_contents`] up to six times in total (the initial attempt
/// plus five retries). After each failed attempt that still has retries left,
/// the current thread sleeps for a doubling delay (2s, 4s, 8s, 16s, 32s)
/// before trying again.
///
/// # Errors
///
/// Returns the error from the last attempt once all retries are exhausted.
/// The error is returned immediately, without sleeping first.
pub fn hash_file_contents_with_retry(path: &Path) -> Result<String, OxenError> {
    let total_retries = 5;
    let mut timeout = 1;
    let mut retries = 0;
    loop {
        match hash_file_contents(path) {
            Ok(hash) => return Ok(hash),
            Err(err) => {
                retries += 1;
                // Give up before sleeping: waiting after the final failure
                // only delays reporting the error to the caller.
                if retries > total_retries {
                    return Err(err);
                }
                timeout *= 2;
                log::warn!(
                    "Failed to hash file {path:?}: {err:?}; sleeping {timeout}s before retry {retries}/{total_retries}"
                );
                std::thread::sleep(std::time::Duration::from_secs(timeout));
            }
        }
    }
}
/// Hashes the file at `path`, choosing the strategy from already-fetched
/// filesystem metadata.
///
/// Files whose reported length is below 1_000_000_000 bytes go through
/// `hash_small_file_contents`; anything at or above that length goes through
/// `hash_large_file_contents`.
pub fn get_hash_given_metadata(
    path: &Path,
    metadata: &std::fs::Metadata,
) -> Result<u128, OxenError> {
    let is_large = metadata.len() >= 1_000_000_000;
    if is_large {
        return hash_large_file_contents(path);
    }
    hash_small_file_contents(path)
}
/// Combines a content hash with an optional metadata hash.
///
/// When `oxen_metadata_hash` is `None`, `content_hash` is returned unchanged.
/// Otherwise the little-endian bytes of the content hash, followed by the
/// little-endian bytes of the metadata hash, are fed to a fresh xxh3 hasher
/// and its 128-bit digest is returned.
///
/// This function always returns `Ok`.
pub fn get_combined_hash(
    oxen_metadata_hash: Option<u128>,
    content_hash: u128,
) -> Result<u128, OxenError> {
    let combined = oxen_metadata_hash.map_or(content_hash, |metadata_hash| {
        let mut mixer = Xxh3::new();
        // Order matters: content bytes first, then metadata bytes.
        mixer.update(&content_hash.to_le_bytes());
        mixer.update(&metadata_hash.to_le_bytes());
        mixer.digest128()
    });
    Ok(combined)
}
pub fn maybe_get_metadata_hash(
oxen_metadata: &Option<GenericMetadata>,
) -> Result<Option<u128>, OxenError> {
if let Some(metadata) = oxen_metadata {
let mut hasher = Xxh3::new();
let metadata_str = serde_json::to_string(&metadata).unwrap();
hasher.update(metadata_str.as_bytes());
Ok(Some(hasher.digest128()))
} else {
Ok(None)
}
}
pub fn get_metadata_hash(oxen_metadata: &Option<GenericMetadata>) -> Result<u128, OxenError> {
let mut hasher = Xxh3::new();
let metadata_str = serde_json::to_string(&oxen_metadata).unwrap();
hasher.update(metadata_str.as_bytes());
Ok(hasher.digest128())
}
/// Returns the 128-bit content hash of the file at `path` together with its
/// size in bytes.
///
/// The size is read via `util::fs::metadata` before hashing and decides the
/// strategy: files under 1_000_000_000 bytes use `hash_small_file_contents`,
/// larger ones use `hash_large_file_contents`.
///
/// # Errors
///
/// Propagates failures from the metadata lookup or from hashing.
pub fn get_hash_and_size(path: &Path) -> Result<(u128, u64), OxenError> {
    let file_size = util::fs::metadata(path)?.len();
    let hashed = if file_size < 1_000_000_000 {
        hash_small_file_contents(path)
    } else {
        hash_large_file_contents(path)
    };
    hashed.map(|hash| (hash, file_size))
}
/// Returns the 128-bit xxh3 content hash of the file at `path` as a `u128`.
///
/// The file size (from `util::fs::metadata`) selects the strategy: below
/// 1_000_000_000 bytes `hash_small_file_contents` is used, otherwise
/// `hash_large_file_contents`.
///
/// # Errors
///
/// Propagates failures from the metadata lookup or from hashing.
pub fn u128_hash_file_contents(path: &Path) -> Result<u128, OxenError> {
    let size_in_bytes = util::fs::metadata(path)?.len();
    match size_in_bytes {
        0..=999_999_999 => hash_small_file_contents(path),
        _ => hash_large_file_contents(path),
    }
}
/// Returns the content hash of the file at `path` as a lowercase hex string.
///
/// The file size (from `util::fs::metadata`) selects the strategy: below
/// 1_000_000_000 bytes `hash_small_file_contents` is used, otherwise
/// `hash_large_file_contents`. The 128-bit digest is formatted with `{:x}`,
/// so it is not zero-padded.
///
/// # Errors
///
/// Propagates failures from the metadata lookup or from hashing.
pub fn hash_file_contents(path: &Path) -> Result<String, OxenError> {
    let file_size = util::fs::metadata(path)?.len();
    let digest = if file_size >= 1_000_000_000 {
        hash_large_file_contents(path)?
    } else {
        hash_small_file_contents(path)?
    };
    Ok(format!("{digest:x}"))
}
/// Hashes a file by reading its entire contents into memory first.
///
/// The file is read to the end into a `Vec<u8>` and the buffer is hashed in
/// one shot with `hash_buffer_128bit`.
///
/// # Errors
///
/// Returns an error naming the path and the underlying I/O error if the file
/// cannot be opened or read. A read failure is also reported on stderr.
fn hash_small_file_contents(path: &Path) -> Result<u128, OxenError> {
    let file = File::open(path).map_err(|err| {
        OxenError::basic_str(format!(
            "util::hasher::hash_file_contents Could not open file {path:?} {err:?}"
        ))
    })?;

    let mut reader = BufReader::new(file);
    let mut buffer = Vec::new();
    reader.read_to_end(&mut buffer).map_err(|err| {
        // Keep the path and the I/O cause so the failure can be diagnosed
        // from the returned error alone.
        let msg = format!("Could not read file for hashing {path:?} due to {err:?}");
        eprintln!("{msg}");
        OxenError::basic_str(msg)
    })?;

    Ok(hash_buffer_128bit(&buffer))
}
/// Hashes a file incrementally, without loading it fully into memory.
///
/// The file is read through a `BufReader` in 4096-byte chunks, each chunk is
/// fed to a streaming xxh3 hasher, and the 128-bit digest is returned once a
/// read reports zero bytes.
///
/// # Errors
///
/// Returns an error naming the path and the underlying I/O error if the file
/// cannot be opened or read. Both failures are also reported on stderr.
/// Reads that fail with `ErrorKind::Interrupted` are retried, not reported.
fn hash_large_file_contents(path: &Path) -> Result<u128, OxenError> {
    let file = File::open(path).map_err(|err| {
        eprintln!("Could not open file {path:?} due to {err:?}");
        OxenError::basic_str(format!("Could not open file {path:?} due to {err:?}"))
    })?;
    let mut reader = BufReader::new(file);
    let mut hasher = Xxh3::new();
    let mut buffer = [0; 4096];
    loop {
        let count = match reader.read(&mut buffer) {
            // Zero bytes read means end of file.
            Ok(0) => break,
            Ok(count) => count,
            // An interrupted read is not a real failure; try again.
            Err(err) if err.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(err) => {
                let msg = format!("Could not read file for hashing {path:?} due to {err:?}");
                eprintln!("{msg}");
                return Err(OxenError::basic_str(msg));
            }
        };
        hasher.update(&buffer[..count]);
    }
    Ok(hasher.digest128())
}
pub fn hash_path_name(path: impl AsRef<Path>) -> String {
hash_str(path.as_ref().to_str().unwrap())
}