#[cfg(feature = "sha256")]
use sha2::Digest;
use std::fmt;
use std::hash::Hash;
use std::marker::PhantomData;
/// A hash function that can be plugged into [`ContentId`].
///
/// Every item is associated (there are no `self` methods), so implementors act
/// as type-level tags selecting an algorithm.
pub trait Hasher: Clone + Default + PartialEq + Send + Sync + 'static {
/// Digest value produced by [`Hasher::digest`]; it must be viewable as raw bytes.
type Digest: AsRef<[u8]> + Clone + PartialEq + Eq + Hash + Send + Sync + 'static;
/// Digest length in bytes; `ContentId::from_hash` rejects inputs of any other length.
const HASH_SIZE: usize;
/// Hashes `data` and returns its digest.
fn digest(data: &[u8]) -> Self::Digest;
/// Name of the algorithm; it appears in the `Debug` output of [`ContentId`].
fn algorithm_name() -> &'static str;
}
/// [`Hasher`] implementation backed by the `blake3` crate.
#[derive(Clone, Default, Debug, PartialEq, Eq, Hash)]
pub struct Blake3Hasher;

impl Hasher for Blake3Hasher {
    type Digest = [u8; 32];

    const HASH_SIZE: usize = 32;

    /// Hashes `data` with `blake3::hash` and returns a copy of the digest bytes.
    fn digest(data: &[u8]) -> Self::Digest {
        let hashed = blake3::hash(data);
        *hashed.as_bytes()
    }

    /// Always `"blake3"`.
    fn algorithm_name() -> &'static str {
        "blake3"
    }
}
/// Hasher used by [`ContentId`] when no hasher type parameter is given.
pub type DefaultContentHasher = Blake3Hasher;
/// [`Hasher`] implementation backed by `sha2::Sha256`.
///
/// Only compiled when the `sha256` feature is enabled.
#[cfg(feature = "sha256")]
#[derive(Clone, Default, Debug, PartialEq, Eq, Hash)]
pub struct Sha256Hasher;

#[cfg(feature = "sha256")]
impl Hasher for Sha256Hasher {
    type Digest = [u8; 32];

    const HASH_SIZE: usize = 32;

    /// Hashes `data` with `sha2::Sha256` and converts the output into a byte array.
    fn digest(data: &[u8]) -> Self::Digest {
        let output = sha2::Sha256::digest(data);
        output.into()
    }

    /// Always `"sha256"`.
    fn algorithm_name() -> &'static str {
        "sha256"
    }
}
/// Identifier for a piece of content: the digest of its bytes under hasher `H`.
///
/// `H` is a compile-time tag only (held as [`PhantomData`]); the stored state
/// is the digest itself, so ids produced with different hashers are distinct
/// types.
///
/// `Clone`, `PartialEq`, `Eq` and `Hash` are implemented by hand rather than
/// derived: a derive would additionally demand `H: Eq + Hash`, which the
/// [`Hasher`] trait does not require, even though only `H::Digest` takes part
/// in comparison and hashing.
pub struct ContentId<H: Hasher = DefaultContentHasher> {
    /// Digest bytes that identify the content.
    hash: H::Digest,
    /// Ties the id to its hasher type without storing a hasher value.
    _hasher: PhantomData<H>,
}

impl<H: Hasher> Clone for ContentId<H> {
    /// Clones the digest; the hasher tag carries no data.
    fn clone(&self) -> Self {
        Self {
            hash: self.hash.clone(),
            _hasher: PhantomData,
        }
    }
}

impl<H: Hasher> PartialEq for ContentId<H> {
    /// Two ids are equal exactly when their digests are equal.
    fn eq(&self, other: &Self) -> bool {
        self.hash == other.hash
    }
}

impl<H: Hasher> Eq for ContentId<H> {}

impl<H: Hasher> Hash for ContentId<H> {
    /// Feeds only the digest into `state`.
    fn hash<S: std::hash::Hasher>(&self, state: &mut S) {
        // `PhantomData` contributes nothing to a hash, so this produces the
        // same stream a derived implementation would.
        Hash::hash(&self.hash, state);
    }
}
/// Error describing a hash whose byte length differs from the required one.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ContentIdError {
    /// Number of bytes that was required.
    expected: usize,
    /// Number of bytes that was actually supplied.
    actual: usize,
}

impl ContentIdError {
    /// Builds the error for an input of `actual` bytes where `expected` bytes
    /// were required.
    #[must_use]
    pub fn invalid_length(expected: usize, actual: usize) -> Self {
        ContentIdError { expected, actual }
    }
}

impl fmt::Display for ContentIdError {
    /// Writes the supplied length first, then the required length.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let ContentIdError { expected, actual } = self;
        f.write_fmt(format_args!(
            "hash length {} doesn't match expected size {}",
            actual, expected
        ))
    }
}

impl std::error::Error for ContentIdError {}
impl<H: Hasher> ContentId<H> {
    /// Computes the id of `data` by hashing it with `H`.
    #[must_use]
    pub fn from_bytes(data: &[u8]) -> Self {
        Self {
            hash: H::digest(data),
            _hasher: PhantomData,
        }
    }

    /// Wraps an already-computed digest without hashing anything.
    ///
    /// # Errors
    ///
    /// Returns [`ContentIdError`] when `hash` is not exactly `H::HASH_SIZE`
    /// bytes long, or when `H::Digest` refuses the conversion from the slice.
    pub fn from_hash(hash: impl AsRef<[u8]>) -> Result<Self, ContentIdError>
    where
        for<'a> H::Digest: TryFrom<&'a [u8]>,
    {
        let bytes = hash.as_ref();
        if bytes.len() != H::HASH_SIZE {
            return Err(ContentIdError::invalid_length(H::HASH_SIZE, bytes.len()));
        }
        // The conversion error type is opaque here, so a rejection is reported
        // with the same length-mismatch error as the explicit check above.
        let digest = H::Digest::try_from(bytes)
            .map_err(|_| ContentIdError::invalid_length(H::HASH_SIZE, bytes.len()))?;
        Ok(Self {
            hash: digest,
            _hasher: PhantomData,
        })
    }

    /// Borrows the raw digest bytes.
    #[must_use]
    pub fn as_bytes(&self) -> &[u8] {
        self.hash.as_ref()
    }

    /// Renders the digest as lowercase hexadecimal, two characters per byte.
    #[must_use]
    pub fn to_hex(&self) -> String {
        const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";

        let bytes: &[u8] = self.hash.as_ref();
        // One allocation sized up front, instead of a temporary `String` per byte.
        let mut hex = String::with_capacity(bytes.len() * 2);
        for &byte in bytes {
            hex.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
            hex.push(char::from(HEX_DIGITS[usize::from(byte & 0x0f)]));
        }
        hex
    }

    /// Name of the algorithm behind this id, as reported by `H::algorithm_name`.
    #[must_use]
    pub fn algorithm(&self) -> &'static str {
        H::algorithm_name()
    }
}
impl<H: Hasher> fmt::Debug for ContentId<H> {
    /// Formats as `ContentId<algorithm>(hex)`, showing at most the first eight
    /// digest bytes; `...` follows the hex only when bytes were left out.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        const PREVIEW_BYTES: usize = 8;

        let bytes: &[u8] = self.hash.as_ref();
        write!(f, "ContentId<{}>(", H::algorithm_name())?;
        // Written straight to the formatter: no intermediate strings.
        for byte in bytes.iter().take(PREVIEW_BYTES) {
            write!(f, "{byte:02x}")?;
        }
        // The ellipsis marks truncation, so it is skipped when the whole digest fit.
        if bytes.len() > PREVIEW_BYTES {
            f.write_str("...")?;
        }
        f.write_str(")")
    }
}
impl<H: Hasher> fmt::Display for ContentId<H> {
    /// Writes the full digest as produced by `to_hex`; width and alignment
    /// flags on the formatter are not applied.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let hex = self.to_hex();
        f.write_str(&hex)
    }
}
/// [`ContentId`] computed with [`DefaultContentHasher`].
pub type DefaultContentId = ContentId<DefaultContentHasher>;
/// [`ContentId`] computed with [`Blake3Hasher`].
pub type ContentIdBlake3 = ContentId<Blake3Hasher>;
/// [`ContentId`] computed with `Sha256Hasher`; only available with the `sha256` feature.
#[cfg(feature = "sha256")]
pub type ContentIdSha256 = ContentId<Sha256Hasher>;
#[cfg(test)]
mod tests {
    use super::*;

    /// Lowercase hex rendering of raw digest bytes, two characters per byte.
    fn hex_of(digest: &[u8]) -> String {
        digest.iter().map(|byte| format!("{byte:02x}")).collect()
    }

    // --- Raw hasher output against known digests ---

    #[test]
    fn test_blake3_empty() {
        let digest = Blake3Hasher::digest(b"");
        assert_eq!(
            hex_of(&digest),
            "af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262"
        );
    }

    #[test]
    fn test_blake3_hello() {
        let digest = Blake3Hasher::digest(b"hello");
        assert_eq!(
            hex_of(&digest),
            "ea8f163db38682925e4491c5e58d4bb3506ef8c14eb78a86e908c5624a67200f"
        );
    }

    #[cfg(feature = "sha256")]
    #[test]
    fn test_sha256_empty() {
        let digest = Sha256Hasher::digest(b"");
        assert_eq!(
            hex_of(&digest),
            "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
        );
    }

    #[cfg(feature = "sha256")]
    #[test]
    fn test_sha256_hello() {
        let digest = Sha256Hasher::digest(b"hello");
        assert_eq!(
            hex_of(&digest),
            "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824"
        );
    }

    // --- ContentId behaviour ---

    #[test]
    fn test_content_id_from_bytes() {
        let id = ContentIdBlake3::from_bytes(b"test");
        assert_eq!(id.as_bytes().len(), 32);
    }

    #[test]
    fn test_content_id_deterministic() {
        let first = ContentIdBlake3::from_bytes(b"same data");
        let second = ContentIdBlake3::from_bytes(b"same data");
        assert_eq!(first, second);
    }

    #[test]
    fn test_content_id_different() {
        let left = ContentIdBlake3::from_bytes(b"data1");
        let right = ContentIdBlake3::from_bytes(b"data2");
        assert_ne!(left, right);
    }

    #[test]
    fn test_content_id_hex() {
        let id = ContentIdBlake3::from_bytes(b"");
        assert_eq!(
            id.to_hex(),
            "af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262"
        );
    }

    #[test]
    fn test_content_id_debug() {
        let id = ContentIdBlake3::from_bytes(b"test");
        let rendered = format!("{id:?}");
        assert!(rendered.contains("ContentId<blake3>"));
    }
}