pub mod cache_key;
pub mod link_cache_key;
use std::io::Read;
use std::path::Path;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
pub struct ContentHash([u8; 32]);
impl ContentHash {
#[must_use]
pub fn from_bytes(bytes: [u8; 32]) -> Self {
Self(bytes)
}
#[must_use]
pub fn to_hex(&self) -> String {
hex_encode(&self.0)
}
#[must_use]
pub fn as_bytes(&self) -> &[u8; 32] {
&self.0
}
#[must_use]
pub fn shard_prefix(&self, levels: usize, bytes_per_level: usize) -> Vec<String> {
let hex = self.to_hex();
let chars_per_level = bytes_per_level * 2;
let required = levels * chars_per_level;
assert!(
required <= hex.len(),
"shard_prefix: levels={levels} * bytes_per_level={bytes_per_level} \
requires {required} hex chars but hash is only {} chars",
hex.len()
);
(0..levels)
.map(|i| {
let start = i * chars_per_level;
let end = start + chars_per_level;
hex[start..end].to_string()
})
.collect()
}
}
impl std::fmt::Display for ContentHash {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.to_hex())
}
}
#[must_use]
pub fn hash_bytes(data: &[u8]) -> ContentHash {
let hash = blake3::hash(data);
ContentHash(*hash.as_bytes())
}
pub struct StreamHasher(blake3::Hasher);
impl StreamHasher {
#[must_use]
pub fn new() -> Self {
Self(blake3::Hasher::new())
}
pub fn update(&mut self, data: &[u8]) -> &mut Self {
self.0.update(data);
self
}
#[must_use]
pub fn finalize(self) -> ContentHash {
ContentHash(*self.0.finalize().as_bytes())
}
}
impl Default for StreamHasher {
fn default() -> Self {
Self::new()
}
}
pub fn hash_reader<R: Read>(mut reader: R) -> std::io::Result<ContentHash> {
let mut hasher = blake3::Hasher::new();
let mut buf = [0u8; 16384];
loop {
let n = reader.read(&mut buf)?;
if n == 0 {
break;
}
hasher.update(&buf[..n]);
}
Ok(ContentHash(*hasher.finalize().as_bytes()))
}
pub fn hash_file(path: &Path) -> std::io::Result<ContentHash> {
let file = std::fs::File::open(path)?;
let meta = file.metadata()?;
if meta.len() == 0 {
return Ok(hash_bytes(b""));
}
let mmap = unsafe { memmap2::Mmap::map(&file)? };
Ok(hash_bytes(&mmap))
}
fn hex_encode(bytes: &[u8]) -> String {
let mut s = String::with_capacity(bytes.len() * 2);
for b in bytes {
use std::fmt::Write;
let _ = write!(s, "{b:02x}");
}
s
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn hash_deterministic() {
let h1 = hash_bytes(b"hello world");
let h2 = hash_bytes(b"hello world");
assert_eq!(h1, h2);
}
#[test]
fn hash_different_inputs() {
let h1 = hash_bytes(b"hello");
let h2 = hash_bytes(b"world");
assert_ne!(h1, h2);
}
#[test]
fn hex_roundtrip() {
let h = hash_bytes(b"test");
let hex = h.to_hex();
assert_eq!(hex.len(), 64);
}
#[test]
fn shard_prefix_works() {
let h = hash_bytes(b"test");
let shards = h.shard_prefix(2, 1);
assert_eq!(shards.len(), 2);
assert_eq!(shards[0].len(), 2);
assert_eq!(shards[1].len(), 2);
}
#[test]
fn shard_prefix_max_valid() {
let h = hash_bytes(b"test");
let shards = h.shard_prefix(32, 1);
assert_eq!(shards.len(), 32);
}
#[test]
#[should_panic(expected = "shard_prefix")]
fn shard_prefix_overflow_panics() {
let h = hash_bytes(b"test");
let _ = h.shard_prefix(33, 1);
}
#[test]
#[should_panic(expected = "shard_prefix")]
fn shard_prefix_large_bytes_per_level_panics() {
let h = hash_bytes(b"test");
let _ = h.shard_prefix(2, 17);
}
}