1pub mod cache_key;
6pub mod link_cache_key;
7
8use std::io::Read;
9use std::path::Path;
10
11#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
13pub struct ContentHash([u8; 32]);
14
15impl ContentHash {
16 #[must_use]
18 pub fn from_bytes(bytes: [u8; 32]) -> Self {
19 Self(bytes)
20 }
21
22 #[must_use]
24 pub fn to_hex(&self) -> String {
25 hex_encode(&self.0)
26 }
27
28 #[must_use]
30 pub fn as_bytes(&self) -> &[u8; 32] {
31 &self.0
32 }
33
34 #[must_use]
40 pub fn shard_prefix(&self, levels: usize, bytes_per_level: usize) -> Vec<String> {
41 let hex = self.to_hex();
42 let chars_per_level = bytes_per_level * 2;
43 let required = levels * chars_per_level;
44 assert!(
45 required <= hex.len(),
46 "shard_prefix: levels={levels} * bytes_per_level={bytes_per_level} \
47 requires {required} hex chars but hash is only {} chars",
48 hex.len()
49 );
50 (0..levels)
51 .map(|i| {
52 let start = i * chars_per_level;
53 let end = start + chars_per_level;
54 hex[start..end].to_string()
55 })
56 .collect()
57 }
58}
59
60impl std::fmt::Display for ContentHash {
61 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
62 write!(f, "{}", self.to_hex())
63 }
64}
65
66#[must_use]
68pub fn hash_bytes(data: &[u8]) -> ContentHash {
69 let hash = blake3::hash(data);
70 ContentHash(*hash.as_bytes())
71}
72
73pub struct StreamHasher(blake3::Hasher);
78
79impl StreamHasher {
80 #[must_use]
82 pub fn new() -> Self {
83 Self(blake3::Hasher::new())
84 }
85
86 pub fn update(&mut self, data: &[u8]) -> &mut Self {
88 self.0.update(data);
89 self
90 }
91
92 #[must_use]
94 pub fn finalize(self) -> ContentHash {
95 ContentHash(*self.0.finalize().as_bytes())
96 }
97}
98
99impl Default for StreamHasher {
100 fn default() -> Self {
101 Self::new()
102 }
103}
104
105pub fn hash_reader<R: Read>(mut reader: R) -> std::io::Result<ContentHash> {
111 let mut hasher = blake3::Hasher::new();
112 let mut buf = [0u8; 16384];
113 loop {
114 let n = reader.read(&mut buf)?;
115 if n == 0 {
116 break;
117 }
118 hasher.update(&buf[..n]);
119 }
120 Ok(ContentHash(*hasher.finalize().as_bytes()))
121}
122
123pub fn hash_file(path: &Path) -> std::io::Result<ContentHash> {
139 let file = std::fs::File::open(path)?;
140 let meta = file.metadata()?;
141
142 if meta.len() == 0 {
143 return Ok(hash_bytes(b""));
144 }
145
146 let mmap = unsafe { memmap2::Mmap::map(&file)? };
149 Ok(hash_bytes(&mmap))
150}
151
/// Encodes `bytes` as lowercase hexadecimal, two characters per input byte.
fn hex_encode(bytes: &[u8]) -> String {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";

    let mut out = String::with_capacity(bytes.len() * 2);
    for &byte in bytes {
        // High nibble first, then low nibble.
        out.push(char::from(DIGITS[usize::from(byte >> 4)]));
        out.push(char::from(DIGITS[usize::from(byte & 0x0f)]));
    }
    out
}
160
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn hash_deterministic() {
        let h1 = hash_bytes(b"hello world");
        let h2 = hash_bytes(b"hello world");
        assert_eq!(h1, h2);
    }

    #[test]
    fn hash_different_inputs() {
        let h1 = hash_bytes(b"hello");
        let h2 = hash_bytes(b"world");
        assert_ne!(h1, h2);
    }

    #[test]
    fn hex_roundtrip() {
        let h = hash_bytes(b"test");
        let hex = h.to_hex();
        assert_eq!(hex.len(), 64);
        assert!(hex.bytes().all(|c| matches!(c, b'0'..=b'9' | b'a'..=b'f')));

        // Decode the hex text and make sure it reproduces the raw digest.
        let decoded: Vec<u8> = (0..hex.len())
            .step_by(2)
            .map(|i| u8::from_str_radix(&hex[i..i + 2], 16).unwrap())
            .collect();
        assert_eq!(decoded, h.as_bytes().to_vec());
    }

    #[test]
    fn display_matches_to_hex() {
        let h = hash_bytes(b"test");
        assert_eq!(h.to_string(), h.to_hex());
    }

    #[test]
    fn from_bytes_roundtrip() {
        let h = hash_bytes(b"test");
        assert_eq!(ContentHash::from_bytes(*h.as_bytes()), h);
    }

    #[test]
    fn stream_hasher_matches_hash_bytes() {
        let mut hasher = StreamHasher::new();
        hasher.update(b"hello ").update(b"world");
        assert_eq!(hasher.finalize(), hash_bytes(b"hello world"));

        assert_eq!(StreamHasher::default().finalize(), hash_bytes(b""));
    }

    #[test]
    fn hash_reader_matches_hash_bytes() {
        let small: &[u8] = b"hello world";
        assert_eq!(hash_reader(small).unwrap(), hash_bytes(small));

        let empty: &[u8] = b"";
        assert_eq!(hash_reader(empty).unwrap(), hash_bytes(empty));

        // Larger than the internal read buffer, so several reads are needed.
        let big = vec![0xabu8; 40_000];
        assert_eq!(hash_reader(big.as_slice()).unwrap(), hash_bytes(&big));
    }

    #[test]
    fn hash_file_matches_hash_bytes() {
        let dir = std::env::temp_dir();
        let pid = std::process::id();
        let full = dir.join(format!("content_hash_test_{pid}_full"));
        let empty = dir.join(format!("content_hash_test_{pid}_empty"));
        std::fs::write(&full, b"hello world").unwrap();
        std::fs::write(&empty, b"").unwrap();

        let full_hash = hash_file(&full);
        let empty_hash = hash_file(&empty);

        // Clean up before asserting so a failure does not leave files behind.
        let _ = std::fs::remove_file(&full);
        let _ = std::fs::remove_file(&empty);

        assert_eq!(full_hash.unwrap(), hash_bytes(b"hello world"));
        assert_eq!(empty_hash.unwrap(), hash_bytes(b""));
    }

    #[test]
    fn shard_prefix_works() {
        let h = hash_bytes(b"test");
        let shards = h.shard_prefix(2, 1);
        assert_eq!(shards.len(), 2);
        assert_eq!(shards[0].len(), 2);
        assert_eq!(shards[1].len(), 2);

        // The shards are consecutive slices from the start of the hex form.
        assert_eq!(shards.concat(), h.to_hex()[..4]);
    }

    #[test]
    fn shard_prefix_zero_levels_is_empty() {
        let h = hash_bytes(b"test");
        assert!(h.shard_prefix(0, 1).is_empty());
    }

    #[test]
    fn shard_prefix_max_valid() {
        let h = hash_bytes(b"test");
        // 32 levels * 1 byte * 2 hex chars = 64, exactly the digest length.
        let shards = h.shard_prefix(32, 1);
        assert_eq!(shards.len(), 32);
        assert_eq!(shards.concat(), h.to_hex());
    }

    #[test]
    #[should_panic(expected = "shard_prefix")]
    fn shard_prefix_overflow_panics() {
        let h = hash_bytes(b"test");
        // 33 levels * 2 hex chars = 66 > 64 available characters.
        let _ = h.shard_prefix(33, 1);
    }

    #[test]
    #[should_panic(expected = "shard_prefix")]
    fn shard_prefix_large_bytes_per_level_panics() {
        let h = hash_bytes(b"test");
        // 2 levels * 17 bytes * 2 hex chars = 68 > 64 available characters.
        let _ = h.shard_prefix(2, 17);
    }

    #[test]
    #[should_panic]
    fn shard_prefix_huge_levels_panics() {
        let h = hash_bytes(b"test");
        // No expected message: depending on the build profile the panic may
        // come from an arithmetic overflow check rather than the size check.
        let _ = h.shard_prefix(usize::MAX, 1);
    }
}