infiniloom_engine/embedding/
hasher.rs1use super::error::EmbedError;
27use super::normalizer::normalize_for_hash;
28
/// Outcome of hashing a piece of content: a compact identifier plus the full digest.
///
/// `Hash` is derived alongside the equality traits so values can be used as
/// keys in a `HashMap` or members of a `HashSet` (for example when
/// de-duplicating by content).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct HashResult {
    /// Short identifier. The hashing functions in this module produce the
    /// literal prefix `ec_` followed by the first 32 hex characters of the digest.
    pub short_id: String,

    /// The complete digest rendered as a hex string (64 characters for the
    /// digests produced in this module).
    pub full_hash: String,
}
40
41impl HashResult {
42 #[inline]
44 fn from_hash(hash: blake3::Hash) -> Self {
45 let hex = hash.to_hex();
46
47 Self {
48 short_id: format!("ec_{}", &hex[..32]),
50 full_hash: hex.to_string(),
52 }
53 }
54}
55
56#[inline]
74pub fn hash_content(content: &str) -> HashResult {
75 let normalized = normalize_for_hash(content);
76 let hash = blake3::hash(normalized.as_bytes());
77 HashResult::from_hash(hash)
78}
79
80#[inline]
90pub fn hash_normalized(normalized_content: &str) -> HashResult {
91 let hash = blake3::hash(normalized_content.as_bytes());
92 HashResult::from_hash(hash)
93}
94
95#[inline]
99pub fn hash_bytes(bytes: &[u8]) -> HashResult {
100 let hash = blake3::hash(bytes);
101 HashResult::from_hash(hash)
102}
103
104pub fn verify_no_collision(id: &str, hash1: &str, hash2: &str) -> Result<(), EmbedError> {
114 if hash1 != hash2 {
115 return Err(EmbedError::HashCollision {
116 id: id.to_string(),
117 hash1: hash1.to_string(),
118 hash2: hash2.to_string(),
119 });
120 }
121 Ok(())
122}
123
124pub fn compute_integrity_hash(data: &[u8]) -> String {
128 blake3::hash(data).to_hex().to_string()
129}
130
131pub struct IncrementalHasher {
145 hasher: blake3::Hasher,
146}
147
148impl IncrementalHasher {
149 #[inline]
151 pub fn new() -> Self {
152 Self {
153 hasher: blake3::Hasher::new(),
154 }
155 }
156
157 #[inline]
159 pub fn update(&mut self, data: &[u8]) {
160 self.hasher.update(data);
161 }
162
163 #[inline]
165 pub fn update_str(&mut self, s: &str) {
166 self.hasher.update(s.as_bytes());
167 }
168
169 #[inline]
171 pub fn update_u32(&mut self, n: u32) {
172 self.hasher.update(&n.to_le_bytes());
173 }
174
175 #[inline]
177 pub fn update_u64(&mut self, n: u64) {
178 self.hasher.update(&n.to_le_bytes());
179 }
180
181 #[inline]
183 pub fn finalize(self) -> HashResult {
184 HashResult::from_hash(self.hasher.finalize())
185 }
186
187 #[inline]
189 pub fn finalize_hex(self) -> String {
190 self.hasher.finalize().to_hex().to_string()
191 }
192}
193
194impl Default for IncrementalHasher {
195 fn default() -> Self {
196 Self::new()
197 }
198}
199
#[cfg(test)]
mod tests {
    use super::*;

    /// Hashing the same input twice must give identical results.
    #[test]
    fn test_deterministic() {
        let source = "fn foo() { bar(); }";
        let first = hash_content(source);
        let second = hash_content(source);

        assert_eq!(first.short_id, second.short_id);
        assert_eq!(first.full_hash, second.full_hash);
    }

    /// The short id is `ec_` plus 32 hex chars; the full hash is 64 hex chars.
    #[test]
    fn test_format() {
        let hashed = hash_content("test");

        assert!(hashed.short_id.starts_with("ec_"));
        assert_eq!(hashed.short_id.len(), 3 + 32);
        assert_eq!(hashed.full_hash.len(), 64);
    }

    /// Distinct inputs must not share an id or a digest.
    #[test]
    fn test_different_content() {
        let foo = hash_content("fn foo() {}");
        let bar = hash_content("fn bar() {}");

        assert_ne!(foo.short_id, bar.short_id);
        assert_ne!(foo.full_hash, bar.full_hash);
    }

    /// Line-ending and trailing-whitespace variants must hash like the first variant.
    #[test]
    fn test_cross_platform_consistency() {
        let variants = [
            "fn foo() {\n bar();\n}",
            "fn foo() {\r\n bar();\r\n}",
            "fn foo() {\r bar();\r}",
            "fn foo() { \n bar(); \n}",
        ];

        let baseline = hash_content(variants[0]);

        for (i, variant) in variants.iter().enumerate().skip(1) {
            let hashed = hash_content(variant);
            assert_eq!(
                baseline.short_id, hashed.short_id,
                "Hash mismatch for variant {i}"
            );
        }
    }

    /// Decomposed and precomposed forms of the same text must hash equally.
    #[test]
    fn test_unicode_consistency() {
        let decomposed = "cafe\u{0301}";
        let precomposed = "caf\u{00E9}";

        let from_decomposed = hash_content(decomposed);
        let from_precomposed = hash_content(precomposed);

        assert_eq!(from_decomposed.short_id, from_precomposed.short_id);
    }

    /// Equal hashes for one id are accepted.
    #[test]
    fn test_verify_no_collision_ok() {
        let outcome = verify_no_collision("ec_test", "abc123", "abc123");
        assert!(outcome.is_ok());
    }

    /// Differing hashes for one id are reported as a collision.
    #[test]
    fn test_verify_no_collision_detected() {
        let outcome = verify_no_collision("ec_test", "abc123", "def456");
        assert!(outcome.is_err());
        assert!(matches!(outcome, Err(EmbedError::HashCollision { .. })));
    }

    /// Pre-normalizing and calling `hash_normalized` matches `hash_content`.
    #[test]
    fn test_hash_normalized() {
        let source = "fn foo() { bar(); }";
        let pre_normalized = normalize_for_hash(source);

        let via_content = hash_content(source);
        let via_normalized = hash_normalized(&pre_normalized);

        assert_eq!(via_content.short_id, via_normalized.short_id);
        assert_eq!(via_content.full_hash, via_normalized.full_hash);
    }

    /// Raw-byte hashing produces a well-formed result.
    #[test]
    fn test_hash_bytes() {
        let hashed = hash_bytes(b"hello world");

        assert!(hashed.short_id.starts_with("ec_"));
        assert_eq!(hashed.full_hash.len(), 64);
    }

    /// Feeding parts one by one matches hashing their concatenation.
    #[test]
    fn test_incremental_hasher() {
        let whole = "part1part2part3";
        let one_shot = hash_bytes(whole.as_bytes());

        let mut hasher = IncrementalHasher::new();
        for part in [b"part1", b"part2", b"part3"] {
            hasher.update(part);
        }
        let streamed = hasher.finalize();

        assert_eq!(one_shot.short_id, streamed.short_id);
    }

    /// Mixed integer and string updates still yield a 64-char hex digest.
    #[test]
    fn test_incremental_with_numbers() {
        let mut hasher = IncrementalHasher::new();
        hasher.update_u32(42);
        hasher.update_u64(123456789);
        hasher.update_str("test");
        let hex = hasher.finalize_hex();

        assert_eq!(hex.len(), 64);
    }

    /// The integrity hash is a 64-char hex digest.
    #[test]
    fn test_compute_integrity_hash() {
        let digest = compute_integrity_hash(b"manifest data here");

        assert_eq!(digest.len(), 64);
    }

    /// Newline-only input hashes like empty input.
    #[test]
    fn test_empty_content() {
        let empty = hash_content("");
        let newlines_only = hash_content("\n\n\n");

        assert_eq!(empty.short_id, newlines_only.short_id);
    }

    /// Whitespace-only inputs hash alike regardless of embedded newlines.
    #[test]
    fn test_whitespace_only() {
        let spaces = hash_content(" ");
        let spaces_and_newlines = hash_content(" \n \n ");

        assert_eq!(spaces.short_id, spaces_and_newlines.short_id);
    }

    /// A cloned result compares equal to its source.
    #[test]
    fn test_hash_result_clone() {
        let original = hash_content("test");
        let copy = original.clone();

        assert_eq!(original, copy);
    }
}