infiniloom_engine/embedding/hasher.rs

use super::error::EmbedError;
use super::normalizer::normalize_for_hash;

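/// Deterministic content hash: a short `ec_`-prefixed identifier plus the
/// full BLAKE3 hex digest.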
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HashResult {
    /// Short identifier: `"ec_"` followed by the first 32 hex characters of the hash.
    pub short_id: String,

    /// Full 64-character lowercase hex encoding of the BLAKE3 hash.
    pub full_hash: String,
}

impl HashResult {
    /// Derives both identifiers from a raw BLAKE3 hash.
    #[inline]
    fn from_hash(hash: blake3::Hash) -> Self {
        let hex = hash.to_hex();

        Self {
            short_id: format!("ec_{}", &hex[..32]),
            full_hash: hex.to_string(),
        }
    }
}

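/// Hashes `content` with BLAKE3 after running it through [`normalize_for_hash`],
/// so that logically identical sources (differing line endings, trailing
/// whitespace, or Unicode normalization form) produce the same hash.
///
/// ```ignore
/// let h = hash_content("fn foo() {}");
/// assert!(h.short_id.starts_with("ec_"));
/// assert_eq!(h.full_hash.len(), 64);
/// ```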
#[inline]
pub fn hash_content(content: &str) -> HashResult {
    let normalized = normalize_for_hash(content);
    let hash = blake3::hash(normalized.as_bytes());
    HashResult::from_hash(hash)
}

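/// Hashes content that the caller has already passed through
/// [`normalize_for_hash`], skipping the redundant normalization step.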
#[inline]
pub(super) fn hash_normalized(normalized_content: &str) -> HashResult {
    let hash = blake3::hash(normalized_content.as_bytes());
    HashResult::from_hash(hash)
}

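/// Hashes raw bytes directly, with no text normalization.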
#[inline]
pub(super) fn hash_bytes(bytes: &[u8]) -> HashResult {
    let hash = blake3::hash(bytes);
    HashResult::from_hash(hash)
}

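/// Returns [`EmbedError::HashCollision`] if two entries that share `id` carry
/// different full hashes; otherwise the pair is considered consistent.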
pub(super) fn verify_no_collision(id: &str, hash1: &str, hash2: &str) -> Result<(), EmbedError> {
    if hash1 != hash2 {
        return Err(EmbedError::HashCollision {
            id: id.to_owned(),
            hash1: hash1.to_owned(),
            hash2: hash2.to_owned(),
        });
    }
    Ok(())
}

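/// Computes the full BLAKE3 hex digest of `data` without building a
/// [`HashResult`], for integrity checks over raw byte payloads.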
pub(super) fn compute_integrity_hash(data: &[u8]) -> String {
    blake3::hash(data).to_hex().to_string()
}

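/// Streaming wrapper around [`blake3::Hasher`] for hashing content that
/// arrives in pieces. Feeding the parts via the `update*` methods yields the
/// same result as hashing the concatenated bytes in one call (illustrative
/// sketch below; see `test_incremental_hasher`):
///
/// ```ignore
/// let mut hasher = IncrementalHasher::new();
/// hasher.update_str("part1");
/// hasher.update_u32(42);
/// let result = hasher.finalize();
/// ```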
pub(super) struct IncrementalHasher {
    hasher: blake3::Hasher,
}

impl IncrementalHasher {
    #[inline]
    pub(super) fn new() -> Self {
        Self { hasher: blake3::Hasher::new() }
    }

    /// Feeds raw bytes into the hash state.
    #[inline]
    pub(super) fn update(&mut self, data: &[u8]) {
        self.hasher.update(data);
    }

    /// Feeds a string's UTF-8 bytes into the hash state.
    #[inline]
    pub(super) fn update_str(&mut self, s: &str) {
        self.hasher.update(s.as_bytes());
    }

    /// Feeds the little-endian bytes of `n` into the hash state.
    #[inline]
    pub(super) fn update_u32(&mut self, n: u32) {
        self.hasher.update(&n.to_le_bytes());
    }

    /// Feeds the little-endian bytes of `n` into the hash state.
    #[inline]
    pub(super) fn update_u64(&mut self, n: u64) {
        self.hasher.update(&n.to_le_bytes());
    }

    /// Consumes the hasher and returns the combined [`HashResult`].
    #[inline]
    pub(super) fn finalize(self) -> HashResult {
        HashResult::from_hash(self.hasher.finalize())
    }

    /// Consumes the hasher and returns only the 64-character hex digest.
    #[inline]
    pub(super) fn finalize_hex(self) -> String {
        self.hasher.finalize().to_hex().to_string()
    }
}

impl Default for IncrementalHasher {
    fn default() -> Self {
        Self::new()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_deterministic() {
        let content = "fn foo() { bar(); }";
        let h1 = hash_content(content);
        let h2 = hash_content(content);

        assert_eq!(h1.short_id, h2.short_id);
        assert_eq!(h1.full_hash, h2.full_hash);
    }

    #[test]
    fn test_format() {
        let h = hash_content("test");

        assert!(h.short_id.starts_with("ec_"));
        assert_eq!(h.short_id.len(), 3 + 32);
        assert_eq!(h.full_hash.len(), 64);
    }

    #[test]
    fn test_different_content() {
        let h1 = hash_content("fn foo() {}");
        let h2 = hash_content("fn bar() {}");

        assert_ne!(h1.short_id, h2.short_id);
        assert_ne!(h1.full_hash, h2.full_hash);
    }

    #[test]
    fn test_cross_platform_consistency() {
        let variants = [
            "fn foo() {\n bar();\n}",
            "fn foo() {\r\n bar();\r\n}",
            "fn foo() {\r bar();\r}",
            "fn foo() { \n bar(); \n}",
        ];

        let hashes: Vec<_> = variants.iter().map(|c| hash_content(c)).collect();

        for i in 1..hashes.len() {
            assert_eq!(hashes[0].short_id, hashes[i].short_id, "Hash mismatch for variant {i}");
        }
    }

    #[test]
    fn test_unicode_consistency() {
        let nfd = "cafe\u{0301}";
        let nfc = "caf\u{00E9}";

        let h1 = hash_content(nfd);
        let h2 = hash_content(nfc);

        assert_eq!(h1.short_id, h2.short_id);
    }

    #[test]
    fn test_verify_no_collision_ok() {
        let result = verify_no_collision("ec_test", "abc123", "abc123");
        assert!(result.is_ok());
    }

    #[test]
    fn test_verify_no_collision_detected() {
        let result = verify_no_collision("ec_test", "abc123", "def456");
        assert!(result.is_err());
        assert!(matches!(result, Err(EmbedError::HashCollision { .. })));
    }

    #[test]
    fn test_hash_normalized() {
        let content = "fn foo() { bar(); }";
        let normalized = normalize_for_hash(content);

        let h1 = hash_content(content);
        let h2 = hash_normalized(&normalized);

        assert_eq!(h1.short_id, h2.short_id);
        assert_eq!(h1.full_hash, h2.full_hash);
    }

    #[test]
    fn test_hash_bytes() {
        let bytes = b"hello world";
        let result = hash_bytes(bytes);

        assert!(result.short_id.starts_with("ec_"));
        assert_eq!(result.full_hash.len(), 64);
    }

    #[test]
    fn test_incremental_hasher() {
        let concat = "part1part2part3";
        let h1 = hash_bytes(concat.as_bytes());

        let mut hasher = IncrementalHasher::new();
        hasher.update(b"part1");
        hasher.update(b"part2");
        hasher.update(b"part3");
        let h2 = hasher.finalize();

        assert_eq!(h1.short_id, h2.short_id);
    }

    #[test]
    fn test_incremental_with_numbers() {
        let mut hasher = IncrementalHasher::new();
        hasher.update_u32(42);
        hasher.update_u64(123456789);
        hasher.update_str("test");
        let result = hasher.finalize_hex();

        assert_eq!(result.len(), 64);
    }

    #[test]
    fn test_compute_integrity_hash() {
        let data = b"manifest data here";
        let hash = compute_integrity_hash(data);

        assert_eq!(hash.len(), 64);
    }

    #[test]
    fn test_empty_content() {
        let h1 = hash_content("");
        let h2 = hash_content("\n\n\n");

        assert_eq!(h1.short_id, h2.short_id);
    }

    #[test]
    fn test_whitespace_only() {
        let h1 = hash_content(" ");
        let h2 = hash_content(" \n \n ");

        assert_eq!(h1.short_id, h2.short_id);
    }

    #[test]
    fn test_hash_result_clone() {
        let result = hash_content("test");
        let cloned = result.clone();

        assert_eq!(result, cloned);
    }
}