1use std::io::{BufReader, Read, Seek, SeekFrom};
13use std::path::Path;
14
15use crate::infra::error::InfraError;
16
/// Pair of hashes produced for a single file by a [`ContentHasher`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HashResult {
    /// Hash computed over the file's bytes as stored on disk.
    ///
    /// With [`Djb2Hasher`] this is the output of [`djb2_file_hash`].
    pub file_hash: String,
    /// Hash of the format-specific content, when one could be computed.
    ///
    /// With [`Djb2Hasher`] this is `Some` only for paths with a `png`
    /// extension whose [`png_image_hash`] succeeded; otherwise it is `None`.
    pub content_hash: Option<String>,
}
26
/// Strategy for hashing a file on disk.
///
/// Implementors are required to be `Send + Sync`.
pub trait ContentHasher: Send + Sync {
    /// Hashes the file at `path` and returns its [`HashResult`].
    ///
    /// # Errors
    ///
    /// Returns an [`InfraError`] when the implementation cannot produce a
    /// result; the exact conditions are implementation-defined.
    fn hash_file(&self, path: &Path) -> Result<HashResult, InfraError>;
}
37
/// Stateless [`ContentHasher`] built on [`djb2_file_hash`] and, for PNG files,
/// [`png_image_hash`].
pub struct Djb2Hasher;
44
45impl ContentHasher for Djb2Hasher {
46 fn hash_file(&self, path: &Path) -> Result<HashResult, InfraError> {
47 let file_hash = djb2_file_hash(path)?;
48
49 let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
52 let content_hash = if ext.eq_ignore_ascii_case("png") {
53 match png_image_hash(path) {
54 Ok(h) => Some(h),
55 Err(e) => {
56 tracing::warn!(
57 path = %path.display(),
58 error = %e,
59 "png content_hash failed — falling back to file_hash only"
60 );
61 None
62 }
63 }
64 } else {
65 None
66 };
67
68 Ok(HashResult {
69 file_hash,
70 content_hash,
71 })
72 }
73}
74
75pub fn djb2_file_hash(path: &Path) -> Result<String, InfraError> {
80 let file = std::fs::File::open(path).map_err(|e| InfraError::Hash {
81 op: "djb2",
82 reason: format!("open failed: {e}"),
83 })?;
84 let mut reader = BufReader::new(file);
85
86 let mut h: u64 = 5381;
87 let mut buf = [0u8; 8192];
88 loop {
89 let n = reader.read(&mut buf).map_err(|e| InfraError::Hash {
90 op: "djb2",
91 reason: format!("read failed: {e}"),
92 })?;
93 if n == 0 {
94 break;
95 }
96 for &b in &buf[..n] {
99 h = h.wrapping_mul(33).wrapping_add(b as u64);
100 }
101 }
102 Ok(format!("{h:016x}"))
103}
104
105pub fn png_image_hash(path: &Path) -> Result<String, InfraError> {
112 let file = std::fs::File::open(path).map_err(|e| InfraError::Hash {
113 op: "png",
114 reason: format!("open failed: {e}"),
115 })?;
116 let mut reader = BufReader::new(file);
117
118 let mut sig = [0u8; 8];
120 reader.read_exact(&mut sig).map_err(|e| InfraError::Hash {
121 op: "png",
122 reason: format!("read sig failed: {e}"),
123 })?;
124 if sig != [137, 80, 78, 71, 13, 10, 26, 10] {
125 return Err(InfraError::Hash {
126 op: "png",
127 reason: "not a valid PNG file".into(),
128 });
129 }
130
131 let mut h: u64 = 5381;
132 let mut reached_iend = false;
133
134 loop {
135 let mut header = [0u8; 8];
136 match reader.read_exact(&mut header) {
137 Ok(()) => {}
138 Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => break,
139 Err(e) => {
140 return Err(InfraError::Hash {
141 op: "png",
142 reason: format!("read chunk header failed: {e}"),
143 })
144 }
145 }
146 let length = u32::from_be_bytes([header[0], header[1], header[2], header[3]]) as u64;
148 const MAX_CHUNK_LEN: u64 = 0x7FFF_FFFF;
150 if length > MAX_CHUNK_LEN {
151 return Err(InfraError::Hash {
152 op: "png",
153 reason: format!("chunk length exceeds PNG spec maximum: {length}"),
154 });
155 }
156 let chunk_type = &header[4..8];
157
158 if chunk_type == b"IEND" {
159 reached_iend = true;
160 break;
161 }
162
163 if chunk_type == b"IHDR" || chunk_type == b"IDAT" {
164 for &b in chunk_type {
168 h = h.wrapping_mul(33).wrapping_add(b as u64) % 0x100000000;
169 }
170 let mut remaining = length;
172 let mut buf = [0u8; 8192];
173 while remaining > 0 {
174 let to_read = std::cmp::min(remaining, buf.len() as u64) as usize;
175 reader
176 .read_exact(&mut buf[..to_read])
177 .map_err(|e| InfraError::Hash {
178 op: "png",
179 reason: format!("read data failed: {e}"),
180 })?;
181 for &b in &buf[..to_read] {
182 h = h.wrapping_mul(33).wrapping_add(b as u64) % 0x100000000;
183 }
184 remaining -= to_read as u64;
185 }
186 reader
188 .seek(SeekFrom::Current(4))
189 .map_err(|e| InfraError::Hash {
190 op: "png",
191 reason: format!("seek crc failed: {e}"),
192 })?;
193 } else {
194 let skip = i64::try_from(length).map_err(|_| InfraError::Hash {
196 op: "png",
197 reason: format!("chunk length overflow: {length}"),
198 })? + 4;
199 reader
200 .seek(SeekFrom::Current(skip))
201 .map_err(|e| InfraError::Hash {
202 op: "png",
203 reason: format!("seek skip failed: {e}"),
204 })?;
205 }
206 }
207
208 if !reached_iend {
209 return Err(InfraError::Hash {
210 op: "png",
211 reason: "truncated PNG: IEND chunk not found".into(),
212 });
213 }
214
215 Ok(format!("{h:016x}"))
216}
217
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a minimal PNG byte stream: signature, `IHDR`, one `tEXt` chunk
    /// per `(keyword, text)` pair, a single `IDAT` holding `idat_data`, and
    /// `IEND`.
    ///
    /// All CRC fields are zero; `png_image_hash` skips CRCs without checking
    /// them, so this is sufficient for these tests.
    fn build_test_png(idat_data: &[u8], text_chunks: &[(&str, &str)]) -> Vec<u8> {
        /// Appends one chunk: big-endian length, type, data, zeroed CRC.
        fn push_chunk(out: &mut Vec<u8>, kind: &[u8; 4], data: &[u8]) {
            out.extend_from_slice(&(data.len() as u32).to_be_bytes());
            out.extend_from_slice(kind);
            out.extend_from_slice(data);
            out.extend_from_slice(&[0, 0, 0, 0]);
        }

        let mut buf = Vec::new();
        buf.extend_from_slice(&[137, 80, 78, 71, 13, 10, 26, 10]);

        let ihdr = [0, 0, 0, 1, 0, 0, 0, 1, 8, 2, 0, 0, 0];
        push_chunk(&mut buf, b"IHDR", &ihdr);

        for (keyword, text) in text_chunks {
            let data: Vec<u8> = [keyword.as_bytes(), &[0], text.as_bytes()].concat();
            push_chunk(&mut buf, b"tEXt", &data);
        }

        push_chunk(&mut buf, b"IDAT", idat_data);
        push_chunk(&mut buf, b"IEND", &[]);
        buf
    }

    // --- djb2_file_hash ---------------------------------------------------

    #[test]
    fn file_hash_non_empty() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("data.json");
        std::fs::write(&path, b"{}").unwrap();
        let hash = djb2_file_hash(&path).unwrap();
        assert_eq!(hash.len(), 16);
        assert!(hash.chars().all(|c| c.is_ascii_hexdigit()));
    }

    #[test]
    fn file_hash_deterministic() {
        let dir = tempfile::tempdir().unwrap();
        let p1 = dir.path().join("a.txt");
        let p2 = dir.path().join("b.txt");
        std::fs::write(&p1, b"hello world").unwrap();
        std::fs::write(&p2, b"hello world").unwrap();
        assert_eq!(djb2_file_hash(&p1).unwrap(), djb2_file_hash(&p2).unwrap());
    }

    #[test]
    fn file_hash_different_content() {
        let dir = tempfile::tempdir().unwrap();
        let p1 = dir.path().join("a.txt");
        let p2 = dir.path().join("b.txt");
        std::fs::write(&p1, b"content_a").unwrap();
        std::fs::write(&p2, b"content_b").unwrap();
        assert_ne!(djb2_file_hash(&p1).unwrap(), djb2_file_hash(&p2).unwrap());
    }

    #[test]
    fn file_hash_empty_file() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("empty");
        std::fs::write(&path, b"").unwrap();
        let hash = djb2_file_hash(&path).unwrap();
        // 0x1505 == 5381, the djb2 seed: no bytes were folded in.
        assert_eq!(hash, "0000000000001505");
    }

    // --- png_image_hash ---------------------------------------------------

    #[test]
    fn png_hash_valid() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("test.png");
        std::fs::write(&path, build_test_png(b"PIXEL_DATA", &[])).unwrap();

        let hash = png_image_hash(&path).unwrap();
        assert_eq!(hash.len(), 16);
        assert!(hash.chars().all(|c| c.is_ascii_hexdigit()));
    }

    #[test]
    fn png_hash_not_png() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("not.png");
        std::fs::write(&path, b"not a png").unwrap();
        assert!(png_image_hash(&path).is_err());
    }

    #[test]
    fn png_hash_truncated_before_iend() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("truncated.png");
        let mut data = build_test_png(b"PIXEL_DATA", &[]);
        // Drop the trailing IEND chunk: 4 length + 4 type + 4 CRC bytes.
        data.truncate(data.len() - 12);
        std::fs::write(&path, &data).unwrap();
        assert!(png_image_hash(&path).is_err());
    }

    #[test]
    fn png_hash_truncated_chunk_data() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("short_idat.png");
        let mut data = vec![137u8, 80, 78, 71, 13, 10, 26, 10];
        // IDAT declares 100 data bytes but only 3 follow.
        data.extend_from_slice(&100u32.to_be_bytes());
        data.extend_from_slice(b"IDAT");
        data.extend_from_slice(b"abc");
        std::fs::write(&path, &data).unwrap();
        assert!(png_image_hash(&path).is_err());
    }

    #[test]
    fn png_hash_rejects_oversized_chunk_length() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("oversized.png");
        let mut data = vec![137u8, 80, 78, 71, 13, 10, 26, 10];
        // One past the accepted maximum of 0x7FFF_FFFF.
        data.extend_from_slice(&0x8000_0000u32.to_be_bytes());
        data.extend_from_slice(b"IDAT");
        std::fs::write(&path, &data).unwrap();
        assert!(png_image_hash(&path).is_err());
    }

    #[test]
    fn png_same_pixels_different_metadata() {
        let dir = tempfile::tempdir().unwrap();
        let p1 = dir.path().join("a.png");
        let p2 = dir.path().join("b.png");
        std::fs::write(&p1, build_test_png(b"SAME_PIXELS", &[])).unwrap();
        std::fs::write(
            &p2,
            build_test_png(b"SAME_PIXELS", &[("vdsl", r#"{"seed":42}"#)]),
        )
        .unwrap();

        let h1 = png_image_hash(&p1).unwrap();
        let h2 = png_image_hash(&p2).unwrap();
        assert_eq!(h1, h2, "same pixels must yield same content_hash");
    }

    #[test]
    fn png_different_pixels() {
        let dir = tempfile::tempdir().unwrap();
        let p1 = dir.path().join("a.png");
        let p2 = dir.path().join("b.png");
        std::fs::write(&p1, build_test_png(b"PIXELS_AAA", &[])).unwrap();
        std::fs::write(&p2, build_test_png(b"PIXELS_BBB", &[])).unwrap();

        assert_ne!(png_image_hash(&p1).unwrap(), png_image_hash(&p2).unwrap());
    }

    #[test]
    fn png_deterministic() {
        let dir = tempfile::tempdir().unwrap();
        let p1 = dir.path().join("d1.png");
        let p2 = dir.path().join("d2.png");
        let data = build_test_png(b"DETERMINISTIC", &[]);
        std::fs::write(&p1, &data).unwrap();
        std::fs::write(&p2, &data).unwrap();

        let h1 = png_image_hash(&p1).unwrap();
        let h2 = png_image_hash(&p2).unwrap();
        assert_eq!(h1, h2);
        // The bare seed would mean no IHDR/IDAT bytes were hashed.
        assert_ne!(h1, "0000000000001505");
    }

    // --- Djb2Hasher -------------------------------------------------------

    #[test]
    fn hasher_non_png_no_content_hash() {
        let hasher = Djb2Hasher;
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("data.json");
        std::fs::write(&path, b"{}").unwrap();
        let result = hasher.hash_file(&path).unwrap();
        assert_eq!(result.file_hash.len(), 16);
        assert!(result.content_hash.is_none());
    }

    #[test]
    fn hasher_png_has_both_hashes() {
        let hasher = Djb2Hasher;
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("test.png");
        std::fs::write(&path, build_test_png(b"DATA", &[])).unwrap();
        let result = hasher.hash_file(&path).unwrap();
        assert_eq!(result.file_hash.len(), 16);
        assert!(result.content_hash.is_some());
        assert_eq!(result.content_hash.as_ref().unwrap().len(), 16);
    }

    #[test]
    fn hasher_png_extension_is_case_insensitive() {
        let hasher = Djb2Hasher;
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("UPPER.PNG");
        std::fs::write(&path, build_test_png(b"DATA", &[])).unwrap();
        let result = hasher.hash_file(&path).unwrap();
        assert!(result.content_hash.is_some());
    }

    #[test]
    fn hasher_invalid_png_falls_back_to_file_hash_only() {
        let hasher = Djb2Hasher;
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("broken.png");
        std::fs::write(&path, b"not a png").unwrap();
        // The PNG parse failure must not fail the whole call.
        let result = hasher.hash_file(&path).unwrap();
        assert_eq!(result.file_hash, djb2_file_hash(&path).unwrap());
        assert!(result.content_hash.is_none());
    }

    #[test]
    fn hasher_png_file_hash_differs_from_content_hash() {
        let hasher = Djb2Hasher;
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("test.png");
        std::fs::write(&path, build_test_png(b"PIXEL_DATA", &[])).unwrap();
        let result = hasher.hash_file(&path).unwrap();
        assert_ne!(
            result.file_hash,
            result.content_hash.unwrap(),
            "file_hash (whole file) and content_hash (IHDR+IDAT) should differ"
        );
    }

    #[test]
    fn hasher_png_same_pixels_different_metadata_same_content_different_file() {
        let hasher = Djb2Hasher;
        let dir = tempfile::tempdir().unwrap();
        let p1 = dir.path().join("a.png");
        let p2 = dir.path().join("b.png");
        std::fs::write(&p1, build_test_png(b"SAME", &[])).unwrap();
        std::fs::write(&p2, build_test_png(b"SAME", &[("key", "metadata")])).unwrap();
        let r1 = hasher.hash_file(&p1).unwrap();
        let r2 = hasher.hash_file(&p2).unwrap();

        // Metadata changes the raw bytes but not the hashed IHDR/IDAT content.
        assert_eq!(r1.content_hash, r2.content_hash);
        assert_ne!(r1.file_hash, r2.file_hash);
    }

    /// Compares `png_image_hash` against a hash stored in a fixture file.
    ///
    /// Ignored by default because it needs both fixture files to exist under
    /// `/tmp`; run with `cargo test -- --ignored` once they are in place.
    #[test]
    #[ignore]
    fn cross_language_hash_match() {
        let png_path = Path::new("/tmp/vdsl_hash_test.png");
        let hash_path = Path::new("/tmp/vdsl_hash_test.lua_hash");
        assert!(
            png_path.exists() && hash_path.exists(),
            "required fixture files not found: /tmp/vdsl_hash_test.png and .lua_hash"
        );
        let rust_hash = png_image_hash(png_path).unwrap();
        let lua_hash = std::fs::read_to_string(hash_path)
            .unwrap()
            .trim()
            .to_string();
        assert_eq!(
            rust_hash, lua_hash,
            "Rust hash ({rust_hash}) must match Lua hash ({lua_hash})"
        );
    }
}