1#![allow(dead_code)]
2use std::collections::HashMap;
9use std::fmt;
10
/// Selects which checksum routine a fingerprint is produced with.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum FingerprintAlgorithm {
    /// Labelled "CRC-32".
    Crc32,
    /// Labelled "Adler-32".
    Adler32,
    /// Labelled "XOR Hash".
    XorHash,
    /// Labelled "Block Hash".
    BlockHash,
}

impl FingerprintAlgorithm {
    /// Returns the fixed, human-readable label for this algorithm.
    pub fn name(&self) -> &'static str {
        match *self {
            FingerprintAlgorithm::Crc32 => "CRC-32",
            FingerprintAlgorithm::Adler32 => "Adler-32",
            FingerprintAlgorithm::XorHash => "XOR Hash",
            FingerprintAlgorithm::BlockHash => "Block Hash",
        }
    }
}
35
36#[derive(Debug, Clone, PartialEq, Eq)]
38pub struct Fingerprint {
39 pub algorithm: FingerprintAlgorithm,
41 pub hash: String,
43 pub file_size: u64,
45 pub blocks_processed: u64,
47}
48
49impl Fingerprint {
50 pub fn new(algorithm: FingerprintAlgorithm, hash: &str, file_size: u64) -> Self {
52 Self {
53 algorithm,
54 hash: hash.to_string(),
55 file_size,
56 blocks_processed: 0,
57 }
58 }
59
60 pub fn with_blocks(mut self, blocks: u64) -> Self {
62 self.blocks_processed = blocks;
63 self
64 }
65}
66
67impl fmt::Display for Fingerprint {
68 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
69 write!(f, "{}:{}", self.algorithm.name(), self.hash)
70 }
71}
72
/// Outcome of checking data against a previously recorded fingerprint.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum VerifyResult {
    /// Size and digest both agree with the expectation.
    Match,
    /// Sizes agree but the digests differ.
    Mismatch {
        /// Digest recorded in the expected fingerprint.
        expected: String,
        /// Digest computed from the data being verified.
        actual: String,
    },
    /// The data length differs from the recorded length.
    SizeChanged {
        /// Length recorded in the expected fingerprint.
        expected: u64,
        /// Length of the data being verified.
        actual: u64,
    },
}

impl VerifyResult {
    /// Returns `true` only for [`VerifyResult::Match`].
    pub fn is_ok(&self) -> bool {
        match self {
            Self::Match => true,
            Self::Mismatch { .. } | Self::SizeChanged { .. } => false,
        }
    }
}
100
/// Computes a bitwise (table-free) CRC-32 over `data`.
///
/// The register starts at all ones, each byte is XORed into the low bits and
/// shifted out one bit at a time using the reflected polynomial `0xEDB88320`,
/// and the final register is inverted.
fn compute_crc32(data: &[u8]) -> u32 {
    const REFLECTED_POLY: u32 = 0xEDB8_8320;

    let register = data.iter().fold(u32::MAX, |acc, &octet| {
        (0..8).fold(acc ^ u32::from(octet), |state, _| {
            let shifted = state >> 1;
            if state & 1 == 0 {
                shifted
            } else {
                shifted ^ REFLECTED_POLY
            }
        })
    });
    !register
}
116
/// Computes the Adler-32 checksum of `data`.
///
/// Two running sums are kept modulo 65521: `low` starts at 1 and accumulates
/// the bytes, `high` starts at 0 and accumulates the successive `low` values.
/// The result packs `high` into the upper 16 bits and `low` into the lower 16.
fn compute_adler32(data: &[u8]) -> u32 {
    const MODULUS: u32 = 65521;

    let (low, high) = data.iter().fold((1u32, 0u32), |(low, high), &octet| {
        let low = (low + u32::from(octet)) % MODULUS;
        (low, (high + low) % MODULUS)
    });
    (high << 16) | low
}
127
/// XORs together the little-endian 32-bit words of `data`.
///
/// The input is split into 4-byte chunks; a shorter final chunk is treated as
/// if padded with zero high bytes. Because XOR is commutative, reordering whole
/// words does not change the result.
fn compute_xor_hash(data: &[u8]) -> u32 {
    data.chunks(4)
        .map(|word| {
            // Walking the chunk back-to-front while shifting left leaves the
            // first byte in the least significant position.
            word.iter()
                .rev()
                .fold(0u32, |acc, &octet| (acc << 8) | u32::from(octet))
        })
        .fold(0u32, |acc, word| acc ^ word)
}
140
141#[allow(clippy::cast_precision_loss)]
143fn compute_block_hash(data: &[u8], block_size: usize) -> (u32, u64) {
144 let mut combined: u32 = 0;
145 let mut blocks: u64 = 0;
146 for chunk in data.chunks(block_size.max(1)) {
147 let block_crc = compute_crc32(chunk);
148 combined = combined.wrapping_add(block_crc);
149 blocks += 1;
150 }
151 (combined, blocks)
152}
153
154pub struct FingerprintEngine {
156 algorithm: FingerprintAlgorithm,
158 block_size: usize,
160 cache: HashMap<String, Fingerprint>,
162}
163
164impl FingerprintEngine {
165 pub fn new(algorithm: FingerprintAlgorithm) -> Self {
167 Self {
168 algorithm,
169 block_size: 4096,
170 cache: HashMap::new(),
171 }
172 }
173
174 pub fn with_block_size(mut self, size: usize) -> Self {
176 self.block_size = size;
177 self
178 }
179
180 #[allow(clippy::cast_precision_loss)]
182 pub fn compute(&self, data: &[u8]) -> Fingerprint {
183 let file_size = data.len() as u64;
184 match self.algorithm {
185 FingerprintAlgorithm::Crc32 => {
186 let crc = compute_crc32(data);
187 Fingerprint::new(self.algorithm, &format!("{crc:08x}"), file_size)
188 }
189 FingerprintAlgorithm::Adler32 => {
190 let adler = compute_adler32(data);
191 Fingerprint::new(self.algorithm, &format!("{adler:08x}"), file_size)
192 }
193 FingerprintAlgorithm::XorHash => {
194 let xor = compute_xor_hash(data);
195 Fingerprint::new(self.algorithm, &format!("{xor:08x}"), file_size)
196 }
197 FingerprintAlgorithm::BlockHash => {
198 let (hash, blocks) = compute_block_hash(data, self.block_size);
199 Fingerprint::new(self.algorithm, &format!("{hash:08x}"), file_size)
200 .with_blocks(blocks)
201 }
202 }
203 }
204
205 pub fn compute_and_cache(&mut self, name: &str, data: &[u8]) -> Fingerprint {
207 let fp = self.compute(data);
208 self.cache.insert(name.to_string(), fp.clone());
209 fp
210 }
211
212 pub fn verify(&self, data: &[u8], expected: &Fingerprint) -> VerifyResult {
214 #[allow(clippy::cast_precision_loss)]
215 let actual_size = data.len() as u64;
216 if actual_size != expected.file_size {
217 return VerifyResult::SizeChanged {
218 expected: expected.file_size,
219 actual: actual_size,
220 };
221 }
222 let actual_fp = self.compute(data);
223 if actual_fp.hash == expected.hash {
224 VerifyResult::Match
225 } else {
226 VerifyResult::Mismatch {
227 expected: expected.hash.clone(),
228 actual: actual_fp.hash,
229 }
230 }
231 }
232
233 pub fn get_cached(&self, name: &str) -> Option<&Fingerprint> {
235 self.cache.get(name)
236 }
237
238 pub fn cache_size(&self) -> usize {
240 self.cache.len()
241 }
242
243 pub fn clear_cache(&mut self) {
245 self.cache.clear();
246 }
247}
248
#[cfg(test)]
mod tests {
    use super::*;

    const TEST_DATA: &[u8] = b"Hello, proxy fingerprint test data for OxiMedia framework!";

    #[test]
    fn test_algorithm_name() {
        assert_eq!(FingerprintAlgorithm::Crc32.name(), "CRC-32");
        assert_eq!(FingerprintAlgorithm::Adler32.name(), "Adler-32");
        assert_eq!(FingerprintAlgorithm::XorHash.name(), "XOR Hash");
        assert_eq!(FingerprintAlgorithm::BlockHash.name(), "Block Hash");
    }

    #[test]
    fn test_crc32_deterministic() {
        assert_eq!(compute_crc32(TEST_DATA), compute_crc32(TEST_DATA));
        // Repeatability alone would hold for any pure function, so also pin
        // the standard CRC-32 check values.
        assert_eq!(compute_crc32(b""), 0);
        assert_eq!(compute_crc32(b"123456789"), 0xCBF4_3926);
    }

    #[test]
    fn test_adler32_deterministic() {
        assert_eq!(compute_adler32(TEST_DATA), compute_adler32(TEST_DATA));
        // Known-answer vectors: the empty input and the well-known "Wikipedia" example.
        assert_eq!(compute_adler32(b""), 1);
        assert_eq!(compute_adler32(b"Wikipedia"), 0x11E6_0398);
    }

    #[test]
    fn test_xor_hash_deterministic() {
        assert_eq!(compute_xor_hash(TEST_DATA), compute_xor_hash(TEST_DATA));
        // Words are little-endian; a short tail is zero-extended.
        assert_eq!(compute_xor_hash(&[]), 0);
        assert_eq!(compute_xor_hash(&[1, 2, 3, 4]), 0x0403_0201);
        assert_eq!(compute_xor_hash(&[1, 2, 3, 4, 5]), 0x0403_0204);
    }

    #[test]
    fn test_crc32_different_data() {
        let a = compute_crc32(b"hello");
        let b = compute_crc32(b"world");
        assert_ne!(a, b);
    }

    #[test]
    fn test_compute_crc32_fingerprint() {
        let engine = FingerprintEngine::new(FingerprintAlgorithm::Crc32);
        let fp = engine.compute(TEST_DATA);
        assert_eq!(fp.algorithm, FingerprintAlgorithm::Crc32);
        assert_eq!(fp.file_size, TEST_DATA.len() as u64);
        assert_eq!(fp.hash, format!("{:08x}", compute_crc32(TEST_DATA)));
        assert_eq!(fp.blocks_processed, 0);
    }

    #[test]
    fn test_compute_adler32_fingerprint() {
        let engine = FingerprintEngine::new(FingerprintAlgorithm::Adler32);
        let fp = engine.compute(TEST_DATA);
        assert_eq!(fp.algorithm, FingerprintAlgorithm::Adler32);
        assert_eq!(fp.hash, format!("{:08x}", compute_adler32(TEST_DATA)));
    }

    #[test]
    fn test_compute_block_hash_fingerprint() {
        let engine = FingerprintEngine::new(FingerprintAlgorithm::BlockHash).with_block_size(16);
        let fp = engine.compute(TEST_DATA);
        assert_eq!(fp.algorithm, FingerprintAlgorithm::BlockHash);
        assert!(fp.blocks_processed > 0);
        assert_eq!(fp.blocks_processed, TEST_DATA.chunks(16).count() as u64);

        // A zero block size is clamped to one byte per block rather than panicking.
        let tiny = FingerprintEngine::new(FingerprintAlgorithm::BlockHash).with_block_size(0);
        assert_eq!(tiny.compute(b"abc").blocks_processed, 3);
    }

    #[test]
    fn test_verify_match() {
        let engine = FingerprintEngine::new(FingerprintAlgorithm::Crc32);
        let fp = engine.compute(TEST_DATA);
        let result = engine.verify(TEST_DATA, &fp);
        assert!(result.is_ok());
        assert_eq!(result, VerifyResult::Match);
    }

    #[test]
    fn test_verify_mismatch() {
        let engine = FingerprintEngine::new(FingerprintAlgorithm::Crc32);
        let fp = engine.compute(TEST_DATA);
        // Keep the length identical so the size check passes and the digest
        // comparison is what fails.
        let mut tampered_same_size = TEST_DATA.to_vec();
        tampered_same_size[0] = b'X';
        let result = engine.verify(&tampered_same_size, &fp);
        assert!(!result.is_ok());
        assert!(matches!(result, VerifyResult::Mismatch { .. }));
    }

    #[test]
    fn test_verify_size_changed() {
        let engine = FingerprintEngine::new(FingerprintAlgorithm::Crc32);
        let fp = engine.compute(TEST_DATA);
        let shorter = &TEST_DATA[..10];
        let result = engine.verify(shorter, &fp);
        assert_eq!(
            result,
            VerifyResult::SizeChanged {
                expected: TEST_DATA.len() as u64,
                actual: 10,
            }
        );
    }

    #[test]
    fn test_cache_operations() {
        let mut engine = FingerprintEngine::new(FingerprintAlgorithm::Crc32);
        assert_eq!(engine.cache_size(), 0);
        let fp = engine.compute_and_cache("proxy_a.mp4", TEST_DATA);
        assert_eq!(engine.cache_size(), 1);
        assert_eq!(engine.get_cached("proxy_a.mp4"), Some(&fp));
        assert!(engine.get_cached("nonexistent").is_none());
        engine.clear_cache();
        assert_eq!(engine.cache_size(), 0);
    }

    #[test]
    fn test_fingerprint_display() {
        let fp = Fingerprint::new(FingerprintAlgorithm::Crc32, "abcd1234", 100);
        let display = format!("{fp}");
        assert_eq!(display, "CRC-32:abcd1234");
    }

    #[test]
    fn test_empty_data() {
        let engine = FingerprintEngine::new(FingerprintAlgorithm::Crc32);
        let fp = engine.compute(b"");
        assert_eq!(fp.file_size, 0);
        let fp2 = engine.compute(b"");
        assert_eq!(fp.hash, fp2.hash);
        assert_eq!(fp.hash, "00000000");
    }
}
375
/// Converts an RGB triple to an 8-bit luma value with integer arithmetic.
///
/// The channels are weighted 306/1024, 601/1024 and 117/1024 (about 0.299,
/// 0.587 and 0.114) and the sum is scaled back with a 10-bit right shift.
/// The weights add up to 1024, so the result never exceeds 255.
#[inline]
fn rgb_to_luma(r: u8, g: u8, b: u8) -> u8 {
    const WEIGHTS: [u32; 3] = [306, 601, 117];

    let weighted: u32 = [r, g, b]
        .iter()
        .zip(WEIGHTS.iter())
        .map(|(&channel, &weight)| weight * u32::from(channel))
        .sum();
    (weighted >> 10).min(255) as u8
}
390
391fn decode_luma(data: &[u8], width: usize, height: usize, channels: usize) -> Option<Vec<u8>> {
403 if data.len() != width * height * channels {
404 return None;
405 }
406 match channels {
407 1 => Some(data.to_vec()),
408 3 => Some(
409 data.chunks_exact(3)
410 .map(|px| rgb_to_luma(px[0], px[1], px[2]))
411 .collect(),
412 ),
413 4 => Some(
414 data.chunks_exact(4)
415 .map(|px| rgb_to_luma(px[0], px[1], px[2]))
416 .collect(),
417 ),
418 _ => None,
419 }
420}
421
/// Resamples a single-channel, row-major image to `out_w` x `out_h` using
/// bilinear interpolation with pixel-centre alignment.
///
/// Returns a buffer of `out_w * out_h` bytes. If the source has zero width or
/// height there is nothing to sample and the buffer is returned zero-filled.
///
/// # Panics
///
/// Panics if `src` holds fewer than `src_w * src_h` bytes.
fn bilinear_resize(src: &[u8], src_w: usize, src_h: usize, out_w: usize, out_h: usize) -> Vec<u8> {
    /// Maps an output index along one axis to the two neighbouring source
    /// indices and the blend weight of the second one. `src_len` must be >= 1.
    fn taps(out_idx: usize, src_len: usize, out_len: usize) -> (usize, usize, f64) {
        // Position of the output pixel centre expressed in source coordinates.
        let pos = (out_idx as f64 + 0.5) * src_len as f64 / out_len as f64 - 0.5;
        let lo = (pos.floor() as isize).clamp(0, src_len as isize - 1) as usize;
        let hi = (lo + 1).min(src_len - 1);
        // Clamping keeps positions beyond the border pinned to the edge pixel.
        let weight = (pos - lo as f64).clamp(0.0, 1.0);
        (lo, hi, weight)
    }

    let mut out = vec![0u8; out_w * out_h];
    // An empty source previously hit `clamp(0, -1)` / `0 - 1` and panicked;
    // an empty target needs no work (and `chunks_exact_mut(0)` would panic).
    if src_w == 0 || src_h == 0 || out_w == 0 || out_h == 0 {
        return out;
    }

    // Horizontal taps depend only on the column, so compute them once.
    let columns: Vec<(usize, usize, f64)> =
        (0..out_w).map(|ox| taps(ox, src_w, out_w)).collect();

    for (oy, row) in out.chunks_exact_mut(out_w).enumerate() {
        let (y0, y1, wy) = taps(oy, src_h, out_h);
        for (dst, &(x0, x1, wx)) in row.iter_mut().zip(&columns) {
            let p00 = f64::from(src[y0 * src_w + x0]);
            let p10 = f64::from(src[y0 * src_w + x1]);
            let p01 = f64::from(src[y1 * src_w + x0]);
            let p11 = f64::from(src[y1 * src_w + x1]);
            let v = p00 * (1.0 - wx) * (1.0 - wy)
                + p10 * wx * (1.0 - wy)
                + p01 * (1.0 - wx) * wy
                + p11 * wx * wy;
            *dst = v.round() as u8;
        }
    }
    out
}
451
/// A 64-bit perceptual hash value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct PerceptualHash(pub u64);

impl PerceptualHash {
    /// Number of bit positions in which the two hashes differ (0 to 64).
    #[must_use]
    pub fn hamming_distance(self, other: Self) -> u32 {
        (self.0 ^ other.0).count_ones()
    }

    /// Returns `true` when the Hamming distance to `other` is at most `threshold`.
    #[must_use]
    pub fn is_similar(self, other: Self, threshold: u32) -> bool {
        self.hamming_distance(other) <= threshold
    }

    /// Formats the hash as 16 zero-padded lowercase hex digits.
    #[must_use]
    pub fn to_hex(self) -> String {
        format!("{:016x}", self.0)
    }

    /// Parses a hash from hexadecimal text (upper or lower case, up to 16 digits).
    ///
    /// Returns `None` for an empty string, for any non-hex character (including
    /// a leading sign) and for values that do not fit in 64 bits.
    pub fn from_hex(s: &str) -> Option<Self> {
        // `u64::from_str_radix` on its own tolerates a leading '+', which is
        // not valid hash text, so the characters are validated first.
        if s.is_empty() || !s.bytes().all(|b| b.is_ascii_hexdigit()) {
            return None;
        }
        u64::from_str_radix(s, 16).ok().map(Self)
    }
}

impl std::fmt::Display for PerceptualHash {
    /// Writes the same text as [`PerceptualHash::to_hex`], without the
    /// intermediate `String`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{:016x}", self.0)
    }
}
492
493pub fn dhash(data: &[u8], width: usize, height: usize, channels: usize) -> Option<PerceptualHash> {
510 let luma = decode_luma(data, width, height, channels)?;
511 let small = bilinear_resize(&luma, width, height, 9, 8);
513 let mut bits: u64 = 0;
514 for row in 0..8 {
515 for col in 0..8 {
516 let left = small[row * 9 + col];
517 let right = small[row * 9 + col + 1];
518 bits = (bits << 1) | u64::from(left > right);
519 }
520 }
521 Some(PerceptualHash(bits))
522}
523
524pub fn phash(data: &[u8], width: usize, height: usize, channels: usize) -> Option<PerceptualHash> {
545 const RESIZE: usize = 32;
546 const DCT_SIZE: usize = 8;
547
548 let luma = decode_luma(data, width, height, channels)?;
549 let small = bilinear_resize(&luma, width, height, RESIZE, RESIZE);
550
551 let mut dct_block = [0.0f64; DCT_SIZE * DCT_SIZE];
554 for u in 0..DCT_SIZE {
555 for v in 0..DCT_SIZE {
556 let mut sum = 0.0f64;
557 for x in 0..RESIZE {
558 for y in 0..RESIZE {
559 let px = small[x * RESIZE + y] as f64;
560 let cu =
561 std::f64::consts::PI * (2 * x + 1) as f64 * u as f64 / (2 * RESIZE) as f64;
562 let cv =
563 std::f64::consts::PI * (2 * y + 1) as f64 * v as f64 / (2 * RESIZE) as f64;
564 sum += px * cu.cos() * cv.cos();
565 }
566 }
567 dct_block[u * DCT_SIZE + v] = sum;
568 }
569 }
570
571 let values_for_mean: Vec<f64> = dct_block.iter().skip(1).copied().collect();
574 let mean = values_for_mean.iter().sum::<f64>() / values_for_mean.len() as f64;
575
576 let mut bits: u64 = 0;
577 for (i, &coeff) in dct_block.iter().enumerate() {
578 if i == 0 {
579 bits <<= 1;
581 continue;
582 }
583 bits = (bits << 1) | u64::from(coeff > mean);
584 }
585 Some(PerceptualHash(bits))
586}
587
588#[must_use]
593pub fn proxy_matches_source(
594 proxy_hash: PerceptualHash,
595 source_hash: PerceptualHash,
596 threshold: u32,
597) -> bool {
598 proxy_hash.is_similar(source_hash, threshold)
599}
600
#[cfg(test)]
mod perceptual_tests {
    use super::*;

    /// Builds a `w` x `h` RGB buffer in which every pixel is `(r, g, b)`.
    fn solid_rgb(w: usize, h: usize, r: u8, g: u8, b: u8) -> Vec<u8> {
        [r, g, b].iter().copied().cycle().take(w * h * 3).collect()
    }

    /// Builds a `w` x `h` grayscale buffer ramping from 0 on the left to 255 on the right.
    fn gradient_gray(w: usize, h: usize) -> Vec<u8> {
        (0..h)
            .flat_map(|_| (0..w).map(move |x| (x * 255 / (w - 1).max(1)) as u8))
            .collect()
    }

    #[test]
    fn test_rgb_to_luma_black() {
        assert_eq!(rgb_to_luma(0, 0, 0), 0);
    }

    #[test]
    fn test_rgb_to_luma_white() {
        let white = rgb_to_luma(255, 255, 255);
        assert!(white >= 254);
    }

    #[test]
    fn test_decode_luma_rgb() {
        let pixels = solid_rgb(4, 4, 128, 64, 32);
        let luma = decode_luma(&pixels, 4, 4, 3).expect("decode should succeed");
        assert_eq!(luma.len(), 16);
        // A solid colour must map to one uniform luma value.
        assert!(luma.windows(2).all(|pair| pair[0] == pair[1]));
    }

    #[test]
    fn test_decode_luma_gray() {
        let pixels = vec![100u8; 9];
        let luma = decode_luma(&pixels, 3, 3, 1).expect("decode gray should succeed");
        assert_eq!(luma.len(), 9);
        assert!(luma.iter().all(|&value| value == 100));
    }

    #[test]
    fn test_decode_luma_rgba() {
        let pixels: Vec<u8> = [200u8, 100, 50, 255]
            .iter()
            .copied()
            .cycle()
            .take(4 * 4)
            .collect();
        let luma = decode_luma(&pixels, 2, 2, 4).expect("decode rgba should succeed");
        assert_eq!(luma.len(), 4);
    }

    #[test]
    fn test_decode_luma_wrong_size_returns_none() {
        let pixels = vec![0u8; 10];
        assert!(decode_luma(&pixels, 4, 4, 3).is_none());
    }

    #[test]
    fn test_decode_luma_unsupported_channels() {
        let pixels = vec![0u8; 8];
        assert!(decode_luma(&pixels, 2, 2, 2).is_none());
    }

    #[test]
    fn test_dhash_identical_images_zero_distance() {
        let image = gradient_gray(16, 16);
        let first = dhash(&image, 16, 16, 1).expect("dhash should succeed");
        let second = dhash(&image, 16, 16, 1).expect("dhash should succeed");
        assert_eq!(first.hamming_distance(second), 0);
    }

    #[test]
    fn test_dhash_solid_image() {
        // No pixel is brighter than its neighbour, so every bit is 0.
        let image = vec![128u8; 32 * 32];
        let hash = dhash(&image, 32, 32, 1).expect("dhash should succeed");
        assert_eq!(hash.0, 0u64);
    }

    #[test]
    fn test_dhash_different_images_non_zero_distance() {
        let ascending = gradient_gray(16, 16);
        let descending: Vec<u8> = ascending.iter().rev().copied().collect();
        let hash_up = dhash(&ascending, 16, 16, 1).expect("dhash a");
        let hash_down = dhash(&descending, 16, 16, 1).expect("dhash b");
        assert_ne!(hash_up.0, hash_down.0);
    }

    #[test]
    fn test_dhash_rgb_input() {
        let image = solid_rgb(16, 16, 80, 160, 200);
        let hash = dhash(&image, 16, 16, 3).expect("dhash rgb");
        assert_eq!(hash.0, 0u64);
    }

    #[test]
    fn test_dhash_wrong_size_returns_none() {
        let pixels = vec![0u8; 5];
        assert!(dhash(&pixels, 4, 4, 3).is_none());
    }

    #[test]
    fn test_phash_identical_images_zero_distance() {
        let image = gradient_gray(32, 32);
        let first = phash(&image, 32, 32, 1).expect("phash should succeed");
        let second = phash(&image, 32, 32, 1).expect("phash should succeed");
        assert_eq!(first.hamming_distance(second), 0);
    }

    #[test]
    fn test_phash_wrong_size_returns_none() {
        let pixels = vec![0u8; 7];
        assert!(phash(&pixels, 3, 3, 3).is_none());
    }

    #[test]
    fn test_perceptual_hash_hamming_distance() {
        let left = PerceptualHash(0b1010_1010);
        let right = PerceptualHash(0b0101_0101);
        assert_eq!(left.hamming_distance(right), 8);
    }

    #[test]
    fn test_perceptual_hash_is_similar_within_threshold() {
        let zero = PerceptualHash(0u64);
        let three_bits = PerceptualHash(0b111u64);
        assert!(zero.is_similar(three_bits, 5));
        assert!(!zero.is_similar(three_bits, 2));
    }

    #[test]
    fn test_perceptual_hash_hex_roundtrip() {
        let original = PerceptualHash(0xDEAD_BEEF_CAFE_1234);
        let text = original.to_hex();
        let restored = PerceptualHash::from_hex(&text).expect("from_hex should succeed");
        assert_eq!(original, restored);
    }

    #[test]
    fn test_perceptual_hash_display() {
        let zero = PerceptualHash(0);
        assert_eq!(format!("{zero}"), "0000000000000000");
    }

    #[test]
    fn test_perceptual_hash_from_hex_invalid() {
        assert!(PerceptualHash::from_hex("xyz").is_none());
    }

    #[test]
    fn test_proxy_matches_source_similar() {
        let proxy = PerceptualHash(0b1111_0000);
        let source = PerceptualHash(0b1110_0000);
        assert!(proxy_matches_source(proxy, source, 10));
    }

    #[test]
    fn test_proxy_matches_source_dissimilar() {
        let proxy = PerceptualHash(0u64);
        let source = PerceptualHash(u64::MAX);
        assert!(!proxy_matches_source(proxy, source, 10));
    }

    #[test]
    fn test_bilinear_resize_same_size() {
        let source: Vec<u8> = (0..16).collect();
        let resized = bilinear_resize(&source, 4, 4, 4, 4);
        assert_eq!(resized.len(), 16);
        assert_eq!(resized[0], source[0]);
        assert_eq!(resized[15], source[15]);
    }

    #[test]
    fn test_bilinear_resize_downscale() {
        let source = vec![100u8; 64];
        let resized = bilinear_resize(&source, 8, 8, 4, 4);
        assert_eq!(resized.len(), 16);
        for &value in &resized {
            assert_eq!(value, 100);
        }
    }
}