hamming_bitwise_fast/
lib.rs

1//! A fast, zero-dependency implementation of bitwise Hamming Distance using
2//! a method amenable to auto-vectorization.
3
4/// Calculate the bitwise Hamming distance between two byte slices.
5///
6/// While this implementation does not explicitly use SIMD, it uses
7/// a technique that is amenable to auto-vectorization. Its performance
8/// is similar to or faster than more complex implementations that use
9/// explicit SIMD instructions for specific architectures.
10///
11/// # Panics
12///
13/// Panics if the two slices are not the same length.
14#[inline]
15pub fn hamming_bitwise_fast(x: &[u8], y: &[u8]) -> u32 {
16    assert_eq!(x.len(), y.len());
17
18    // Process 8 bytes at a time using u64
19    let mut distance = x
20        .chunks_exact(8)
21        .zip(y.chunks_exact(8))
22        .map(|(x_chunk, y_chunk)| {
23            // This is safe because we know the chunks are exactly 8 bytes.
24            // Also, we don't care whether the platform uses little-endian or big-endian
25            // byte order. Since we're only XORing values, we just care that the
26            // endianness is the same for both.
27            let x_val = u64::from_ne_bytes(x_chunk.try_into().unwrap());
28            let y_val = u64::from_ne_bytes(y_chunk.try_into().unwrap());
29            (x_val ^ y_val).count_ones()
30        })
31        .sum::<u32>();
32
33    if x.len() % 8 != 0 {
34        distance += x
35            .chunks_exact(8)
36            .remainder()
37            .iter()
38            .zip(y.chunks_exact(8).remainder())
39            .map(|(x_byte, y_byte)| (x_byte ^ y_byte).count_ones())
40            .sum::<u32>();
41    }
42
43    distance
44}
45
46#[doc(hidden)]
47#[inline]
48pub fn naive_hamming_distance(x: &[u8], y: &[u8]) -> u64 {
49    assert_eq!(x.len(), y.len());
50    let mut distance: u32 = 0;
51    for i in 0..x.len() {
52        distance += (x[i] ^ y[i]).count_ones();
53    }
54    distance as u64
55}
56
57#[doc(hidden)]
58#[inline]
59pub fn naive_hamming_distance_iter(x: &[u8], y: &[u8]) -> u64 {
60    x.iter()
61        .zip(y)
62        .fold(0, |a, (b, c)| a + (*b ^ *c).count_ones()) as u64
63}
64
65#[test]
66fn all_same_results() {
67    let a ="cd8e98b29187133982909fc8b30e39c7b4dca73128ece9cf22ce64eefcf75a3adb0f129b1b00f63a20209e83cb873df707f1af6a4e3558941556b215461a9cbbbce984233c8b8a51e8bd2d1e7f6500caf59fb497440d15365b81e75d3ca4fc9947d5fcb97a0a7b5e44a6b93ee4f622c9b3157991fecac58f364b23f01fd8621e";
68    let b = "860e297e5ce51d3bee094b69bedaaf4ec5d74aa639fec1980ac8d6debb77ff8a323350ab4217867a2521d1248f878dc71f39ede3ea357ef39065da261f9ab470ce6884a3e8a6727d1a3c2614ab66481683f63c01de17b4f59d11659ab5a4310121fccc69418839ff6783f9ce7d760ac8e3db7824eef28d0f12fc6b3c1ef8d75c";
69    let a = hex::decode(a).unwrap();
70    let b = hex::decode(b).unwrap();
71
72    let expected = naive_hamming_distance(&a, &b);
73
74    // Compare with naive_iter implementation
75    assert_eq!(expected, naive_hamming_distance_iter(&a, &b));
76
77    // Compare with auto vectorized implementation
78    assert_eq!(expected, hamming_bitwise_fast(&a, &b) as u64);
79
80    // Compare with hamming crate
81    assert_eq!(expected, hamming::distance_fast(&a, &b).unwrap());
82
83    // Compare with hamming_rs crate (x86/x86_64 only)
84    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
85    assert_eq!(expected, hamming_rs::distance_faster(&a, &b));
86
87    // Compare with simsimd crate
88    assert_eq!(
89        expected,
90        simsimd::BinarySimilarity::hamming(&a, &b).unwrap() as u64
91    );
92}