superbit/simhash/
mod.rs

1mod bitarray;
2mod fast_sim_hash;
3mod sim_hash;
4mod sim_hasher;
5mod superbit;
6
7pub use bitarray::BitArray;
8pub use fast_sim_hash::FastSimHash;
9pub use sim_hash::SimHash;
10pub use sim_hasher::{SimSipHasher64, SimSipHasher128, Xxh3Hasher64, Xxh3Hasher128};
11pub use superbit::SuperBitSimHash;
12
13use core::mem;
14
15use num_traits::{One, Zero};
16use std::f64::consts::PI;
17use std::fmt::Debug;
18use std::hash::Hash;
19use std::ops::{BitAnd, BitOr, BitOrAssign, BitXor, Not, Shl, Shr, ShrAssign};
20
21pub trait SimHashBits:
22    Sized
23    + Clone
24    + Copy
25    + Zero
26    + One
27    + Debug
28    + PartialOrd
29    + PartialEq
30    + Not<Output = Self>
31    + BitAnd<Output = Self>
32    + BitOr<Output = Self>
33    + BitXor<Output = Self>
34    + BitOrAssign
35    + Shl<usize, Output = Self>
36    + Shr<usize, Output = Self>
37    + ShrAssign<usize>
38    + Hash
39    + Eq
40{
41    fn count_ones(self) -> usize;
42
43    fn to_u32_high_bits(self) -> u32;
44
45    fn to_u64_high_bits(self) -> u64;
46
47    fn hamming_distance(&self, rhs: &Self) -> usize;
48
49    fn hamming_angle(&self, rhs: &Self) -> f64 {
50        self.hamming_distance(rhs) as f64 * (PI / Self::bit_length() as f64)
51    }
52
53    fn bit_length() -> usize;
54}
55
56macro_rules! prim_int_impl {
57    ($T:ty, $S:ty, $U:ty) => {
58        impl SimHashBits for $T {
59            #[inline]
60            fn count_ones(self) -> usize {
61                <$T>::count_ones(self) as usize
62            }
63
64            #[inline]
65            fn to_u32_high_bits(self) -> u32 {
66                (self >> ((mem::size_of::<$T>() * 8) - 32)) as u32
67            }
68
69            #[inline]
70            fn to_u64_high_bits(self) -> u64 {
71                (self >> ((mem::size_of::<$T>() * 8) - 64)) as u64
72            }
73
74            #[inline]
75            fn hamming_distance(&self, rhs: &Self) -> usize {
76                (self ^ rhs).count_ones() as usize
77            }
78
79            #[inline]
80            fn bit_length() -> usize {
81                mem::size_of::<$T>() * 8
82            }
83        }
84    };
85}
86
87prim_int_impl!(u64, i64, u64);
88prim_int_impl!(u128, i128, u128);