jam_rs/
hash_functions.rs

//! A list of hash functions for k-mers, collected here so they can be compared.
//!
//! The constants were chosen by testing different digits of pi.
4use crate::cli::HashAlgorithms;
/// Key that `ahash` XORs into its input before multiplying.
///
/// Written as the XOR of the two 64-bit words it was derived from.
const KEY1: u64 = 0x4528_21e6_38d0_1377 ^ 0xa409_3822_299f_31d0; // = 0xe121_19c4_114f_22a7
/// Rotation amount applied by `ahash` to the folded product.
///
/// This is the low 16 bits of `0xbe54_66cf_34e9_0c6c ^ 0x082e_fa98_ec4e_6c89`.
/// NOTE(review): the value is not masked with 63; `rotate_left` is expected to
/// reduce the amount modulo the bit width, i.e. an effective rotation by
/// `0x60e5 % 64 == 37` -- confirm before relying on it.
const KEY2: u32 = 0x60e5;
7
8// Standard xxhash function for all sizes
9#[inline]
10pub fn xxhash3(kmer: &[u8]) -> u64 {
11    xxhash_rust::xxh3::xxh3_64(kmer)
12}
13
14// Standard xxhash function for all sizes
15#[inline]
16pub fn xxhash3_u64(kmer: u64) -> u64 {
17    xxhash_rust::xxh3::xxh3_64(&kmer.to_be_bytes())
18}
19
20// Specialized hash function for kmers < 32
21// Simplified version of ahash-fallback from the ahash crate
22#[inline]
23pub fn ahash(kmer: u64) -> u64 {
24    let temp = (kmer ^ KEY1) as u128 * 6364136223846793005_u128;
25    let temp2 = ((temp & 0xffff_ffff_ffff_ffff) as u64) ^ ((temp >> 64) as u64); // XOR the lower 64 bits with the upper 64 bits.
26    temp2.rotate_left(KEY2)
27}
28
29// Faster version of murmur3 with equivalent output
30#[inline]
31pub fn murmur3(kmer: &[u8]) -> u64 {
32    fastmurmur3::murmur3_x64_128(kmer, 42) as u64
33}
34
35#[inline]
36pub fn murmur3_u64(kmer: u64) -> u64 {
37    fastmurmur3::murmur3_x64_128(&kmer.to_be_bytes(), 42) as u64
38}
39
/// Stores a borrowed hash function in one of two calling conventions.
///
/// Both variants hold a reference to a `Send + Sync` callable returning a `u64` hash.
#[derive(Clone)]
pub enum Function<'f> {
    /// Hash function that takes the k-mer as a byte slice.
    Large(&'f (dyn Fn(&[u8]) -> u64 + Send + Sync)),
    /// Hash function that takes the k-mer as a single `u64`.
    Small(&'f (dyn Fn(u64) -> u64 + Send + Sync)),
}
46
47impl Function<'_> {
48    pub fn get_large(&self) -> Option<&dyn Fn(&[u8]) -> u64> {
49        match self {
50            Function::Large(f) => Some(f),
51            _ => None,
52        }
53    }
54    pub fn get_small(&self) -> Option<&dyn Fn(u64) -> u64> {
55        match self {
56            Function::Small(f) => Some(f),
57            _ => None,
58        }
59    }
60
61    pub fn from_alg(algo: HashAlgorithms, kmer_size: u8) -> Self {
62        if kmer_size < 32 {
63            match algo {
64                HashAlgorithms::Ahash => Function::Small(&ahash),
65                HashAlgorithms::Murmur3 => Function::Small(&murmur3_u64),
66                HashAlgorithms::Xxhash => Function::Small(&xxhash3_u64),
67                HashAlgorithms::Default => Function::Small(&ahash),
68            }
69        } else {
70            match algo {
71                HashAlgorithms::Murmur3 => Function::Large(&murmur3),
72                HashAlgorithms::Xxhash | HashAlgorithms::Default => Function::Large(&xxhash3),
73                _ => panic!("Hash function not supported for kmer size > 32"),
74            }
75        }
76    }
77}
78
#[cfg(test)]
mod tests {
    use super::*;

    // Inputs shared by all tests.
    const KMER_BYTES: &[u8] = b"AAAAAAAAAAA";
    const KMER_U64: u64 = 0xAAAAAAAAAAAAAAA;

    // Expected digests of the shared inputs, one per hash function.
    const XXHASH3_BYTES: u64 = 0x92994E9987384EE2; // = 10563560822279786210
    const MURMUR3_BYTES: u64 = 7773142420371383521;
    const AHASH_U64: u64 = 6369629604220809163;
    const XXHASH3_U64: u64 = 5855080426738543665;
    const MURMUR3_U64: u64 = 442865051503200633;

    /// Hashes `KMER_U64` with the function selected for `algo` and a k-mer size of 21.
    fn small_hash(algo: HashAlgorithms) -> u64 {
        let f = Function::from_alg(algo, 21);
        let hash = f.get_small().unwrap()(KMER_U64);
        hash
    }

    /// Hashes `KMER_BYTES` with the function selected for `algo` and a k-mer size of 32.
    fn large_hash(algo: HashAlgorithms) -> u64 {
        let f = Function::from_alg(algo, 32);
        let hash = f.get_large().unwrap()(KMER_BYTES);
        hash
    }

    #[test]
    fn test_xxhash3() {
        assert_eq!(xxhash3(KMER_BYTES), XXHASH3_BYTES);
    }

    #[test]
    fn test_ahash() {
        assert_eq!(ahash(KMER_U64), AHASH_U64);
    }

    #[test]
    fn test_murmur3() {
        assert_eq!(murmur3(KMER_BYTES), MURMUR3_BYTES);
    }

    #[test]
    fn test_xxhash3_u64() {
        assert_eq!(xxhash3_u64(KMER_U64), XXHASH3_U64);
    }

    #[test]
    fn test_murmur3_u64() {
        assert_eq!(murmur3_u64(KMER_U64), MURMUR3_U64);
    }

    #[test]
    fn function_test() {
        // k-mer size below 32: `u64` based functions.
        assert_eq!(small_hash(HashAlgorithms::Ahash), AHASH_U64);
        assert_eq!(small_hash(HashAlgorithms::Murmur3), MURMUR3_U64);
        assert_eq!(small_hash(HashAlgorithms::Xxhash), XXHASH3_U64);
        assert_eq!(small_hash(HashAlgorithms::Default), AHASH_U64);

        // k-mer size of 32 and above: byte-slice based functions.
        assert_eq!(large_hash(HashAlgorithms::Murmur3), MURMUR3_BYTES);
        assert_eq!(large_hash(HashAlgorithms::Xxhash), XXHASH3_BYTES);
        assert_eq!(large_hash(HashAlgorithms::Default), XXHASH3_BYTES);
    }

    #[test]
    fn accessors_return_none_for_the_other_variant() {
        let small = Function::from_alg(HashAlgorithms::Default, 21);
        assert!(small.get_large().is_none());

        let large = Function::from_alg(HashAlgorithms::Default, 32);
        assert!(large.get_small().is_none());
    }

    #[test]
    #[should_panic(expected = "Hash function not supported")]
    fn ahash_panics_for_large_kmers() {
        let _ = Function::from_alg(HashAlgorithms::Ahash, 32);
    }
}