1use std::array::from_fn;
3use std::hash::BuildHasher;
4use std::hash::BuildHasherDefault;
5use std::hash::DefaultHasher;
6
7use super::intrinsics;
8use crate::KmerHasher;
9use crate::S;
10use packed_seq::Seq;
11use packed_seq::complement_base;
12use wide::u32x8;
13
14type SeedHasher = BuildHasherDefault<DefaultHasher>;
15
16const HASHES_F: [u32; 4] = [
19 0x3c8b_fbb3_95c6_0474u64 as u32,
20 0x3193_c185_62a0_2b4cu64 as u32,
21 0x2032_3ed0_8257_2324u64 as u32,
22 0x2955_49f5_4be2_4456u64 as u32,
23];
24
25pub trait CharHasher: Clone {
30 const CANONICAL: bool;
32 const R: u32;
34 const BITS_PER_CHAR: usize;
36
37 fn new(k: usize) -> Self {
38 Self::new_with_seed(k, None)
39 }
40 fn new_with_seed(k: usize, seed: Option<u32>) -> Self;
42 fn k(&self) -> usize;
44 fn f(&self, b: u8) -> u32;
46 fn c(&self, b: u8) -> u32;
48 fn f_rot(&self, b: u8) -> u32;
50 fn c_rot(&self, b: u8) -> u32;
52 fn simd_f(&self, b: u32x8) -> u32x8;
54 fn simd_c(&self, b: u32x8) -> u32x8;
56 fn simd_f_rot(&self, b: u32x8) -> u32x8;
58 fn simd_c_rot(&self, b: u32x8) -> u32x8;
60
61 #[inline(always)]
63 fn fw_init(&self) -> u32 {
64 let mut fw = 0u32;
65 for _ in 0..self.k() - 1 {
66 fw = fw.rotate_left(Self::R) ^ self.f(0);
67 }
68 fw
69 }
70
71 #[inline(always)]
73 fn rc_init(&self) -> u32 {
74 let mut rc = 0u32;
75 for _ in 0..self.k() - 1 {
76 rc = rc.rotate_right(Self::R) ^ self.c_rot(0);
77 }
78 rc
79 }
80}
81
82#[derive(Clone)]
88pub struct NtHasher<const CANONICAL: bool = true, const R: u32 = 7> {
89 k: usize,
90 f: [u32; 4],
91 c: [u32; 4],
92 f_rot: [u32; 4],
93 c_rot: [u32; 4],
94 simd_f: u32x8,
95 simd_c: u32x8,
96 simd_f_rot: u32x8,
97 simd_c_rot: u32x8,
98}
99
100impl<const CANONICAL: bool, const R: u32> NtHasher<CANONICAL, R> {
101 #[inline(always)]
102 pub fn new(k: usize) -> Self {
103 CharHasher::new(k)
104 }
105 #[inline(always)]
106 pub fn new_with_seed(k: usize, seed: u32) -> Self {
107 CharHasher::new_with_seed(k, Some(seed))
108 }
109}
110
111impl<const CANONICAL: bool, const R: u32> CharHasher for NtHasher<CANONICAL, R> {
112 const CANONICAL: bool = CANONICAL;
113 const R: u32 = R;
114 const BITS_PER_CHAR: usize = 2;
115
116 #[inline(always)]
117 fn new_with_seed(k: usize, seed: Option<u32>) -> Self {
118 let rot = k as u32 - 1;
119 let hasher = SeedHasher::new();
120 let f = match seed {
121 None => HASHES_F,
122 Some(seed) => from_fn(|i| hasher.hash_one(HASHES_F[i] ^ seed) as u32),
123 };
124 let c = from_fn(|i| f[complement_base(i as u8) as usize]);
125 let f_rot = f.map(|h| h.rotate_left(rot * R));
126 let c_rot = c.map(|h| h.rotate_left(rot * R));
127 let idx = [0, 1, 2, 3, 0, 1, 2, 3];
128 let simd_f = idx.map(|i| f[i]).into();
129 let simd_c = idx.map(|i| c[i]).into();
130 let simd_f_rot = idx.map(|i| f_rot[i]).into();
131 let simd_c_rot = idx.map(|i| c_rot[i]).into();
132
133 Self {
134 k,
135 f,
136 c,
137 f_rot,
138 c_rot,
139 simd_f,
140 simd_c,
141 simd_f_rot,
142 simd_c_rot,
143 }
144 }
145
146 #[inline(always)]
147 fn k(&self) -> usize {
148 self.k
149 }
150
151 #[inline(always)]
152 fn f(&self, b: u8) -> u32 {
153 unsafe { *self.f.get_unchecked(b as usize) }
154 }
155 #[inline(always)]
156 fn c(&self, b: u8) -> u32 {
157 unsafe { *self.c.get_unchecked(b as usize) }
158 }
159 #[inline(always)]
160 fn f_rot(&self, b: u8) -> u32 {
161 unsafe { *self.f_rot.get_unchecked(b as usize) }
162 }
163 #[inline(always)]
164 fn c_rot(&self, b: u8) -> u32 {
165 unsafe { *self.c_rot.get_unchecked(b as usize) }
166 }
167
168 #[inline(always)]
169 fn simd_f(&self, b: u32x8) -> u32x8 {
170 intrinsics::table_lookup(self.simd_f, b)
171 }
172 #[inline(always)]
173 fn simd_c(&self, b: u32x8) -> u32x8 {
174 intrinsics::table_lookup(self.simd_c, b)
175 }
176 #[inline(always)]
177 fn simd_f_rot(&self, b: u32x8) -> u32x8 {
178 intrinsics::table_lookup(self.simd_f_rot, b)
179 }
180 #[inline(always)]
181 fn simd_c_rot(&self, b: u32x8) -> u32x8 {
182 intrinsics::table_lookup(self.simd_c_rot, b)
183 }
184}
185
186#[derive(Clone)]
192pub struct MulHasher<const CANONICAL: bool = true, const R: u32 = 7> {
193 k: usize,
194 rot: u32,
195 mul: u32,
196}
197
198impl<const CANONICAL: bool, const R: u32> MulHasher<CANONICAL, R> {
199 #[inline(always)]
200 pub fn new(k: usize) -> Self {
201 CharHasher::new(k)
202 }
203 #[inline(always)]
204 pub fn new_with_seed(k: usize, seed: u32) -> Self {
205 CharHasher::new_with_seed(k, Some(seed))
206 }
207}
208
209const C: u32 = 0x517cc1b727220a95u64 as u32;
211
212impl<const CANONICAL: bool, const R: u32> CharHasher for MulHasher<CANONICAL, R> {
213 const CANONICAL: bool = CANONICAL;
214 const R: u32 = R;
215 const BITS_PER_CHAR: usize = 8;
216
217 #[inline(always)]
218 fn new_with_seed(k: usize, seed: Option<u32>) -> Self {
219 Self {
220 k,
221 rot: (k as u32 - 1) % 32,
222 mul: C ^ match seed {
223 None => 0,
224 Some(seed) => (SeedHasher::new().hash_one(seed) as u32) << 1,
226 },
227 }
228 }
229
230 #[inline(always)]
231 fn k(&self) -> usize {
232 self.k
233 }
234
235 #[inline(always)]
236 fn f(&self, b: u8) -> u32 {
237 (b as u32).wrapping_mul(self.mul)
238 }
239 #[inline(always)]
240 fn c(&self, b: u8) -> u32 {
241 (complement_base(b) as u32).wrapping_mul(self.mul)
242 }
243 #[inline(always)]
244 fn f_rot(&self, b: u8) -> u32 {
245 (b as u32).wrapping_mul(self.mul).rotate_left(self.rot * R)
246 }
247 #[inline(always)]
248 fn c_rot(&self, b: u8) -> u32 {
249 (complement_base(b) as u32)
250 .wrapping_mul(self.mul)
251 .rotate_left(self.rot * R)
252 }
253
254 #[inline(always)]
255 fn simd_f(&self, b: u32x8) -> u32x8 {
256 b * self.mul.into()
257 }
258 #[inline(always)]
259 fn simd_c(&self, b: u32x8) -> u32x8 {
260 packed_seq::complement_base_simd(b) * self.mul.into()
261 }
262 #[inline(always)]
263 fn simd_f_rot(&self, b: u32x8) -> u32x8 {
264 let r = b * self.mul.into();
265 let rot = self.rot * R % 32;
266 (r << rot) | (r >> (32 - rot))
267 }
268 #[inline(always)]
269 fn simd_c_rot(&self, b: u32x8) -> u32x8 {
270 let r = packed_seq::complement_base_simd(b) * self.mul.into();
271 let rot = self.rot * R % 32;
272 (r << rot) | (r >> (32 - rot))
273 }
274}
275
276impl<CH: CharHasher> KmerHasher for CH {
277 const CANONICAL: bool = CH::CANONICAL;
278
279 fn new(k: usize) -> Self {
280 Self::new(k)
281 }
282
283 fn k(&self) -> usize {
284 self.k()
285 }
286
287 #[inline(always)]
288 fn in_out_mapper_scalar<'s>(&self, seq: impl Seq<'s>) -> impl FnMut((u8, u8)) -> u32 {
289 assert!(seq.bits_per_char() <= CH::BITS_PER_CHAR);
290
291 let mut fw = self.fw_init();
292 let mut rc = self.rc_init();
293
294 move |(a, r)| {
295 let fw_out = fw.rotate_left(CH::R) ^ self.f(a);
296 fw = fw_out ^ self.f_rot(r);
297 if Self::CANONICAL {
298 let rc_out = rc.rotate_right(CH::R) ^ self.c_rot(a);
299 rc = rc_out ^ self.c(r);
300 fw_out.wrapping_add(rc_out)
301 } else {
302 fw_out
303 }
304 }
305 }
306
307 #[inline(always)]
308 fn in_out_mapper_simd<'s>(&self, seq: impl Seq<'s>) -> impl FnMut((S, S)) -> S {
309 assert!(seq.bits_per_char() <= CH::BITS_PER_CHAR);
310 let mut fw = S::splat(self.fw_init());
311 let mut rc = S::splat(self.rc_init());
312
313 move |(a, r)| {
314 let fw_out = ((fw << CH::R) | (fw >> (32 - CH::R))) ^ self.simd_f(a);
315 fw = fw_out ^ self.simd_f_rot(r);
316 if Self::CANONICAL {
317 let rc_out = ((rc >> CH::R) | (rc << (32 - CH::R))) ^ self.simd_c_rot(a);
318 rc = rc_out ^ self.simd_c(r);
319 fw_out + rc_out
321 } else {
322 fw_out
323 }
324 }
325 }
326
327 #[inline(always)]
328 fn mapper<'s>(&self, seq: impl Seq<'s>) -> impl FnMut(u8) -> u32 {
329 assert!(seq.bits_per_char() <= CH::BITS_PER_CHAR);
330
331 let mut fw = 0u32;
332 let mut rc = 0u32;
333 move |a| {
334 fw = fw.rotate_left(CH::R) ^ self.f(a);
335 if Self::CANONICAL {
336 rc = rc.rotate_right(CH::R) ^ self.c_rot(a);
337 fw.wrapping_add(rc)
338 } else {
339 fw
340 }
341 }
342 }
343}