1use std::array::from_fn;
3use std::hash::BuildHasher;
4use std::hash::BuildHasherDefault;
5use std::hash::DefaultHasher;
6
7use super::intrinsics;
8use crate::KmerHasher;
9use crate::S;
10use packed_seq::Seq;
11use packed_seq::complement_base;
12use wide::u32x8;
13
14type SeedHasher = BuildHasherDefault<DefaultHasher>;
15
16const HASHES_F: [u32; 4] = [
19 0x3c8b_fbb3_95c6_0474u64 as u32,
20 0x3193_c185_62a0_2b4cu64 as u32,
21 0x2032_3ed0_8257_2324u64 as u32,
22 0x2955_49f5_4be2_4456u64 as u32,
23];
24
25pub trait CharHasher: Clone {
30 const CANONICAL: bool;
32 const R: u32;
34 const BITS_PER_CHAR: usize;
36
37 fn new(k: usize) -> Self {
38 Self::new_with_seed(k, None)
39 }
40 fn new_with_seed(k: usize, seed: Option<u32>) -> Self;
42 fn k(&self) -> usize;
44 fn f(&self, b: u8) -> u32;
46 fn c(&self, b: u8) -> u32;
48 fn f_rot(&self, b: u8) -> u32;
50 fn c_rot(&self, b: u8) -> u32;
52 fn simd_f(&self, b: u32x8) -> u32x8;
54 fn simd_c(&self, b: u32x8) -> u32x8;
56 fn simd_f_rot(&self, b: u32x8) -> u32x8;
58 fn simd_c_rot(&self, b: u32x8) -> u32x8;
60
61 fn fw_init(&self) -> u32;
62 fn rc_init(&self) -> u32;
63}
64
65#[derive(Clone)]
71pub struct NtHasher<const CANONICAL: bool = true, const R: u32 = 7> {
72 k: usize,
73 f: [u32; 4],
74 c: [u32; 4],
75 f_rot: [u32; 4],
76 c_rot: [u32; 4],
77 simd_f: u32x8,
78 simd_c: u32x8,
79 simd_f_rot: u32x8,
80 simd_c_rot: u32x8,
81 fw_init: u32,
82 rc_init: u32,
83}
84
85impl<const CANONICAL: bool, const R: u32> NtHasher<CANONICAL, R> {
86 #[inline(always)]
87 pub fn new(k: usize) -> Self {
88 CharHasher::new(k)
89 }
90 #[inline(always)]
91 pub fn new_with_seed(k: usize, seed: u32) -> Self {
92 CharHasher::new_with_seed(k, Some(seed))
93 }
94}
95
96impl<const CANONICAL: bool, const R: u32> CharHasher for NtHasher<CANONICAL, R> {
97 const CANONICAL: bool = CANONICAL;
98 const R: u32 = R;
99 const BITS_PER_CHAR: usize = 2;
100
101 #[inline(always)]
102 fn new_with_seed(k: usize, seed: Option<u32>) -> Self {
103 let rot = k as u32 - 1;
104 let hasher = SeedHasher::new();
105 let f = match seed {
106 None => HASHES_F,
107 Some(seed) => from_fn(|i| hasher.hash_one(HASHES_F[i] ^ seed) as u32),
108 };
109 let c = from_fn(|i| f[complement_base(i as u8) as usize]);
110 let f_rot = f.map(|h| h.rotate_left(rot * R));
111 let c_rot = c.map(|h| h.rotate_left(rot * R));
112 let idx = [0, 1, 2, 3, 0, 1, 2, 3];
113 let simd_f = idx.map(|i| f[i]).into();
114 let simd_c = idx.map(|i| c[i]).into();
115 let simd_f_rot = idx.map(|i| f_rot[i]).into();
116 let simd_c_rot = idx.map(|i| c_rot[i]).into();
117
118 let mut fw_init = 0u32;
120 for _ in 0..k - 1 {
121 fw_init = fw_init.rotate_left(Self::R) ^ f[0];
122 }
123
124 let mut rc_init = 0u32;
126 for _ in 0..k - 1 {
127 rc_init = rc_init.rotate_right(Self::R) ^ c_rot[0];
128 }
129
130 Self {
131 k,
132 f,
133 c,
134 f_rot,
135 c_rot,
136 simd_f,
137 simd_c,
138 simd_f_rot,
139 simd_c_rot,
140 fw_init,
141 rc_init,
142 }
143 }
144
145 #[inline(always)]
146 fn k(&self) -> usize {
147 self.k
148 }
149
150 #[inline(always)]
151 fn f(&self, b: u8) -> u32 {
152 unsafe { *self.f.get_unchecked(b as usize) }
153 }
154 #[inline(always)]
155 fn c(&self, b: u8) -> u32 {
156 unsafe { *self.c.get_unchecked(b as usize) }
157 }
158 #[inline(always)]
159 fn f_rot(&self, b: u8) -> u32 {
160 unsafe { *self.f_rot.get_unchecked(b as usize) }
161 }
162 #[inline(always)]
163 fn c_rot(&self, b: u8) -> u32 {
164 unsafe { *self.c_rot.get_unchecked(b as usize) }
165 }
166
167 #[inline(always)]
168 fn simd_f(&self, b: u32x8) -> u32x8 {
169 intrinsics::table_lookup(self.simd_f, b)
170 }
171 #[inline(always)]
172 fn simd_c(&self, b: u32x8) -> u32x8 {
173 intrinsics::table_lookup(self.simd_c, b)
174 }
175 #[inline(always)]
176 fn simd_f_rot(&self, b: u32x8) -> u32x8 {
177 intrinsics::table_lookup(self.simd_f_rot, b)
178 }
179 #[inline(always)]
180 fn simd_c_rot(&self, b: u32x8) -> u32x8 {
181 intrinsics::table_lookup(self.simd_c_rot, b)
182 }
183 #[inline(always)]
184 fn fw_init(&self) -> u32 {
185 self.fw_init
186 }
187 #[inline(always)]
188 fn rc_init(&self) -> u32 {
189 self.rc_init
190 }
191}
192
193#[derive(Clone)]
199pub struct MulHasher<const CANONICAL: bool = true, const R: u32 = 7> {
200 k: usize,
201 rot: u32,
202 mul: u32,
203 fw_init: u32,
204 rc_init: u32,
205}
206
207impl<const CANONICAL: bool, const R: u32> MulHasher<CANONICAL, R> {
208 #[inline(always)]
209 pub fn new(k: usize) -> Self {
210 CharHasher::new(k)
211 }
212 #[inline(always)]
213 pub fn new_with_seed(k: usize, seed: u32) -> Self {
214 CharHasher::new_with_seed(k, Some(seed))
215 }
216}
217
218const C: u32 = 0x517cc1b727220a95u64 as u32;
220
221impl<const CANONICAL: bool, const R: u32> CharHasher for MulHasher<CANONICAL, R> {
222 const CANONICAL: bool = CANONICAL;
223 const R: u32 = R;
224 const BITS_PER_CHAR: usize = 8;
225
226 #[inline(always)]
227 fn new_with_seed(k: usize, seed: Option<u32>) -> Self {
228 let rot = (k as u32 - 1) % 32;
229 let mul = C ^ match seed {
230 None => 0,
231 Some(seed) => (SeedHasher::new().hash_one(seed) as u32) << 1,
233 };
234
235 let mut fw_init = 0u32;
237 for _ in 0..k - 1 {
238 fw_init = fw_init.rotate_left(Self::R) ^ (0 as u32).wrapping_mul(mul);
239 }
240
241 let mut rc_init = 0u32;
243 for _ in 0..k - 1 {
244 rc_init = rc_init.rotate_right(Self::R)
245 ^ (complement_base(0) as u32)
246 .wrapping_mul(mul)
247 .rotate_left(rot * R);
248 }
249
250 Self {
251 k,
252 rot,
253 mul,
254 fw_init,
255 rc_init,
256 }
257 }
258
259 #[inline(always)]
260 fn k(&self) -> usize {
261 self.k
262 }
263
264 #[inline(always)]
265 fn f(&self, b: u8) -> u32 {
266 (b as u32).wrapping_mul(self.mul)
267 }
268 #[inline(always)]
269 fn c(&self, b: u8) -> u32 {
270 (complement_base(b) as u32).wrapping_mul(self.mul)
271 }
272 #[inline(always)]
273 fn f_rot(&self, b: u8) -> u32 {
274 (b as u32).wrapping_mul(self.mul).rotate_left(self.rot * R)
275 }
276 #[inline(always)]
277 fn c_rot(&self, b: u8) -> u32 {
278 (complement_base(b) as u32)
279 .wrapping_mul(self.mul)
280 .rotate_left(self.rot * R)
281 }
282
283 #[inline(always)]
284 fn simd_f(&self, b: u32x8) -> u32x8 {
285 b * self.mul.into()
286 }
287 #[inline(always)]
288 fn simd_c(&self, b: u32x8) -> u32x8 {
289 packed_seq::complement_base_simd(b) * self.mul.into()
290 }
291 #[inline(always)]
292 fn simd_f_rot(&self, b: u32x8) -> u32x8 {
293 let r = b * self.mul.into();
294 let rot = self.rot * R % 32;
295 (r << rot) | (r >> (32 - rot))
296 }
297 #[inline(always)]
298 fn simd_c_rot(&self, b: u32x8) -> u32x8 {
299 let r = packed_seq::complement_base_simd(b) * self.mul.into();
300 let rot = self.rot * R % 32;
301 (r << rot) | (r >> (32 - rot))
302 }
303 #[inline(always)]
304 fn fw_init(&self) -> u32 {
305 self.fw_init
306 }
307 #[inline(always)]
308 fn rc_init(&self) -> u32 {
309 self.rc_init
310 }
311}
312
313impl<CH: CharHasher> KmerHasher for CH {
314 const CANONICAL: bool = CH::CANONICAL;
315
316 fn new(k: usize) -> Self {
317 Self::new(k)
318 }
319
320 fn k(&self) -> usize {
321 self.k()
322 }
323
324 #[inline(always)]
325 fn in_out_mapper_scalar<'s>(&self, seq: impl Seq<'s>) -> impl FnMut((u8, u8)) -> u32 {
326 assert!(seq.bits_per_char() <= CH::BITS_PER_CHAR);
327
328 let mut fw = self.fw_init();
329 let mut rc = self.rc_init();
330
331 move |(a, r)| {
332 let fw_out = fw.rotate_left(CH::R) ^ self.f(a);
333 fw = fw_out ^ self.f_rot(r);
334 if Self::CANONICAL {
335 let rc_out = rc.rotate_right(CH::R) ^ self.c_rot(a);
336 rc = rc_out ^ self.c(r);
337 fw_out.wrapping_add(rc_out)
338 } else {
339 fw_out
340 }
341 }
342 }
343
344 #[inline(always)]
345 fn in_out_mapper_simd<'s>(&self, seq: impl Seq<'s>) -> impl FnMut((S, S)) -> S {
346 assert!(seq.bits_per_char() <= CH::BITS_PER_CHAR);
347 let mut fw = S::splat(self.fw_init());
348 let mut rc = S::splat(self.rc_init());
349 let shl = S::splat(CH::R);
350 let shr = S::splat(32 - CH::R);
351
352 move |(a, r)| {
353 let fw_out = ((fw << shl) | (fw >> shr)) ^ self.simd_f(a);
354 fw = fw_out ^ self.simd_f_rot(r);
355 if Self::CANONICAL {
356 let rc_out = ((rc >> shl) | (rc << shr)) ^ self.simd_c_rot(a);
357 rc = rc_out ^ self.simd_c(r);
358 fw_out + rc_out
360 } else {
361 fw_out
362 }
363 }
364 }
365
366 #[inline(always)]
367 fn mapper<'s>(&self, seq: impl Seq<'s>) -> impl FnMut(u8) -> u32 {
368 assert!(seq.bits_per_char() <= CH::BITS_PER_CHAR);
369
370 let mut fw = 0u32;
371 let mut rc = 0u32;
372 move |a| {
373 fw = fw.rotate_left(CH::R) ^ self.f(a);
374 if Self::CANONICAL {
375 rc = rc.rotate_right(CH::R) ^ self.c_rot(a);
376 fw.wrapping_add(rc)
377 } else {
378 fw
379 }
380 }
381 }
382}