1use crate::{
16 constants::*,
17 tables::{srol, srol_n, srol_table, sror},
18 util::extend_hashes,
19 NtHashError, };
21
22pub type Result<T> = crate::Result<T>;
24
25pub struct NtHash<'a> {
33 seq: &'a [u8],
34 k: u16,
35 pos: usize,
36 initialized: bool,
37 fwd_hash: u64,
38 rev_hash: u64,
39 hashes: Vec<u64>,
40}
41
42impl<'a> NtHash<'a> {
43 pub fn new(seq: &'a [u8], k: u16, num_hashes: u8, pos: usize) -> Result<Self> {
56 if k == 0 {
57 return Err(NtHashError::InvalidK);
58 }
59 let len = seq.len();
60 let k_usz = k as usize;
61 if len < k_usz {
62 return Err(NtHashError::SequenceTooShort { seq_len: len, k });
63 }
64 if pos > len - k_usz {
65 return Err(NtHashError::PositionOutOfRange { pos, seq_len: len });
66 }
67 Ok(Self {
68 seq: seq,
69 k,
70 pos,
71 initialized: false,
72 fwd_hash: 0,
73 rev_hash: 0,
74 hashes: vec![0; num_hashes as usize],
75 })
76 }
77
78 pub fn roll(&mut self) -> bool {
81 if !self.initialized {
82 return self.init();
83 }
84 let k_usz = self.k as usize;
85 if self.pos >= self.seq.len() - k_usz {
86 return false;
87 }
88 let incoming = self.seq[self.pos + k_usz];
89 if SEED_TAB[incoming as usize] == SEED_N {
90 self.pos += k_usz;
91 return self.init();
92 }
93 let outgoing = self.seq[self.pos];
94 self.fwd_hash = next_forward_hash(self.fwd_hash, self.k, outgoing, incoming);
95 self.rev_hash = next_reverse_hash(self.rev_hash, self.k, outgoing, incoming);
96 self.update_hashes();
97 self.pos += 1;
98 true
99 }
100
101 pub fn roll_back(&mut self) -> bool {
103 if !self.initialized && !self.init() {
104 return false;
105 }
106 if self.pos == 0 {
107 return false;
108 }
109 let incoming = self.seq[self.pos - 1];
110 if SEED_TAB[incoming as usize] == SEED_N {
111 if self.pos < self.k as usize {
112 return false;
113 }
114 self.pos -= self.k as usize;
115 return self.init();
116 }
117 let outgoing = self.seq[self.pos + self.k as usize - 1];
118 self.fwd_hash = prev_forward_hash(self.fwd_hash, self.k, outgoing, incoming);
119 self.rev_hash = prev_reverse_hash(self.rev_hash, self.k, outgoing, incoming);
120 self.update_hashes();
121 self.pos -= 1;
122 true
123 }
124
125 pub fn peek(&mut self) -> bool {
127 if self.pos >= self.seq.len() - self.k as usize {
128 return false;
129 }
130 let incoming = self.seq[self.pos + self.k as usize];
131 self.peek_char(incoming)
132 }
133
134 pub fn peek_char(&mut self, incoming: u8) -> bool {
136 if !self.initialized && !self.init() {
137 return false;
138 }
139 if SEED_TAB[incoming as usize] == SEED_N {
140 return false;
141 }
142 let outgoing = self.seq[self.pos];
143 let fwd = next_forward_hash(self.fwd_hash, self.k, outgoing, incoming);
144 let rev = next_reverse_hash(self.rev_hash, self.k, outgoing, incoming);
145 self.fill_hash_buffer(fwd, rev);
146 true
147 }
148
149 pub fn peek_back(&mut self) -> bool {
151 if self.pos == 0 {
152 return false;
153 }
154 let incoming = self.seq[self.pos - 1];
155 self.peek_back_char(incoming)
156 }
157
158 pub fn peek_back_char(&mut self, incoming: u8) -> bool {
160 if !self.initialized && !self.init() {
161 return false;
162 }
163 if SEED_TAB[incoming as usize] == SEED_N {
164 return false;
165 }
166 let outgoing = self.seq[self.pos + self.k as usize - 1];
167 let fwd = prev_forward_hash(self.fwd_hash, self.k, outgoing, incoming);
168 let rev = prev_reverse_hash(self.rev_hash, self.k, outgoing, incoming);
169 self.fill_hash_buffer(fwd, rev);
170 true
171 }
172
173 #[inline(always)]
175 pub fn hashes(&self) -> &[u64] {
176 &self.hashes
177 }
178
179 #[inline(always)]
181 pub fn pos(&self) -> usize {
182 self.pos
183 }
184
185 #[inline(always)]
187 pub fn forward_hash(&self) -> u64 {
188 self.fwd_hash
189 }
190
191 #[inline(always)]
193 pub fn reverse_hash(&self) -> u64 {
194 self.rev_hash
195 }
196
197 fn init(&mut self) -> bool {
199 let k_usz = self.k as usize;
200 while self.pos <= self.seq.len() - k_usz {
201 let mut skip = 0;
202 if has_invalid_base(&self.seq[self.pos..], k_usz, &mut skip) {
203 self.pos += skip + 1;
204 continue;
205 }
206 self.fwd_hash = base_forward_hash(&self.seq[self.pos..], self.k);
207 self.rev_hash = base_reverse_hash(&self.seq[self.pos..], self.k);
208 self.update_hashes();
209 self.initialized = true;
210 return true;
211 }
212 false
213 }
214
215 #[inline(always)]
216 fn update_hashes(&mut self) {
217 extend_hashes(
218 self.fwd_hash,
219 self.rev_hash,
220 self.k as u32,
221 &mut self.hashes,
222 );
223 }
224
225 #[inline(always)]
226 fn fill_hash_buffer(&mut self, fwd: u64, rev: u64) {
227 extend_hashes(fwd, rev, self.k as u32, &mut self.hashes);
228 }
229}
230
231#[inline(always)]
232pub fn has_invalid_base(seq: &[u8], k: usize, pos_n: &mut usize) -> bool {
233 if let Some(idx) = seq[..k]
234 .iter()
235 .rposition(|&c| SEED_TAB[c as usize] == SEED_N)
236 {
237 *pos_n = idx;
238 true
239 } else {
240 false
241 }
242}
243
244#[inline]
245pub fn base_forward_hash(seq: &[u8], k: u16) -> u64 {
246 let k = k as usize;
247 let mut h = 0_u64;
248
249 for chunk in seq[..k - k % 4].chunks_exact(4) {
250 h = srol_n(h, 4);
251
252 let idx = (CONVERT_TAB[chunk[0] as usize] as usize) * 64
254 + (CONVERT_TAB[chunk[1] as usize] as usize) * 16
255 + (CONVERT_TAB[chunk[2] as usize] as usize) * 4
256 + CONVERT_TAB[chunk[3] as usize] as usize;
257 h ^= TETRAMER_TAB[idx & 0xFF];
258 }
259
260 h = srol_n(h, (k % 4) as u32);
261 match k % 4 {
262 3 => {
263 let idx = (CONVERT_TAB[seq[k - 3] as usize] as usize) * 16
264 + (CONVERT_TAB[seq[k - 2] as usize] as usize) * 4
265 + CONVERT_TAB[seq[k - 1] as usize] as usize;
266 h ^= TRIMER_TAB[idx & 0x3F];
267 }
268 2 => {
269 let idx = (CONVERT_TAB[seq[k - 2] as usize] as usize) * 4
270 + CONVERT_TAB[seq[k - 1] as usize] as usize;
271 h ^= DIMER_TAB[idx & 0x0F];
272 }
273 1 => h ^= SEED_TAB[seq[k - 1] as usize],
274 _ => {}
275 }
276 h
277}
278
279#[inline]
280pub fn base_reverse_hash(seq: &[u8], k: u16) -> u64 {
281 let k = k as usize;
282 let mut h = 0_u64;
283
284 match k % 4 {
286 3 => {
287 let idx = (RC_CONVERT_TAB[seq[k - 1] as usize] as usize) * 16
288 + (RC_CONVERT_TAB[seq[k - 2] as usize] as usize) * 4
289 + RC_CONVERT_TAB[seq[k - 3] as usize] as usize;
290 h ^= TRIMER_TAB[idx & 0x3F];
291 }
292 2 => {
293 let idx = (RC_CONVERT_TAB[seq[k - 1] as usize] as usize) * 4
294 + RC_CONVERT_TAB[seq[k - 2] as usize] as usize;
295 h ^= DIMER_TAB[idx & 0x0F];
296 }
297 1 => {
298 let c = seq[k - 1] & CP_OFF;
299 h ^= SEED_TAB[c as usize];
300 }
301 _ => {}
302 }
303
304 let mut i = k - k % 4;
306 while i >= 4 {
307 h = srol_n(h, 4);
309
310 let idx = (RC_CONVERT_TAB[seq[i - 1] as usize] as usize) * 64
312 + (RC_CONVERT_TAB[seq[i - 2] as usize] as usize) * 16
313 + (RC_CONVERT_TAB[seq[i - 3] as usize] as usize) * 4
314 + RC_CONVERT_TAB[seq[i - 4] as usize] as usize;
315 h ^= TETRAMER_TAB[idx & 0xFF];
316
317 i -= 4;
318 }
319 h
320}
321
322#[inline(always)]
323fn next_forward_hash(prev: u64, k: u16, char_out: u8, char_in: u8) -> u64 {
324 let mut h = srol(prev);
325 h ^= SEED_TAB[char_in as usize];
326 h ^= srol_table(char_out, k as u32);
327 h
328}
329
330#[inline(always)]
331fn prev_forward_hash(prev: u64, k: u16, char_out: u8, char_in: u8) -> u64 {
332 let mut h = prev ^ srol_table(char_in, k as u32);
333 h ^= SEED_TAB[char_out as usize];
334 sror(h)
335}
336
337#[inline(always)]
338fn next_reverse_hash(prev: u64, k: u16, char_out: u8, char_in: u8) -> u64 {
339 let mut h = prev ^ srol_table(char_in & CP_OFF, k as u32);
340 h ^= SEED_TAB[(char_out & CP_OFF) as usize];
341 sror(h)
342}
343
344#[inline(always)]
345fn prev_reverse_hash(prev: u64, k: u16, char_out: u8, char_in: u8) -> u64 {
346 let mut h = srol(prev);
347 h ^= SEED_TAB[(char_in & CP_OFF) as usize];
348 h ^= srol_table(char_out & CP_OFF, k as u32);
349 h
350}
351
352pub struct NtHashBuilder<'a> {
358 seq: &'a [u8],
359 k: u16,
360 num_hashes: u8,
361 pos: usize,
362}
363
364impl<'a> NtHashBuilder<'a> {
365 pub fn new(seq: &'a [u8]) -> Self {
367 NtHashBuilder {
368 seq,
369 k: 0,
370 num_hashes: 1,
371 pos: 0,
372 }
373 }
374
375 pub fn k(mut self, k: u16) -> Self {
377 self.k = k;
378 self
379 }
380
381 pub fn num_hashes(mut self, m: u8) -> Self {
383 self.num_hashes = m;
384 self
385 }
386
387 pub fn pos(mut self, pos: usize) -> Self {
389 self.pos = pos;
390 self
391 }
392
393 pub fn finish(self) -> Result<NtHashIter<'a>> {
395 let hasher = NtHash::new(self.seq, self.k, self.num_hashes, self.pos)?;
396 Ok(NtHashIter {
397 hasher,
398 done: false,
399 })
400 }
401}
402
403pub struct NtHashIter<'a> {
405 hasher: NtHash<'a>,
406 done: bool,
407}
408
409impl<'a> Iterator for NtHashIter<'a> {
410 type Item = (usize, Vec<u64>);
411
412 fn next(&mut self) -> Option<Self::Item> {
413 if self.done {
414 return None;
415 }
416 if !self.hasher.roll() {
417 self.done = true;
418 return None;
419 }
420 let out = (self.hasher.pos(), self.hasher.hashes().to_owned());
421 Some(out)
422 }
423}
424
425impl<'a> IntoIterator for NtHashBuilder<'a> {
426 type Item = (usize, Vec<u64>);
427 type IntoIter = NtHashIter<'a>;
428
429 fn into_iter(self) -> Self::IntoIter {
430 self.finish().expect("invalid NtHashBuilder configuration")
431 }
432}