1use std::collections::VecDeque;
20
21use crate::{
22 constants::*,
23 kmer::{base_forward_hash, base_reverse_hash},
24 tables::{srol, srol_table, sror},
25 util::extend_hashes,
26 NtHashError, Result,
27};
28
29pub struct BlindNtHash {
37 window: VecDeque<u8>,
38 k: u16,
39 pos: isize,
40 fwd_hash: u64,
41 rev_hash: u64,
42 hashes: Vec<u64>,
43}
44
45impl BlindNtHash {
46 pub fn new(seq: &[u8], k: u16, num_hashes: u8, pos: isize) -> Result<Self> {
55 if k == 0 {
56 return Err(NtHashError::InvalidK);
57 }
58 let len = seq.len();
59 let k_usz = k as usize;
60
61 if pos < 0 || (pos as usize) > len - k_usz {
62 return Err(NtHashError::PositionOutOfRange {
63 pos: pos as usize,
64 seq_len: len,
65 });
66 }
67
68 let slice = &seq[(pos as usize)..(pos as usize + k_usz)];
69 let mut window = VecDeque::with_capacity(k_usz);
70 window.extend(slice.iter().copied());
71
72 let fwd_hash = base_forward_hash(slice, k);
73 let rev_hash = base_reverse_hash(slice, k);
74
75 let mut hashes = vec![0; num_hashes as usize];
76 extend_hashes(fwd_hash, rev_hash, k as u32, &mut hashes);
77
78 Ok(Self {
79 window,
80 k,
81 pos,
82 fwd_hash,
83 rev_hash,
84 hashes,
85 })
86 }
87
88 pub fn roll(&mut self, char_in: u8) -> bool {
90 let char_out = self
91 .window
92 .pop_front()
93 .expect("window length is always k > 0");
94 self.window.push_back(char_in);
95
96 self.fwd_hash = next_forward_hash(self.fwd_hash, self.k, char_out, char_in);
97 self.rev_hash = next_reverse_hash(self.rev_hash, self.k, char_out, char_in);
98 extend_hashes(
99 self.fwd_hash,
100 self.rev_hash,
101 self.k as u32,
102 &mut self.hashes,
103 );
104 self.pos += 1;
105 true
106 }
107
108 pub fn roll_back(&mut self, char_in: u8) -> bool {
109 debug_assert_eq!(self.window.len(), self.k as usize);
110 let char_out = self
111 .window
112 .pop_back()
113 .expect("window length is always k > 0");
114 self.window.push_front(char_in);
115
116 self.fwd_hash = prev_forward_hash(self.fwd_hash, self.k, char_out, char_in);
117 self.rev_hash = prev_reverse_hash(self.rev_hash, self.k, char_out, char_in);
118 extend_hashes(
119 self.fwd_hash,
120 self.rev_hash,
121 self.k as u32,
122 &mut self.hashes,
123 );
124 self.pos -= 1;
125 true
126 }
127
128 pub fn peek(&mut self, char_in: u8) {
130 let char_out = *self.window.front().unwrap();
131 let fwd = next_forward_hash(self.fwd_hash, self.k, char_out, char_in);
132 let rev = next_reverse_hash(self.rev_hash, self.k, char_out, char_in);
133 extend_hashes(fwd, rev, self.k as u32, &mut self.hashes);
134 }
135
136 pub fn peek_back(&mut self, char_in: u8) {
137 let char_out = *self.window.back().unwrap();
138 let fwd = prev_forward_hash(self.fwd_hash, self.k, char_out, char_in);
139 let rev = prev_reverse_hash(self.rev_hash, self.k, char_out, char_in);
140 extend_hashes(fwd, rev, self.k as u32, &mut self.hashes);
141 }
142
143 #[inline(always)]
144 pub fn hashes(&self) -> &[u64] {
145 &self.hashes
146 }
147
148 #[inline(always)]
149 pub fn pos(&self) -> isize {
150 self.pos
151 }
152
153 #[inline(always)]
154 pub fn forward_hash(&self) -> u64 {
155 self.fwd_hash
156 }
157
158 #[inline(always)]
159 pub fn reverse_hash(&self) -> u64 {
160 self.rev_hash
161 }
162}
163
164#[inline(always)]
165fn next_forward_hash(prev: u64, k: u16, char_out: u8, char_in: u8) -> u64 {
166 srol(prev) ^ SEED_TAB[char_in as usize] ^ srol_table(char_out, k as u32)
167}
168
169#[inline(always)]
170fn prev_forward_hash(prev: u64, k: u16, char_out: u8, char_in: u8) -> u64 {
171 sror(prev ^ srol_table(char_in, k as u32) ^ SEED_TAB[char_out as usize])
172}
173
174#[inline(always)]
175fn next_reverse_hash(prev: u64, k: u16, char_out: u8, char_in: u8) -> u64 {
176 sror(prev ^ srol_table(char_in & CP_OFF, k as u32) ^ SEED_TAB[(char_out & CP_OFF) as usize])
177}
178
179#[inline(always)]
180fn prev_reverse_hash(prev: u64, k: u16, char_out: u8, char_in: u8) -> u64 {
181 srol(prev) ^ SEED_TAB[(char_in & CP_OFF) as usize] ^ srol_table(char_out & CP_OFF, k as u32)
182}
183
184pub struct BlindNtHashBuilder<'a> {
185 seq: &'a [u8],
186 k: u16,
187 num_hashes: u8,
188 start_pos: usize,
189}
190
191impl<'a> BlindNtHashBuilder<'a> {
192 pub fn new(seq: &'a [u8]) -> Self {
193 Self {
194 seq,
195 k: 0,
196 num_hashes: 1,
197 start_pos: 0,
198 }
199 }
200
201 pub fn k(mut self, k: u16) -> Self {
202 self.k = k;
203 self
204 }
205
206 pub fn num_hashes(mut self, m: u8) -> Self {
207 self.num_hashes = m;
208 self
209 }
210
211 pub fn pos(mut self, pos: usize) -> Self {
212 self.start_pos = pos;
213 self
214 }
215
216 pub fn finish(self) -> Result<BlindNtHashIter<'a>> {
217 let hasher = BlindNtHash::new(self.seq, self.k, self.num_hashes, self.start_pos as isize)?;
218 let end = self.seq.len() - self.k as usize;
219 Ok(BlindNtHashIter {
220 seq: self.seq,
221 end,
222 hasher,
223 first: true,
224 })
225 }
226}
227
228pub struct BlindNtHashIter<'a> {
229 seq: &'a [u8],
230 end: usize,
231 hasher: BlindNtHash,
232 first: bool,
233}
234
235impl<'a> Iterator for BlindNtHashIter<'a> {
236 type Item = (usize, Vec<u64>);
237
238 fn next(&mut self) -> Option<Self::Item> {
239 if self.first {
240 self.first = false;
241 return Some((self.hasher.pos() as usize, self.hasher.hashes().to_vec()));
242 }
243
244 let cur = self.hasher.pos() as usize;
245 if cur >= self.end {
246 return None;
247 }
248
249 let incoming = self.seq[cur + self.hasher.k as usize];
250 self.hasher.roll(incoming);
251
252 Some((self.hasher.pos() as usize, self.hasher.hashes().to_vec()))
253 }
254}
255
256impl<'a> IntoIterator for BlindNtHashBuilder<'a> {
257 type Item = (usize, Vec<u64>);
258 type IntoIter = BlindNtHashIter<'a>;
259
260 fn into_iter(self) -> Self::IntoIter {
261 self.finish()
262 .expect("invalid BlindNtHashBuilder configuration")
263 }
264}