1#[cfg(feature = "serde")]
4use serde::{self, Deserialize, Serialize};
5
6use crate::Mphf;
7use std::borrow::Borrow;
8use std::fmt::Debug;
9use std::hash::Hash;
10use std::iter::ExactSizeIterator;
11
12#[derive(Debug, Clone)]
15#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
16pub struct BoomHashMap<K: Hash, D> {
17 mphf: Mphf<K>,
18 pub(crate) keys: Vec<K>,
19 pub(crate) values: Vec<D>,
20}
21
22impl<K, D> BoomHashMap<K, D>
23where
24 K: Hash + Debug + PartialEq,
25 D: Debug,
26{
27 fn create_map(mut keys: Vec<K>, mut values: Vec<D>, mphf: Mphf<K>) -> BoomHashMap<K, D> {
28 for i in 0..keys.len() {
30 loop {
31 let kmer_slot = mphf.hash(&keys[i]) as usize;
32 if i == kmer_slot {
33 break;
34 }
35 keys.swap(i, kmer_slot);
36 values.swap(i, kmer_slot);
37 }
38 }
39 BoomHashMap { mphf, keys, values }
40 }
41
42 pub fn new(keys: Vec<K>, data: Vec<D>) -> BoomHashMap<K, D> {
44 let mphf = Mphf::new(1.7, &keys);
45 Self::create_map(keys, data, mphf)
46 }
47
48 pub fn get<Q: ?Sized>(&self, kmer: &Q) -> Option<&D>
50 where
51 K: Borrow<Q>,
52 Q: Hash + Eq,
53 {
54 let maybe_pos = self.mphf.try_hash(kmer);
55 match maybe_pos {
56 Some(pos) => {
57 let hashed_kmer = &self.keys[pos as usize];
58 if kmer == hashed_kmer.borrow() {
59 Some(&self.values[pos as usize])
60 } else {
61 None
62 }
63 }
64 None => None,
65 }
66 }
67
68 pub fn get_key_id<Q: ?Sized>(&self, kmer: &Q) -> Option<usize>
70 where
71 K: Borrow<Q>,
72 Q: Hash + Eq,
73 {
74 let maybe_pos = self.mphf.try_hash(kmer);
75 match maybe_pos {
76 Some(pos) => {
77 let hashed_kmer = &self.keys[pos as usize];
78 if kmer == hashed_kmer.borrow() {
79 Some(pos as usize)
80 } else {
81 None
82 }
83 }
84 None => None,
85 }
86 }
87
88 pub fn len(&self) -> usize {
90 self.keys.len()
91 }
92
93 pub fn is_empty(&self) -> bool {
94 self.keys.is_empty()
95 }
96
97 pub fn get_key(&self, id: usize) -> Option<&K> {
98 let max_key_id = self.len();
99 if id > max_key_id {
100 None
101 } else {
102 Some(&self.keys[id])
103 }
104 }
105
106 pub fn iter(&self) -> BoomIterator<K, D> {
107 BoomIterator {
108 hash: self,
109 index: 0,
110 }
111 }
112}
113
114impl<K, D> core::iter::FromIterator<(K, D)> for BoomHashMap<K, D>
115where
116 K: Hash + Debug + PartialEq,
117 D: Debug,
118{
119 fn from_iter<I: IntoIterator<Item = (K, D)>>(iter: I) -> Self {
120 let mut keys = Vec::new();
121 let mut values = Vec::new();
122
123 for (k, v) in iter {
124 keys.push(k);
125 values.push(v);
126 }
127 Self::new(keys, values)
128 }
129}
130
131impl<K, D> BoomHashMap<K, D>
132where
133 K: Hash + Debug + PartialEq + Send + Sync,
134 D: Debug,
135{
136 pub fn new_parallel(keys: Vec<K>, data: Vec<D>) -> BoomHashMap<K, D> {
138 let mphf = Mphf::new_parallel(1.7, &keys, None);
139 Self::create_map(keys, data, mphf)
140 }
141}
142
143pub struct BoomIterator<'a, K: Hash + 'a, D: 'a> {
145 hash: &'a BoomHashMap<K, D>,
146 index: usize,
147}
148
149impl<'a, K: Hash, D> Iterator for BoomIterator<'a, K, D> {
150 type Item = (&'a K, &'a D);
151
152 fn next(&mut self) -> Option<Self::Item> {
153 if self.index == self.hash.keys.len() {
154 return None;
155 }
156
157 let elements = Some((&self.hash.keys[self.index], &self.hash.values[self.index]));
158 self.index += 1;
159
160 elements
161 }
162
163 fn size_hint(&self) -> (usize, Option<usize>) {
164 let remaining = self.hash.keys.len() - self.index;
165 (remaining, Some(remaining))
166 }
167}
168
169impl<'a, K: Hash, D1> ExactSizeIterator for BoomIterator<'a, K, D1> {}
170
171impl<'a, K: Hash, D> IntoIterator for &'a BoomHashMap<K, D> {
172 type Item = (&'a K, &'a D);
173 type IntoIter = BoomIterator<'a, K, D>;
174
175 fn into_iter(self) -> BoomIterator<'a, K, D> {
176 BoomIterator {
177 hash: self,
178 index: 0,
179 }
180 }
181}
182
183#[derive(Debug, Clone)]
188#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
189pub struct BoomHashMap2<K: Hash, D1, D2> {
190 mphf: Mphf<K>,
191 keys: Vec<K>,
192 values: Vec<D1>,
193 aux_values: Vec<D2>,
194}
195
196pub struct Boom2Iterator<'a, K: Hash + 'a, D1: 'a, D2: 'a> {
197 hash: &'a BoomHashMap2<K, D1, D2>,
198 index: usize,
199}
200
201impl<'a, K: Hash, D1, D2> Iterator for Boom2Iterator<'a, K, D1, D2> {
202 type Item = (&'a K, &'a D1, &'a D2);
203
204 fn next(&mut self) -> Option<Self::Item> {
205 if self.index == self.hash.keys.len() {
206 return None;
207 }
208
209 let elements = Some((
210 &self.hash.keys[self.index],
211 &self.hash.values[self.index],
212 &self.hash.aux_values[self.index],
213 ));
214 self.index += 1;
215 elements
216 }
217
218 fn size_hint(&self) -> (usize, Option<usize>) {
219 let remaining = self.hash.keys.len() - self.index;
220 (remaining, Some(remaining))
221 }
222}
223
224impl<'a, K: Hash, D1, D2> ExactSizeIterator for Boom2Iterator<'a, K, D1, D2> {}
225
226impl<'a, K: Hash, D1, D2> IntoIterator for &'a BoomHashMap2<K, D1, D2> {
227 type Item = (&'a K, &'a D1, &'a D2);
228 type IntoIter = Boom2Iterator<'a, K, D1, D2>;
229
230 fn into_iter(self) -> Boom2Iterator<'a, K, D1, D2> {
231 Boom2Iterator {
232 hash: self,
233 index: 0,
234 }
235 }
236}
237
238impl<K, D1, D2> BoomHashMap2<K, D1, D2>
239where
240 K: Hash + Debug + PartialEq,
241 D1: Debug,
242 D2: Debug,
243{
244 fn create_map(
245 mut keys: Vec<K>,
246 mut values: Vec<D1>,
247 mut aux_values: Vec<D2>,
248 mphf: Mphf<K>,
249 ) -> BoomHashMap2<K, D1, D2> {
250 for i in 0..keys.len() {
252 loop {
253 let kmer_slot = mphf.hash(&keys[i]) as usize;
254 if i == kmer_slot {
255 break;
256 }
257 keys.swap(i, kmer_slot);
258 values.swap(i, kmer_slot);
259 aux_values.swap(i, kmer_slot);
260 }
261 }
262
263 BoomHashMap2 {
264 mphf,
265 keys,
266 values,
267 aux_values,
268 }
269 }
270
271 pub fn new(keys: Vec<K>, values: Vec<D1>, aux_values: Vec<D2>) -> BoomHashMap2<K, D1, D2> {
273 let mphf = Mphf::new(1.7, &keys);
274 Self::create_map(keys, values, aux_values, mphf)
275 }
276
277 pub fn get<Q: ?Sized>(&self, kmer: &Q) -> Option<(&D1, &D2)>
278 where
279 K: Borrow<Q>,
280 Q: Hash + Eq,
281 {
282 let maybe_pos = self.mphf.try_hash(kmer);
283 match maybe_pos {
284 Some(pos) => {
285 let hashed_kmer = &self.keys[pos as usize];
286 if kmer == hashed_kmer.borrow() {
287 Some((&self.values[pos as usize], &self.aux_values[pos as usize]))
288 } else {
289 None
290 }
291 }
292 None => None,
293 }
294 }
295
296 pub fn get_key_id<Q: ?Sized>(&self, kmer: &Q) -> Option<usize>
297 where
298 K: Borrow<Q>,
299 Q: Hash + Eq,
300 {
301 let maybe_pos = self.mphf.try_hash(kmer);
302 match maybe_pos {
303 Some(pos) => {
304 let hashed_kmer = &self.keys[pos as usize];
305 if kmer == hashed_kmer.borrow() {
306 Some(pos as usize)
307 } else {
308 None
309 }
310 }
311 None => None,
312 }
313 }
314
315 pub fn len(&self) -> usize {
316 self.keys.len()
317 }
318
319 pub fn is_empty(&self) -> bool {
320 self.keys.is_empty()
321 }
322
323 pub fn iter(&self) -> Boom2Iterator<K, D1, D2> {
325 Boom2Iterator {
326 hash: self,
327 index: 0,
328 }
329 }
330
331 pub fn get_key(&self, id: usize) -> Option<&K> {
332 let max_key_id = self.len();
333 if id > max_key_id {
334 None
335 } else {
336 Some(&self.keys[id])
337 }
338 }
339}
340
341impl<K, D1, D2> core::iter::FromIterator<(K, D1, D2)> for BoomHashMap2<K, D1, D2>
342where
343 K: Hash + Debug + PartialEq,
344 D1: Debug,
345 D2: Debug,
346{
347 fn from_iter<I: IntoIterator<Item = (K, D1, D2)>>(iter: I) -> Self {
348 let mut keys = Vec::new();
349 let mut values1 = Vec::new();
350 let mut values2 = Vec::new();
351
352 for (k, v1, v2) in iter {
353 keys.push(k);
354 values1.push(v1);
355 values2.push(v2);
356 }
357 Self::new(keys, values1, values2)
358 }
359}
360
361impl<K, D1, D2> BoomHashMap2<K, D1, D2>
362where
363 K: Hash + Debug + PartialEq + Send + Sync,
364 D1: Debug,
365 D2: Debug,
366{
367 pub fn new_parallel(keys: Vec<K>, data: Vec<D1>, aux_data: Vec<D2>) -> BoomHashMap2<K, D1, D2> {
369 let mphf = Mphf::new_parallel(1.7, &keys, None);
370 Self::create_map(keys, data, aux_data, mphf)
371 }
372}
373
374#[derive(Debug, Clone)]
377#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
378pub struct NoKeyBoomHashMap<K, D1> {
379 pub mphf: Mphf<K>,
380 pub values: Vec<D1>,
381}
382
383impl<K, D1> core::iter::FromIterator<(K, D1)> for NoKeyBoomHashMap<K, D1>
384where
385 K: Hash + Debug + PartialEq + Send + Sync,
386 D1: Debug,
387{
388 fn from_iter<I: IntoIterator<Item = (K, D1)>>(iter: I) -> Self {
389 let mut keys = Vec::new();
390 let mut values1 = Vec::new();
391
392 for (k, v1) in iter {
393 keys.push(k);
394 values1.push(v1);
395 }
396 Self::new_parallel(keys, values1)
397 }
398}
399
400impl<K, D1> NoKeyBoomHashMap<K, D1>
401where
402 K: Hash + Debug + PartialEq + Send + Sync,
403 D1: Debug,
404{
405 pub fn new_parallel(mut keys: Vec<K>, mut values: Vec<D1>) -> NoKeyBoomHashMap<K, D1> {
406 let mphf = Mphf::new_parallel(1.7, &keys, None);
407 for i in 0..keys.len() {
408 loop {
409 let kmer_slot = mphf.hash(&keys[i]) as usize;
410 if i == kmer_slot {
411 break;
412 }
413 keys.swap(i, kmer_slot);
414 values.swap(i, kmer_slot);
415 }
416 }
417
418 NoKeyBoomHashMap { mphf, values }
419 }
420
421 pub fn new_with_mphf(mphf: Mphf<K>, values: Vec<D1>) -> NoKeyBoomHashMap<K, D1> {
422 NoKeyBoomHashMap { mphf, values }
423 }
424
425 pub fn get<Q: ?Sized>(&self, kmer: &Q) -> Option<&D1>
427 where
428 K: Borrow<Q>,
429 Q: Hash + Eq,
430 {
431 let maybe_pos = self.mphf.try_hash(kmer);
432 match maybe_pos {
433 Some(pos) => Some(&self.values[pos as usize]),
434 _ => None,
435 }
436 }
437}
438
439#[derive(Debug, Clone)]
442#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
443pub struct NoKeyBoomHashMap2<K, D1, D2> {
444 pub mphf: Mphf<K>,
445 pub values: Vec<D1>,
446 pub aux_values: Vec<D2>,
447}
448
449impl<K, D1, D2> core::iter::FromIterator<(K, D1, D2)> for NoKeyBoomHashMap2<K, D1, D2>
450where
451 K: Hash + Debug + PartialEq + Send + Sync,
452 D1: Debug,
453 D2: Debug,
454{
455 fn from_iter<I: IntoIterator<Item = (K, D1, D2)>>(iter: I) -> Self {
456 let mut keys = Vec::new();
457 let mut values1 = Vec::new();
458 let mut values2 = Vec::new();
459
460 for (k, v1, v2) in iter {
461 keys.push(k);
462 values1.push(v1);
463 values2.push(v2);
464 }
465 Self::new_parallel(keys, values1, values2)
466 }
467}
468
469impl<K, D1, D2> NoKeyBoomHashMap2<K, D1, D2>
470where
471 K: Hash + Debug + PartialEq + Send + Sync,
472 D1: Debug,
473 D2: Debug,
474{
475 pub fn new_parallel(
476 mut keys: Vec<K>,
477 mut values: Vec<D1>,
478 mut aux_values: Vec<D2>,
479 ) -> NoKeyBoomHashMap2<K, D1, D2> {
480 let mphf = Mphf::new_parallel(1.7, &keys, None);
481 for i in 0..keys.len() {
482 loop {
483 let kmer_slot = mphf.hash(&keys[i]) as usize;
484 if i == kmer_slot {
485 break;
486 }
487 keys.swap(i, kmer_slot);
488 values.swap(i, kmer_slot);
489 aux_values.swap(i, kmer_slot);
490 }
491 }
492 NoKeyBoomHashMap2 {
493 mphf,
494 values,
495 aux_values,
496 }
497 }
498
499 pub fn new_with_mphf(
500 mphf: Mphf<K>,
501 values: Vec<D1>,
502 aux_values: Vec<D2>,
503 ) -> NoKeyBoomHashMap2<K, D1, D2> {
504 NoKeyBoomHashMap2 {
505 mphf,
506 values,
507 aux_values,
508 }
509 }
510
511 pub fn get<Q: ?Sized>(&self, kmer: &Q) -> Option<(&D1, &D2)>
513 where
514 K: Borrow<Q>,
515 Q: Hash + Eq,
516 {
517 let maybe_pos = self.mphf.try_hash(kmer);
518 maybe_pos.map(|pos| (&self.values[pos as usize], &self.aux_values[pos as usize]))
519 }
520}