1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
use core::hash::{BuildHasher, Hash};
pub use minimizer_queue::DefaultHashBuilder;
use minimizer_queue::{ImplicitMinimizerQueue, MinimizerQueue};
use num_traits::{AsPrimitive, PrimInt};

/// An iterator over the positions of the minimizers of a sequence.
pub struct MinimizerPosIterator<'a, T: PrimInt + Hash = u64, S: BuildHasher = DefaultHashBuilder> {
    pub(crate) seq: &'a [u8],
    pub(crate) queue: ImplicitMinimizerQueue<S>,
    pub(crate) width: usize,
    pub(crate) mmer: T,
    pub(crate) mmer_mask: T,
    pub(crate) encoding: [u8; 256],
    pub(crate) base_width: usize,
    pub(crate) min_pos: usize,
    pub(crate) end: usize,
}

impl<'a, T: PrimInt + Hash + 'static, S: BuildHasher> Iterator for MinimizerPosIterator<'a, T, S>
where
    u8: AsPrimitive<T>,
{
    type Item = usize;

    fn next(&mut self) -> Option<Self::Item> {
        if self.queue.is_empty() {
            if self.base_width > self.seq.len() {
                return None;
            }
            for i in 0..(self.base_width - self.width) {
                self.mmer = (self.mmer << 2)
                    | (unsafe { *self.encoding.as_ptr().add(self.seq[i] as usize) }.as_());
            }
            for i in (self.base_width - self.width)..self.base_width {
                self.mmer = ((self.mmer << 2) & self.mmer_mask)
                    | (unsafe { *self.encoding.as_ptr().add(self.seq[i] as usize) }.as_());
                self.queue.insert(&self.mmer);
            }
            self.min_pos = self.queue.get_min_pos();
            Some(self.min_pos)
        } else {
            let mut min_pos = self.min_pos;
            while self.end < self.seq.len() && min_pos == self.min_pos {
                self.mmer = ((self.mmer << 2) & self.mmer_mask)
                    | (unsafe { *self.encoding.as_ptr().add(self.seq[self.end] as usize) }.as_());
                self.queue.insert(&self.mmer);
                self.end += 1;
                min_pos = self.end - self.base_width + self.queue.get_min_pos();
            }
            if self.end >= self.seq.len() {
                return None;
            }
            self.min_pos = min_pos;
            Some(self.min_pos)
        }
    }
}

/// An iterator over the minimizers of a sequence and their positions.
pub struct MinimizerIterator<'a, T: PrimInt + Hash = u64, S: BuildHasher = DefaultHashBuilder> {
    pub(crate) seq: &'a [u8],
    pub(crate) queue: MinimizerQueue<T, S>,
    pub(crate) width: usize,
    pub(crate) mmer: T,
    pub(crate) mmer_mask: T,
    pub(crate) encoding: [u8; 256],
    pub(crate) base_width: usize,
    pub(crate) min_pos: (T, usize),
    pub(crate) end: usize,
}

impl<'a, T: PrimInt + Hash + 'static, S: BuildHasher> Iterator for MinimizerIterator<'a, T, S>
where
    u8: AsPrimitive<T>,
{
    type Item = (T, usize);

    fn next(&mut self) -> Option<Self::Item> {
        if self.queue.is_empty() {
            if self.base_width > self.seq.len() {
                return None;
            }
            for i in 0..(self.base_width - self.width) {
                self.mmer = (self.mmer << 2)
                    | (unsafe { *self.encoding.as_ptr().add(self.seq[i] as usize) }.as_());
            }
            for i in (self.base_width - self.width)..self.base_width {
                self.mmer = ((self.mmer << 2) & self.mmer_mask)
                    | (unsafe { *self.encoding.as_ptr().add(self.seq[i] as usize) }.as_());
                self.queue.insert(self.mmer);
            }
            self.min_pos = self.queue.get_min_pos();
            Some(self.min_pos)
        } else {
            let mut min_pos = self.min_pos;
            while self.end < self.seq.len() && min_pos.1 == self.min_pos.1 {
                self.mmer = ((self.mmer << 2) & self.mmer_mask)
                    | (unsafe { *self.encoding.as_ptr().add(self.seq[self.end] as usize) }.as_());
                self.queue.insert(self.mmer);
                self.end += 1;
                let _min_pos = self.queue.get_min_pos();
                min_pos = (_min_pos.0, self.end - self.base_width + _min_pos.1);
            }
            if self.end >= self.seq.len() {
                return None;
            }
            self.min_pos = min_pos;
            Some(self.min_pos)
        }
    }
}