minimizer_queue/
lib.rs

1use core::cmp::Ordering;
2use core::hash::{BuildHasher, Hash};
3use std::collections::VecDeque;
4use strength_reduce::StrengthReducedU16;
5
6/// Default hasher for [`MinimizerQueue`] and [`ImplicitMinimizerQueue`].
7pub type DefaultHashBuilder = wyhash2::WyHash;
8
9/// A monotone queue that can compute consecutive minimizers in constant time.
10///
11/// # Examples
12///
13/// ```
14/// use minimizer_queue::MinimizerQueue;
15///
16/// let mut queue = MinimizerQueue::new(3); // width 3
17/// queue.insert(1);
18/// queue.insert(2);
19/// queue.insert(3);
20/// queue.get_min(); // element with the smallest hash among 1, 2 and 3
21///
22/// queue.insert(4);
23/// queue.get_min(); // element with the smallest hash among 2, 3 and 4
24/// ```
25pub struct MinimizerQueue<T: Hash + Copy, S: BuildHasher = DefaultHashBuilder> {
26    deq: VecDeque<(T, u64, u16)>,
27    width: StrengthReducedU16,
28    hash_builder: S,
29    pos: u16,
30}
31
32impl<T: Hash + Copy> MinimizerQueue<T> {
33    /// Creates an empty `MinimizerQueue` with the given width.
34    #[inline]
35    pub fn new(width: u16) -> Self {
36        Self::with_seed(width, width as u64)
37    }
38
39    /// Creates an empty `MinimizerQueue` with the given width and seed.
40    /// Changing the seed will change the ordering of the minimizers.
41    #[inline]
42    pub fn with_seed(width: u16, seed: u64) -> Self {
43        Self::with_hasher(width, DefaultHashBuilder::with_seed(seed))
44    }
45}
46
47impl<T: Hash + Copy, S: BuildHasher> MinimizerQueue<T, S> {
48    /// Creates an empty `MinimizerQueue` with the given width and hasher.
49    /// The hasher will define the ordering of the minimizers, based on their hashes.
50    pub fn with_hasher(width: u16, hash_builder: S) -> Self {
51        Self {
52            deq: VecDeque::with_capacity(width as usize),
53            width: StrengthReducedU16::new(width),
54            hash_builder,
55            pos: 0,
56        }
57    }
58
59    /// Returns the width of the `MinimizerQueue`.
60    #[inline]
61    pub fn width(&self) -> usize {
62        self.width.get() as usize
63    }
64
65    /// Returns `true` if the `MinimizerQueue` is empty.
66    #[inline]
67    pub fn is_empty(&self) -> bool {
68        self.deq.is_empty()
69    }
70
71    /// Returns `true` if there are multiple minimizers in the queue.
72    #[inline]
73    pub fn multiple_mins(&self) -> bool {
74        self.deq.len() >= 2 && self.deq[0].1 == self.deq[1].1
75    }
76
77    /// Returns the leftmost minimizer in the queue.
78    #[inline]
79    pub fn get_min(&self) -> T {
80        debug_assert!(!self.deq.is_empty(), "MinimizerQueue is empty");
81        self.deq[0].0
82    }
83
84    /// Returns the leftmost minimizer and its relative position in the queue.
85    #[inline]
86    pub fn get_min_pos(&self) -> (T, usize) {
87        debug_assert!(!self.deq.is_empty(), "MinimizerQueue is empty");
88        let (x, _, pos) = self.deq[0];
89        let rel_pos = ((self.width.get() - self.pos + pos) % self.width) as usize;
90        (x, rel_pos)
91    }
92
93    /// Returns the innermost minimizer and its relative position in the queue, with a second choice in case of tie.
94    #[inline]
95    pub fn get_inner_min_pos(&self) -> (T, usize, Option<(T, usize)>) {
96        debug_assert!(!self.deq.is_empty(), "MinimizerQueue is empty");
97        let width = self.width.get();
98        let start = width - self.pos;
99        let (mut x, hash, x_pos) = self.deq[0];
100        let mut x_pos = (start + x_pos) % self.width;
101        let mut i = 1;
102        while i < self.deq.len() && self.deq[i].1 == hash {
103            let (y, _, y_pos) = self.deq[i];
104            let y_pos = (start + y_pos) % self.width;
105            match x_pos.cmp(&(width - 1 - y_pos)) {
106                Ordering::Less => {
107                    x = y;
108                    x_pos = y_pos;
109                }
110                Ordering::Equal => return (x, x_pos as usize, Some((y, y_pos as usize))),
111                Ordering::Greater => return (x, x_pos as usize, None),
112            }
113            i += 1;
114        }
115        (x, x_pos as usize, None)
116    }
117
118    /// Inserts `x` in the queue and updates the current minimizer.
119    #[inline]
120    pub fn insert(&mut self, x: T) {
121        self.insert_with_hash(x, self.hash_builder.hash_one(x))
122    }
123
124    /// Inserts `x` in the queue with the given hash and updates the current minimizer.
125    pub fn insert_with_hash(&mut self, x: T, hash: u64) {
126        if !self.deq.is_empty() && self.deq[0].2 == self.pos {
127            self.deq.pop_front();
128        }
129        let mut i = self.deq.len();
130        while i > 0 && hash < self.deq[i - 1].1 {
131            i -= 1;
132        }
133        self.deq.truncate(i);
134        self.deq.push_back((x, hash, self.pos));
135        self.pos = (self.pos + 1) % self.width;
136    }
137
138    /// Clears the deque, removing all elements.
139    #[inline]
140    pub fn clear(&mut self) {
141        self.deq.clear()
142    }
143}
144
145/// A monotone queue that can compute the positions of consecutive minimizers in constant time.
146///
147/// # Examples
148///
149/// ```
150/// use minimizer_queue::ImplicitMinimizerQueue;
151///
152/// let mut queue = ImplicitMinimizerQueue::new(3); // width 3
153/// queue.insert(&1);
154/// queue.insert(&2);
155/// queue.insert(&3);
156/// queue.get_min_pos(); // position of the element with the smallest hash among 1, 2 and 3
157///
158/// queue.insert(&4);
159/// queue.get_min_pos(); // position of the element with the smallest hash among 2, 3 and 4
160/// ```
161pub struct ImplicitMinimizerQueue<S: BuildHasher = DefaultHashBuilder> {
162    deq: VecDeque<(u64, u16)>,
163    width: StrengthReducedU16,
164    hash_builder: S,
165    pos: u16,
166}
167
168impl ImplicitMinimizerQueue {
169    /// Creates an empty `ImplicitMinimizerQueue` with the given width.
170    #[inline]
171    pub fn new(width: u16) -> Self {
172        Self::with_seed(width, width as u64)
173    }
174
175    /// Creates an empty `ImplicitMinimizerQueue` with the given width and seed.
176    /// Changing the seed will change the ordering of the minimizers.
177    #[inline]
178    pub fn with_seed(width: u16, seed: u64) -> Self {
179        Self::with_hasher(width, DefaultHashBuilder::with_seed(seed))
180    }
181}
182
183impl<S: BuildHasher> ImplicitMinimizerQueue<S> {
184    /// Creates an empty `ImplicitMinimizerQueue` with the given width and hasher.
185    /// The hasher will define the ordering of the minimizers, based on their hashes.
186    pub fn with_hasher(width: u16, hash_builder: S) -> Self {
187        Self {
188            deq: VecDeque::with_capacity(width as usize),
189            width: StrengthReducedU16::new(width),
190            hash_builder,
191            pos: 0,
192        }
193    }
194
195    /// Returns the width of the `ImplicitMinimizerQueue`.
196    #[inline]
197    pub fn width(&self) -> usize {
198        self.width.get() as usize
199    }
200
201    /// Returns `true` if the `ImplicitMinimizerQueue` is empty.
202    #[inline]
203    pub fn is_empty(&self) -> bool {
204        self.deq.is_empty()
205    }
206
207    /// Returns `true` if there are multiple minimizers in the queue.
208    #[inline]
209    pub fn multiple_mins(&self) -> bool {
210        self.deq.len() >= 2 && self.deq[0].0 == self.deq[1].0
211    }
212
213    /// Returns the relative position of the leftmost minimizer.
214    #[inline]
215    pub fn get_min_pos(&self) -> usize {
216        debug_assert!(!self.deq.is_empty(), "ImplicitMinimizerQueue is empty");
217        let (_, pos) = self.deq[0];
218        ((self.width.get() - self.pos + pos) % self.width) as usize
219    }
220
221    /// Returns the relative position of the innermost minimizer, with a second choice in case of tie.
222    #[inline]
223    pub fn get_inner_min_pos(&self) -> (usize, Option<usize>) {
224        debug_assert!(!self.deq.is_empty(), "MinimizerQueue is empty");
225        let width = self.width.get();
226        let start = width - self.pos;
227        let (hash, x_pos) = self.deq[0];
228        let mut x_pos = (start + x_pos) % self.width;
229        let mut i = 1;
230        while i < self.deq.len() && self.deq[i].0 == hash {
231            let (_, y_pos) = self.deq[i];
232            let y_pos = (start + y_pos) % self.width;
233            match x_pos.cmp(&(width - 1 - y_pos)) {
234                Ordering::Less => {
235                    x_pos = y_pos;
236                }
237                Ordering::Equal => return (x_pos as usize, Some(y_pos as usize)),
238                Ordering::Greater => return (x_pos as usize, None),
239            }
240            i += 1;
241        }
242        (x_pos as usize, None)
243    }
244
245    /// Inserts `x` in the queue and updates the current minimizer.
246    #[inline]
247    pub fn insert<T: Hash>(&mut self, x: &T) {
248        self.insert_hash(self.hash_builder.hash_one(x))
249    }
250
251    /// Inserts `x` in the queue with the given hash and updates the current minimizer.
252    pub fn insert_hash(&mut self, hash: u64) {
253        if !self.deq.is_empty() && self.deq[0].1 == self.pos {
254            self.deq.pop_front();
255        }
256        let mut i = self.deq.len();
257        while i > 0 && hash < self.deq[i - 1].0 {
258            i -= 1;
259        }
260        self.deq.truncate(i);
261        self.deq.push_back((hash, self.pos));
262        self.pos = (self.pos + 1) % self.width;
263    }
264
265    /// Clears the deque, removing all elements.
266    #[inline]
267    pub fn clear(&mut self) {
268        self.deq.clear()
269    }
270}
271
272#[cfg(test)]
273mod tests {
274    use super::*;
275    use nohash_hasher::BuildNoHashHasher;
276
277    #[test]
278    fn test_get_min() {
279        let mut queue = MinimizerQueue::with_hasher(3, BuildNoHashHasher::<usize>::default());
280
281        let vals = [1usize, 2, 3, 0, 7, 8, 9, 100, 3, 4, 7, 8];
282        let mut mins = Vec::with_capacity(vals.len() - queue.width() + 1);
283
284        for &val in vals.iter().take(queue.width() - 1) {
285            queue.insert(val);
286        }
287        for &val in vals.iter().skip(queue.width() - 1) {
288            queue.insert(val);
289            mins.push(queue.get_min());
290        }
291
292        assert_eq!(mins, vec![1, 0, 0, 0, 7, 8, 3, 3, 3, 4]);
293    }
294
295    #[test]
296    fn test_get_min_pos() {
297        let mut queue = MinimizerQueue::with_hasher(3, BuildNoHashHasher::<usize>::default());
298
299        let vals = [1usize, 2, 3, 0, 7, 8, 9, 100, 3, 4, 7, 8];
300        let mut mins_pos = Vec::with_capacity(vals.len() - queue.width() + 1);
301
302        for &val in vals.iter().take(queue.width() - 1) {
303            queue.insert(val);
304        }
305        for &val in vals.iter().skip(queue.width() - 1) {
306            queue.insert(val);
307            mins_pos.push(queue.get_min_pos());
308        }
309
310        assert_eq!(
311            mins_pos,
312            vec![
313                (1, 0),
314                (0, 2),
315                (0, 1),
316                (0, 0),
317                (7, 0),
318                (8, 0),
319                (3, 2),
320                (3, 1),
321                (3, 0),
322                (4, 0)
323            ]
324        );
325    }
326
327    #[test]
328    fn test_implicit_get_min_pos() {
329        let mut queue =
330            ImplicitMinimizerQueue::with_hasher(3, BuildNoHashHasher::<usize>::default());
331
332        let vals = [1usize, 2, 3, 0, 7, 8, 9, 100, 3, 4, 7, 8];
333        let mut mins_pos = Vec::with_capacity(vals.len() - queue.width() + 1);
334
335        for val in vals.iter().take(queue.width() - 1) {
336            queue.insert(val);
337        }
338        for val in vals.iter().skip(queue.width() - 1) {
339            queue.insert(val);
340            mins_pos.push(queue.get_min_pos());
341        }
342
343        assert_eq!(mins_pos, vec![0, 2, 1, 0, 0, 0, 2, 1, 0, 0]);
344    }
345
346    #[test]
347    fn test_get_inner_min_pos() {
348        let mut queue = MinimizerQueue::with_hasher(3, BuildNoHashHasher::<usize>::default());
349
350        let vals = [1usize, 2, 3, 2, 2, 3, 1];
351        let mut inner_mins_pos = Vec::with_capacity(vals.len() - queue.width() + 1);
352
353        for &val in vals.iter().take(queue.width() - 1) {
354            queue.insert(val);
355        }
356        for &val in vals.iter().skip(queue.width() - 1) {
357            queue.insert(val);
358            inner_mins_pos.push(queue.get_inner_min_pos());
359        }
360
361        assert_eq!(
362            inner_mins_pos,
363            vec![
364                (1, 0, None),
365                (2, 0, Some((2, 2))),
366                (2, 1, None),
367                (2, 1, None),
368                (1, 2, None),
369            ]
370        );
371    }
372
373    #[test]
374    fn test_implicit_get_inner_min_pos() {
375        let mut queue =
376            ImplicitMinimizerQueue::with_hasher(3, BuildNoHashHasher::<usize>::default());
377
378        let vals = [1usize, 2, 3, 2, 2, 3, 1];
379        let mut inner_mins_pos = Vec::with_capacity(vals.len() - queue.width() + 1);
380
381        for val in vals.iter().take(queue.width() - 1) {
382            queue.insert(val);
383        }
384        for val in vals.iter().skip(queue.width() - 1) {
385            queue.insert(val);
386            inner_mins_pos.push(queue.get_inner_min_pos());
387        }
388
389        assert_eq!(
390            inner_mins_pos,
391            vec![(0, None), (0, Some(2)), (1, None), (1, None), (2, None),]
392        );
393    }
394}