Skip to main content

vers_vecs/bit_vec/fast_rs_vec/
iter.rs

1use crate::bit_vec::fast_rs_vec::{BLOCK_SIZE, SELECT_BLOCK_SIZE, SUPER_BLOCK_SIZE};
2use crate::RsVec;
3use std::iter::FusedIterator;
4use std::num::NonZeroUsize;
5
6impl RsVec {
7    /// Get an iterator over the bits in the vector. The iterator will return the indices of the
8    /// 0-bits or the 1-bits in the vector, depending on the constant `ZERO`
9    /// (if true, 0-bits are returned).
10    ///
11    /// It uses the select data structure to speed up iteration.
12    /// It is also faster than calling `select` on each rank, because the iterator exploits
13    /// the linear access pattern.
14    ///
15    /// This method has convenience methods `iter0` and `iter1`.
16    pub fn select_iter<const ZERO: bool>(&self) -> SelectIter<'_, ZERO> {
17        SelectIter::new(self)
18    }
19
20    /// Convert vector into an iterator over the bits in the vector. The iterator will return the indices of the
21    /// 0-bits or the 1-bits in the vector, depending on the constant `ZERO`
22    /// (if true, 0-bits are returned).
23    ///
24    /// It uses the select data structure to speed up iteration.
25    /// It is also faster than calling `select` on each rank, because the iterator exploits
26    /// the linear access pattern.
27    ///
28    /// This method has convenience methods `into_iter0` and `into_iter1`.
29    pub fn into_select_iter<const ZERO: bool>(self) -> SelectIntoIter<ZERO> {
30        SelectIntoIter::new(self)
31    }
32
33    /// Get an iterator over the 0-bits in the vector that uses the select data structure to speed
34    /// up iteration.
35    /// It is faster than calling `select0` on each rank, because the iterator
36    /// exploits the linear access pattern.
37    ///
38    /// See [`SelectIter`] for more information.
39    pub fn iter0(&self) -> SelectIter<'_, true> {
40        self.select_iter()
41    }
42
43    /// Get an iterator over the 1-bits in the vector that uses the select data structure to speed
44    /// up iteration.
45    /// It is faster than calling `select1` on each rank, because the iterator
46    /// exploits the linear access pattern.
47    ///
48    /// See [`SelectIter`] for more information.
49    pub fn iter1(&self) -> SelectIter<'_, false> {
50        self.select_iter()
51    }
52
53    /// Convert vector into an iterator over the 0-bits in the vector
54    /// that uses the select data structure to speed up iteration.
55    /// It is faster than calling `select0` on each rank, because the iterator
56    /// exploits the linear access pattern.
57    ///
58    /// See [`SelectIntoIter`] for more information.
59    pub fn into_iter0(self) -> SelectIntoIter<true> {
60        self.into_select_iter()
61    }
62
63    /// Convert vector into an iterator over the 1-bits in the vector
64    /// that uses the select data structure to speed up iteration.
65    /// It is faster than calling `select1` on each rank, because the iterator
66    /// exploits the linear access pattern.
67    ///
68    /// See [`SelectIntoIter`] for more information.
69    pub fn into_iter1(self) -> SelectIntoIter<false> {
70        self.into_select_iter()
71    }
72}
73
74macro_rules! gen_iter_impl {
75    ($($life:lifetime, )? $name:ident) => {
76        impl<$($life,)? const ZERO: bool> $name<$($life,)? ZERO> {
77
78            /// Create a new iterator over the given bit-vector. Initialize the caches for select queries
79            fn new(vec: $(&$life)? RsVec) -> Self {
80                if vec.is_empty() {
81                    return Self {
82                        vec,
83                        next_rank: 0,
84                        next_rank_back: None,
85                        last_super_block: 0,
86                        last_super_block_back: 0,
87                        last_block: 0,
88                        last_block_back: 0,
89                    };
90                }
91
92                let blocks_len = vec.blocks.len();
93                let super_blocks_len = vec.super_blocks.len();
94                let rank0 = vec.rank0;
95                let rank1 = vec.rank1;
96
97                Self {
98                    vec,
99                    next_rank: 0,
100                    next_rank_back: Some(if ZERO { rank0 } else { rank1 }).and_then(|x| if x > 0 { Some(x - 1) } else { None }),
101                    last_super_block: 0,
102                    last_super_block_back: super_blocks_len - 1,
103                    last_block: 0,
104                    last_block_back: blocks_len - 1,
105                }
106            }
107
108            /// Same implementation like select0, but uses cached indices of last query to speed up search
109            fn select_next_0(&mut self) -> Option<usize> {
110                let mut rank = self.next_rank;
111
112                if rank >= self.vec.rank0 || self.next_rank_back.is_none() || rank > self.next_rank_back.unwrap() {
113                    return None;
114                }
115
116                let mut super_block = self.vec.select_blocks[rank / SELECT_BLOCK_SIZE].index_0;
117                let mut block_index = 0;
118
119                if self.vec.super_blocks.len() > (self.last_super_block + 1)
120                    && self.vec.super_blocks[self.last_super_block + 1].zeros > rank
121                {
122                    // instantly jump to the last searched position
123                    super_block = self.last_super_block;
124                    rank -= self.vec.super_blocks[super_block].zeros;
125
126                    // check if current block contains the one and if yes, we don't need to search
127                    // this is true IF the last_block is either the last block in a super block,
128                    // in which case it must be this block, because we know the rank is within the super block,
129                    // OR if the next block has a rank higher than the current rank
130                    if self.last_block % (SUPER_BLOCK_SIZE / BLOCK_SIZE) == 15
131                        || self.vec.blocks.len() > self.last_block + 1
132                            && self.vec.blocks[self.last_block + 1].zeros as usize > rank
133                    {
134                        // instantly jump to the last searched position
135                        block_index = self.last_block;
136                        rank -= self.vec.blocks[block_index].zeros as usize;
137                    }
138                } else {
139                    super_block = self.vec.search_super_block0(super_block, rank);
140                    self.last_super_block = super_block;
141                    rank -= self.vec.super_blocks[super_block].zeros;
142                }
143
144                // if the block index is not zero, we already found the block, and need only update the word
145                if block_index == 0 {
146                    block_index = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE);
147                    self.vec.search_block0(rank, &mut block_index);
148
149                    self.last_block = block_index;
150                    rank -= self.vec.blocks[block_index].zeros as usize;
151                }
152
153                self.next_rank += 1;
154                Some(self.vec.search_word_in_block0(rank, block_index))
155            }
156
157            /// Same implementation like ``select_next_0``, but backwards
158            fn select_next_0_back(&mut self) -> Option<usize> {
159                let mut rank = self.next_rank_back?;
160
161                if self.next_rank_back.is_none() || rank < self.next_rank {
162                    return None;
163                }
164
165                let mut super_block = self.vec.select_blocks[rank / SELECT_BLOCK_SIZE].index_0;
166                let mut block_index = 0;
167
168                if self.vec.super_blocks[self.last_super_block_back].zeros < rank
169                {
170                    // instantly jump to the last searched position
171                    super_block = self.last_super_block_back;
172                    rank -= self.vec.super_blocks[super_block].zeros;
173
174                    // check if current block contains the one and if yes, we don't need to search
175                    // this is true IF the zeros before the last block are less than the rank,
176                    // since the block before then can't contain it
177                    if self.vec.blocks[self.last_block_back].zeros as usize <= rank
178                    {
179                        // instantly jump to the last searched position
180                        block_index = self.last_block_back;
181                        rank -= self.vec.blocks[block_index].zeros as usize;
182                    }
183                } else {
184                    super_block = self.vec.search_super_block0(super_block, rank);
185                    self.last_super_block_back = super_block;
186                    rank -= self.vec.super_blocks[super_block].zeros;
187                }
188
189                // if the block index is not zero, we already found the block, and need only update the word
190                if block_index == 0 {
191                    block_index = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE);
192                    self.vec.search_block0(rank, &mut block_index);
193
194                    self.last_block_back = block_index;
195                    rank -= self.vec.blocks[block_index].zeros as usize;
196                }
197
198                self.next_rank_back = self.next_rank_back.and_then(|x| if x > 0 { Some(x - 1) } else { None });
199                Some(self.vec.search_word_in_block0(rank, block_index))
200            }
201
202            #[must_use]
203            #[allow(clippy::assertions_on_constants)]
204            fn select_next_1(&mut self) -> Option<usize> {
205                let mut rank = self.next_rank;
206
207                if rank >= self.vec.rank1 || self.next_rank_back.is_none() || rank > self.next_rank_back.unwrap() {
208                    return None;
209                }
210
211                let mut super_block = self.vec.select_blocks[rank / SELECT_BLOCK_SIZE].index_1;
212                let mut block_index = 0;
213
214                // check if the last super block still contains the rank, and if yes, we don't need to search
215                if self.vec.super_blocks.len() > (self.last_super_block + 1)
216                    && (self.last_super_block + 1) * SUPER_BLOCK_SIZE
217                        - self.vec.super_blocks[self.last_super_block + 1].zeros
218                        > rank
219                {
220                    // instantly jump to the last searched position
221                    super_block = self.last_super_block;
222                    let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE);
223                    rank -= super_block * SUPER_BLOCK_SIZE - self.vec.super_blocks[super_block].zeros;
224
225                    // check if current block contains the one and if yes, we don't need to search
226                    // this is true IF the last_block is either the last block in a super block,
227                    // in which case it must be this block, because we know the rank is within the super block,
228                    // OR if the next block has a rank higher than the current rank
229                    if self.last_block % (SUPER_BLOCK_SIZE / BLOCK_SIZE) == 15
230                        || self.vec.blocks.len() > self.last_block + 1
231                            && (self.last_block + 1 - block_at_super_block) * BLOCK_SIZE
232                                - self.vec.blocks[self.last_block + 1].zeros as usize
233                                > rank
234                    {
235                        // instantly jump to the last searched position
236                        block_index = self.last_block;
237                        let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE);
238                        rank -= (block_index - block_at_super_block) * BLOCK_SIZE
239                            - self.vec.blocks[block_index].zeros as usize;
240                    }
241                } else {
242                    super_block = self.vec.search_super_block1(super_block, rank);
243
244                    self.last_super_block = super_block;
245                    rank -= super_block * SUPER_BLOCK_SIZE - self.vec.super_blocks[super_block].zeros;
246                }
247
248                // if the block index is not zero, we already found the block, and need only update the word
249                if block_index == 0 {
250                    // full binary search for block that contains the rank, manually loop-unrolled, because
251                    // LLVM doesn't do it for us, but it gains just under 20% performance
252                    let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE);
253                    block_index = block_at_super_block;
254                    self.vec
255                        .search_block1(rank, block_at_super_block, &mut block_index);
256
257                    self.last_block = block_index;
258                    rank -= (block_index - block_at_super_block) * BLOCK_SIZE
259                        - self.vec.blocks[block_index].zeros as usize;
260                }
261
262                self.next_rank += 1;
263                Some(self.vec.search_word_in_block1(rank, block_index))
264            }
265
266            #[must_use]
267            #[allow(clippy::assertions_on_constants)]
268            fn select_next_1_back(&mut self) -> Option<usize> {
269                let mut rank = self.next_rank_back?;
270
271                if self.next_rank_back.is_none() || rank < self.next_rank {
272                    return None;
273                }
274
275                let mut super_block = self.vec.select_blocks[rank / SELECT_BLOCK_SIZE].index_1;
276                let mut block_index = 0;
277
278                // check if the last super block still contains the rank, and if yes, we don't need to search
279                if (self.last_super_block_back) * SUPER_BLOCK_SIZE
280                        - self.vec.super_blocks[self.last_super_block_back].zeros
281                        < rank
282                {
283                    // instantly jump to the last searched position
284                    super_block = self.last_super_block_back;
285                    let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE);
286                    rank -= super_block * SUPER_BLOCK_SIZE - self.vec.super_blocks[super_block].zeros;
287
288                    // check if current block contains the one and if yes, we don't need to search
289                    // this is true IF the ones before the last block are less than the rank,
290                    // since the block before then can't contain it
291                    if (self.last_block_back - block_at_super_block) * BLOCK_SIZE
292                        - self.vec.blocks[self.last_block_back].zeros as usize
293                            <= rank
294                    {
295                        // instantly jump to the last searched position
296                        block_index = self.last_block_back;
297                        let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE);
298                        rank -= (block_index - block_at_super_block) * BLOCK_SIZE
299                            - self.vec.blocks[block_index].zeros as usize;
300                    }
301                } else {
302                    super_block = self.vec.search_super_block1(super_block, rank);
303
304                    self.last_super_block_back = super_block;
305                    rank -= super_block * SUPER_BLOCK_SIZE - self.vec.super_blocks[super_block].zeros;
306                }
307
308                // if the block index is not zero, we already found the block, and need only update the word
309                if block_index == 0 {
310                    // full binary search for block that contains the rank, manually loop-unrolled, because
311                    // LLVM doesn't do it for us, but it gains just under 20% performance
312                    let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE);
313                    block_index = block_at_super_block;
314                    self.vec
315                        .search_block1(rank, block_at_super_block, &mut block_index);
316
317                    self.last_block_back = block_index;
318                    rank -= (block_index - block_at_super_block) * BLOCK_SIZE
319                        - self.vec.blocks[block_index].zeros as usize;
320                }
321
322                self.next_rank_back = self.next_rank_back.and_then(|x| if x > 0 { Some(x - 1) } else { None });
323                Some(self.vec.search_word_in_block1(rank, block_index))
324            }
325
326            /// Advances the iterator by `n` elements. Returns an error if the iterator does not have
327            /// enough elements left. Does not call `next` internally.
328            /// This method is currently being added to the iterator trait, see
329            /// [this issue](https://github.com/rust-lang/rust/issues/77404).
330            /// As soon as it is stabilized, this method will be removed and replaced with a custom
331            /// implementation in the iterator impl.
332            pub(super) fn advance_by(&mut self, n: usize) -> Result<(), NonZeroUsize> {
333                if self.len() >= n {
334                    self.next_rank += n;
335                    Ok(())
336                } else {
337                    let len = self.len();
338                    self.next_rank += len;
339                    Err(NonZeroUsize::new(n - len).unwrap())
340                }
341            }
342
343            /// Advances the iterator back by `n` elements. Returns an error if the iterator does not have
344            /// enough elements left. Does not call `next_back` internally.
345            /// This method is currently being added to the iterator trait, see
346            /// [this issue](https://github.com/rust-lang/rust/issues/77404).
347            /// As soon as it is stabilized, this method will be removed and replaced with a custom
348            /// implementation in the double ended iterator impl.
349            pub(super) fn advance_back_by(&mut self, n: usize) -> Result<(), NonZeroUsize> {
350                if self.len() >= n {
351                    self.next_rank_back = self.next_rank_back.map(|x| x - n);
352                    Ok(())
353                } else {
354                    let len = self.len();
355                    self.next_rank_back = self.next_rank_back.map(|x| x - len);
356                    Err(NonZeroUsize::new(n - len).unwrap())
357                }
358            }
359        }
360
361        impl<$($life,)? const ZERO: bool> Iterator for $name<$($life,)? ZERO> {
362            type Item = usize;
363
364            fn next(&mut self) -> Option<Self::Item> {
365                if ZERO {
366                    self.select_next_0()
367                } else {
368                    self.select_next_1()
369                }
370            }
371
372            fn size_hint(&self) -> (usize, Option<usize>) {
373                (self.len(), Some(self.len()))
374            }
375
376            fn count(self) -> usize
377            where
378                Self: Sized,
379            {
380                self.len()
381            }
382
383            fn last(mut self) -> Option<Self::Item>
384            where
385                Self: Sized,
386            {
387                if self.len() == 0 {
388                    None
389                } else {
390                    self.advance_by(self.len() - 1)
391                        .ok()
392                        .and_then(|()| self.next())
393                }
394            }
395
396            fn nth(&mut self, n: usize) -> Option<Self::Item> {
397                if ZERO {
398                    self.advance_by(n).ok().and_then(|()| self.select_next_0())
399                } else {
400                    self.advance_by(n).ok().and_then(|()| self.select_next_1())
401                }
402            }
403        }
404
405        impl<$($life,)? const ZERO: bool> DoubleEndedIterator for $name<$($life,)? ZERO> {
406            fn next_back(&mut self) -> Option<Self::Item> {
407                if ZERO {
408                    self.select_next_0_back()
409                } else {
410                    self.select_next_1_back()
411                }
412            }
413
414            fn nth_back(&mut self, n: usize) -> Option<Self::Item> {
415                if ZERO {
416                    self.advance_back_by(n).ok().and_then(|()| self.select_next_0_back())
417                } else {
418                    self.advance_back_by(n).ok().and_then(|()| self.select_next_1_back())
419                }
420            }
421        }
422
423        impl<$($life,)? const ZERO: bool> FusedIterator for $name<$($life,)? ZERO> {}
424
425        impl<$($life,)? const ZERO: bool> ExactSizeIterator for $name<$($life,)? ZERO> {
426            fn len(&self) -> usize {
427                self.next_rank_back.map(|x| x + 1).unwrap_or_default().saturating_sub(self.next_rank)
428            }
429        }
430    }
431}
432
433/// An iterator that iterates over 1-bits or 0-bits and returns their indices.
434/// It uses the select data structures to speed up iteration.
435/// It is faster than iterating over all bits if the iterated bits are sparse.
436/// This is also faster than manually calling `select` on each rank,
437/// because the iterator exploits the linear access pattern for faster select queries.
438///
439/// The iterator can be constructed by calling [`iter0`] or [`iter1`].
440///
441/// # Example
442/// ```rust
443/// use vers_vecs::{BitVec, RsVec};
444///
445/// let mut bit_vec = BitVec::new();
446/// bit_vec.append_word(u64::MAX);
447/// bit_vec.append_word(u64::MAX);
448/// bit_vec.flip_bit(4);
449///
450/// let rs_vec = RsVec::from_bit_vec(bit_vec);
451///
452/// let mut iter = rs_vec.iter0();
453///
454/// assert_eq!(iter.next(), Some(4));
455/// assert_eq!(iter.next(), None);
456/// ```
457///
458/// [`iter0`]: crate::RsVec::iter0
459/// [`iter1`]: crate::RsVec::iter1
460#[derive(Clone, Debug)]
461#[must_use]
462pub struct SelectIter<'a, const ZERO: bool> {
463    pub(crate) vec: &'a RsVec,
464    next_rank: usize,
465
466    // rank back is none, iff it points to element -1 (i.e. element 0 has been consumed by
467    // a call to next_back()). It can be Some(..) even if the iterator is empty
468    next_rank_back: Option<usize>,
469
470    /// the last index in the super block structure where we found a bit
471    last_super_block: usize,
472
473    last_super_block_back: usize,
474
475    /// the last index in the block structure where we found a bit
476    last_block: usize,
477
478    last_block_back: usize,
479}
480
481gen_iter_impl!('a, SelectIter);
482
483/// An iterator that iterates over 1-bits or 0-bits and returns their indices.
484/// It owns the iterated bit-vector.
485/// It uses the select data structures to speed up iteration.
486/// It is faster than iterating over all bits if the iterated bits are sparse.
487/// This is also faster than manually calling `select` on each rank,
488/// because the iterator exploits the linear access pattern for faster select queries.
489///
490/// The iterator can be constructed by calling [`into_iter0`] or [`into_iter1`].
491///
492/// # Example
493/// ```rust
494/// use vers_vecs::{BitVec, RsVec};
495///
496/// let mut bit_vec = BitVec::new();
497/// bit_vec.append_word(u64::MAX);
498/// bit_vec.append_word(u64::MAX);
499/// bit_vec.flip_bit(4);
500///
501/// let rs_vec = RsVec::from_bit_vec(bit_vec);
502///
503/// let mut iter = rs_vec.iter0();
504///
505/// assert_eq!(iter.next(), Some(4));
506/// assert_eq!(iter.next(), None);
507/// ```
508///
509/// [`into_iter0`]: crate::RsVec::into_iter0
510/// [`into_iter1`]: crate::RsVec::into_iter1
511#[derive(Clone, Debug)]
512#[must_use]
513// the naming convention of other iterators is broken here, because SelectIter existed before
514// this owning iterator became necessary
515pub struct SelectIntoIter<const ZERO: bool> {
516    pub(crate) vec: RsVec,
517    next_rank: usize,
518
519    // rank back is none, iff it points to element -1 (i.e. element 0 has been consumed by
520    // a call to next_back()). It can be Some(..) even if the iterator is empty
521    next_rank_back: Option<usize>,
522
523    /// the last index in the super block structure where we found a bit
524    last_super_block: usize,
525
526    last_super_block_back: usize,
527
528    /// the last index in the block structure where we found a bit
529    last_block: usize,
530
531    last_block_back: usize,
532}
533
534gen_iter_impl!(SelectIntoIter);