vers_vecs/bit_vec/fast_rs_vec/iter.rs
1use crate::bit_vec::fast_rs_vec::{BLOCK_SIZE, SELECT_BLOCK_SIZE, SUPER_BLOCK_SIZE};
2use crate::RsVec;
3use std::iter::FusedIterator;
4use std::num::NonZeroUsize;
5
6impl RsVec {
7 /// Get an iterator over the bits in the vector. The iterator will return the indices of the
8 /// 0-bits or the 1-bits in the vector, depending on the constant `ZERO`
9 /// (if true, 0-bits are returned).
10 ///
11 /// It uses the select data structure to speed up iteration.
12 /// It is also faster than calling `select` on each rank, because the iterator exploits
13 /// the linear access pattern.
14 ///
15 /// This method has convenience methods `iter0` and `iter1`.
16 pub fn select_iter<const ZERO: bool>(&self) -> SelectIter<'_, ZERO> {
17 SelectIter::new(self)
18 }
19
20 /// Convert vector into an iterator over the bits in the vector. The iterator will return the indices of the
21 /// 0-bits or the 1-bits in the vector, depending on the constant `ZERO`
22 /// (if true, 0-bits are returned).
23 ///
24 /// It uses the select data structure to speed up iteration.
25 /// It is also faster than calling `select` on each rank, because the iterator exploits
26 /// the linear access pattern.
27 ///
28 /// This method has convenience methods `into_iter0` and `into_iter1`.
29 pub fn into_select_iter<const ZERO: bool>(self) -> SelectIntoIter<ZERO> {
30 SelectIntoIter::new(self)
31 }
32
33 /// Get an iterator over the 0-bits in the vector that uses the select data structure to speed
34 /// up iteration.
35 /// It is faster than calling `select0` on each rank, because the iterator
36 /// exploits the linear access pattern.
37 ///
38 /// See [`SelectIter`] for more information.
39 pub fn iter0(&self) -> SelectIter<'_, true> {
40 self.select_iter()
41 }
42
43 /// Get an iterator over the 1-bits in the vector that uses the select data structure to speed
44 /// up iteration.
45 /// It is faster than calling `select1` on each rank, because the iterator
46 /// exploits the linear access pattern.
47 ///
48 /// See [`SelectIter`] for more information.
49 pub fn iter1(&self) -> SelectIter<'_, false> {
50 self.select_iter()
51 }
52
53 /// Convert vector into an iterator over the 0-bits in the vector
54 /// that uses the select data structure to speed up iteration.
55 /// It is faster than calling `select0` on each rank, because the iterator
56 /// exploits the linear access pattern.
57 ///
58 /// See [`SelectIntoIter`] for more information.
59 pub fn into_iter0(self) -> SelectIntoIter<true> {
60 self.into_select_iter()
61 }
62
63 /// Convert vector into an iterator over the 1-bits in the vector
64 /// that uses the select data structure to speed up iteration.
65 /// It is faster than calling `select1` on each rank, because the iterator
66 /// exploits the linear access pattern.
67 ///
68 /// See [`SelectIntoIter`] for more information.
69 pub fn into_iter1(self) -> SelectIntoIter<false> {
70 self.into_select_iter()
71 }
72}
73
74macro_rules! gen_iter_impl {
75 ($($life:lifetime, )? $name:ident) => {
76 impl<$($life,)? const ZERO: bool> $name<$($life,)? ZERO> {
77
78 /// Create a new iterator over the given bit-vector. Initialize the caches for select queries
79 fn new(vec: $(&$life)? RsVec) -> Self {
80 if vec.is_empty() {
81 return Self {
82 vec,
83 next_rank: 0,
84 next_rank_back: None,
85 last_super_block: 0,
86 last_super_block_back: 0,
87 last_block: 0,
88 last_block_back: 0,
89 };
90 }
91
92 let blocks_len = vec.blocks.len();
93 let super_blocks_len = vec.super_blocks.len();
94 let rank0 = vec.rank0;
95 let rank1 = vec.rank1;
96
97 Self {
98 vec,
99 next_rank: 0,
100 next_rank_back: Some(if ZERO { rank0 } else { rank1 }).and_then(|x| if x > 0 { Some(x - 1) } else { None }),
101 last_super_block: 0,
102 last_super_block_back: super_blocks_len - 1,
103 last_block: 0,
104 last_block_back: blocks_len - 1,
105 }
106 }
107
108 /// Same implementation like select0, but uses cached indices of last query to speed up search
109 fn select_next_0(&mut self) -> Option<usize> {
110 let mut rank = self.next_rank;
111
112 if rank >= self.vec.rank0 || self.next_rank_back.is_none() || rank > self.next_rank_back.unwrap() {
113 return None;
114 }
115
116 let mut super_block = self.vec.select_blocks[rank / SELECT_BLOCK_SIZE].index_0;
117 let mut block_index = 0;
118
119 if self.vec.super_blocks.len() > (self.last_super_block + 1)
120 && self.vec.super_blocks[self.last_super_block + 1].zeros > rank
121 {
122 // instantly jump to the last searched position
123 super_block = self.last_super_block;
124 rank -= self.vec.super_blocks[super_block].zeros;
125
126 // check if current block contains the one and if yes, we don't need to search
127 // this is true IF the last_block is either the last block in a super block,
128 // in which case it must be this block, because we know the rank is within the super block,
129 // OR if the next block has a rank higher than the current rank
130 if self.last_block % (SUPER_BLOCK_SIZE / BLOCK_SIZE) == 15
131 || self.vec.blocks.len() > self.last_block + 1
132 && self.vec.blocks[self.last_block + 1].zeros as usize > rank
133 {
134 // instantly jump to the last searched position
135 block_index = self.last_block;
136 rank -= self.vec.blocks[block_index].zeros as usize;
137 }
138 } else {
139 super_block = self.vec.search_super_block0(super_block, rank);
140 self.last_super_block = super_block;
141 rank -= self.vec.super_blocks[super_block].zeros;
142 }
143
144 // if the block index is not zero, we already found the block, and need only update the word
145 if block_index == 0 {
146 block_index = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE);
147 self.vec.search_block0(rank, &mut block_index);
148
149 self.last_block = block_index;
150 rank -= self.vec.blocks[block_index].zeros as usize;
151 }
152
153 self.next_rank += 1;
154 Some(self.vec.search_word_in_block0(rank, block_index))
155 }
156
157 /// Same implementation like ``select_next_0``, but backwards
158 fn select_next_0_back(&mut self) -> Option<usize> {
159 let mut rank = self.next_rank_back?;
160
161 if self.next_rank_back.is_none() || rank < self.next_rank {
162 return None;
163 }
164
165 let mut super_block = self.vec.select_blocks[rank / SELECT_BLOCK_SIZE].index_0;
166 let mut block_index = 0;
167
168 if self.vec.super_blocks[self.last_super_block_back].zeros < rank
169 {
170 // instantly jump to the last searched position
171 super_block = self.last_super_block_back;
172 rank -= self.vec.super_blocks[super_block].zeros;
173
174 // check if current block contains the one and if yes, we don't need to search
175 // this is true IF the zeros before the last block are less than the rank,
176 // since the block before then can't contain it
177 if self.vec.blocks[self.last_block_back].zeros as usize <= rank
178 {
179 // instantly jump to the last searched position
180 block_index = self.last_block_back;
181 rank -= self.vec.blocks[block_index].zeros as usize;
182 }
183 } else {
184 super_block = self.vec.search_super_block0(super_block, rank);
185 self.last_super_block_back = super_block;
186 rank -= self.vec.super_blocks[super_block].zeros;
187 }
188
189 // if the block index is not zero, we already found the block, and need only update the word
190 if block_index == 0 {
191 block_index = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE);
192 self.vec.search_block0(rank, &mut block_index);
193
194 self.last_block_back = block_index;
195 rank -= self.vec.blocks[block_index].zeros as usize;
196 }
197
198 self.next_rank_back = self.next_rank_back.and_then(|x| if x > 0 { Some(x - 1) } else { None });
199 Some(self.vec.search_word_in_block0(rank, block_index))
200 }
201
202 #[must_use]
203 #[allow(clippy::assertions_on_constants)]
204 fn select_next_1(&mut self) -> Option<usize> {
205 let mut rank = self.next_rank;
206
207 if rank >= self.vec.rank1 || self.next_rank_back.is_none() || rank > self.next_rank_back.unwrap() {
208 return None;
209 }
210
211 let mut super_block = self.vec.select_blocks[rank / SELECT_BLOCK_SIZE].index_1;
212 let mut block_index = 0;
213
214 // check if the last super block still contains the rank, and if yes, we don't need to search
215 if self.vec.super_blocks.len() > (self.last_super_block + 1)
216 && (self.last_super_block + 1) * SUPER_BLOCK_SIZE
217 - self.vec.super_blocks[self.last_super_block + 1].zeros
218 > rank
219 {
220 // instantly jump to the last searched position
221 super_block = self.last_super_block;
222 let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE);
223 rank -= super_block * SUPER_BLOCK_SIZE - self.vec.super_blocks[super_block].zeros;
224
225 // check if current block contains the one and if yes, we don't need to search
226 // this is true IF the last_block is either the last block in a super block,
227 // in which case it must be this block, because we know the rank is within the super block,
228 // OR if the next block has a rank higher than the current rank
229 if self.last_block % (SUPER_BLOCK_SIZE / BLOCK_SIZE) == 15
230 || self.vec.blocks.len() > self.last_block + 1
231 && (self.last_block + 1 - block_at_super_block) * BLOCK_SIZE
232 - self.vec.blocks[self.last_block + 1].zeros as usize
233 > rank
234 {
235 // instantly jump to the last searched position
236 block_index = self.last_block;
237 let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE);
238 rank -= (block_index - block_at_super_block) * BLOCK_SIZE
239 - self.vec.blocks[block_index].zeros as usize;
240 }
241 } else {
242 super_block = self.vec.search_super_block1(super_block, rank);
243
244 self.last_super_block = super_block;
245 rank -= super_block * SUPER_BLOCK_SIZE - self.vec.super_blocks[super_block].zeros;
246 }
247
248 // if the block index is not zero, we already found the block, and need only update the word
249 if block_index == 0 {
250 // full binary search for block that contains the rank, manually loop-unrolled, because
251 // LLVM doesn't do it for us, but it gains just under 20% performance
252 let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE);
253 block_index = block_at_super_block;
254 self.vec
255 .search_block1(rank, block_at_super_block, &mut block_index);
256
257 self.last_block = block_index;
258 rank -= (block_index - block_at_super_block) * BLOCK_SIZE
259 - self.vec.blocks[block_index].zeros as usize;
260 }
261
262 self.next_rank += 1;
263 Some(self.vec.search_word_in_block1(rank, block_index))
264 }
265
266 #[must_use]
267 #[allow(clippy::assertions_on_constants)]
268 fn select_next_1_back(&mut self) -> Option<usize> {
269 let mut rank = self.next_rank_back?;
270
271 if self.next_rank_back.is_none() || rank < self.next_rank {
272 return None;
273 }
274
275 let mut super_block = self.vec.select_blocks[rank / SELECT_BLOCK_SIZE].index_1;
276 let mut block_index = 0;
277
278 // check if the last super block still contains the rank, and if yes, we don't need to search
279 if (self.last_super_block_back) * SUPER_BLOCK_SIZE
280 - self.vec.super_blocks[self.last_super_block_back].zeros
281 < rank
282 {
283 // instantly jump to the last searched position
284 super_block = self.last_super_block_back;
285 let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE);
286 rank -= super_block * SUPER_BLOCK_SIZE - self.vec.super_blocks[super_block].zeros;
287
288 // check if current block contains the one and if yes, we don't need to search
289 // this is true IF the ones before the last block are less than the rank,
290 // since the block before then can't contain it
291 if (self.last_block_back - block_at_super_block) * BLOCK_SIZE
292 - self.vec.blocks[self.last_block_back].zeros as usize
293 <= rank
294 {
295 // instantly jump to the last searched position
296 block_index = self.last_block_back;
297 let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE);
298 rank -= (block_index - block_at_super_block) * BLOCK_SIZE
299 - self.vec.blocks[block_index].zeros as usize;
300 }
301 } else {
302 super_block = self.vec.search_super_block1(super_block, rank);
303
304 self.last_super_block_back = super_block;
305 rank -= super_block * SUPER_BLOCK_SIZE - self.vec.super_blocks[super_block].zeros;
306 }
307
308 // if the block index is not zero, we already found the block, and need only update the word
309 if block_index == 0 {
310 // full binary search for block that contains the rank, manually loop-unrolled, because
311 // LLVM doesn't do it for us, but it gains just under 20% performance
312 let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE);
313 block_index = block_at_super_block;
314 self.vec
315 .search_block1(rank, block_at_super_block, &mut block_index);
316
317 self.last_block_back = block_index;
318 rank -= (block_index - block_at_super_block) * BLOCK_SIZE
319 - self.vec.blocks[block_index].zeros as usize;
320 }
321
322 self.next_rank_back = self.next_rank_back.and_then(|x| if x > 0 { Some(x - 1) } else { None });
323 Some(self.vec.search_word_in_block1(rank, block_index))
324 }
325
326 /// Advances the iterator by `n` elements. Returns an error if the iterator does not have
327 /// enough elements left. Does not call `next` internally.
328 /// This method is currently being added to the iterator trait, see
329 /// [this issue](https://github.com/rust-lang/rust/issues/77404).
330 /// As soon as it is stabilized, this method will be removed and replaced with a custom
331 /// implementation in the iterator impl.
332 pub(super) fn advance_by(&mut self, n: usize) -> Result<(), NonZeroUsize> {
333 if self.len() >= n {
334 self.next_rank += n;
335 Ok(())
336 } else {
337 let len = self.len();
338 self.next_rank += len;
339 Err(NonZeroUsize::new(n - len).unwrap())
340 }
341 }
342
343 /// Advances the iterator back by `n` elements. Returns an error if the iterator does not have
344 /// enough elements left. Does not call `next_back` internally.
345 /// This method is currently being added to the iterator trait, see
346 /// [this issue](https://github.com/rust-lang/rust/issues/77404).
347 /// As soon as it is stabilized, this method will be removed and replaced with a custom
348 /// implementation in the double ended iterator impl.
349 pub(super) fn advance_back_by(&mut self, n: usize) -> Result<(), NonZeroUsize> {
350 if self.len() >= n {
351 self.next_rank_back = self.next_rank_back.map(|x| x - n);
352 Ok(())
353 } else {
354 let len = self.len();
355 self.next_rank_back = self.next_rank_back.map(|x| x - len);
356 Err(NonZeroUsize::new(n - len).unwrap())
357 }
358 }
359 }
360
361 impl<$($life,)? const ZERO: bool> Iterator for $name<$($life,)? ZERO> {
362 type Item = usize;
363
364 fn next(&mut self) -> Option<Self::Item> {
365 if ZERO {
366 self.select_next_0()
367 } else {
368 self.select_next_1()
369 }
370 }
371
372 fn size_hint(&self) -> (usize, Option<usize>) {
373 (self.len(), Some(self.len()))
374 }
375
376 fn count(self) -> usize
377 where
378 Self: Sized,
379 {
380 self.len()
381 }
382
383 fn last(mut self) -> Option<Self::Item>
384 where
385 Self: Sized,
386 {
387 if self.len() == 0 {
388 None
389 } else {
390 self.advance_by(self.len() - 1)
391 .ok()
392 .and_then(|()| self.next())
393 }
394 }
395
396 fn nth(&mut self, n: usize) -> Option<Self::Item> {
397 if ZERO {
398 self.advance_by(n).ok().and_then(|()| self.select_next_0())
399 } else {
400 self.advance_by(n).ok().and_then(|()| self.select_next_1())
401 }
402 }
403 }
404
405 impl<$($life,)? const ZERO: bool> DoubleEndedIterator for $name<$($life,)? ZERO> {
406 fn next_back(&mut self) -> Option<Self::Item> {
407 if ZERO {
408 self.select_next_0_back()
409 } else {
410 self.select_next_1_back()
411 }
412 }
413
414 fn nth_back(&mut self, n: usize) -> Option<Self::Item> {
415 if ZERO {
416 self.advance_back_by(n).ok().and_then(|()| self.select_next_0_back())
417 } else {
418 self.advance_back_by(n).ok().and_then(|()| self.select_next_1_back())
419 }
420 }
421 }
422
423 impl<$($life,)? const ZERO: bool> FusedIterator for $name<$($life,)? ZERO> {}
424
425 impl<$($life,)? const ZERO: bool> ExactSizeIterator for $name<$($life,)? ZERO> {
426 fn len(&self) -> usize {
427 self.next_rank_back.map(|x| x + 1).unwrap_or_default().saturating_sub(self.next_rank)
428 }
429 }
430 }
431}
432
433/// An iterator that iterates over 1-bits or 0-bits and returns their indices.
434/// It uses the select data structures to speed up iteration.
435/// It is faster than iterating over all bits if the iterated bits are sparse.
436/// This is also faster than manually calling `select` on each rank,
437/// because the iterator exploits the linear access pattern for faster select queries.
438///
439/// The iterator can be constructed by calling [`iter0`] or [`iter1`].
440///
441/// # Example
442/// ```rust
443/// use vers_vecs::{BitVec, RsVec};
444///
445/// let mut bit_vec = BitVec::new();
446/// bit_vec.append_word(u64::MAX);
447/// bit_vec.append_word(u64::MAX);
448/// bit_vec.flip_bit(4);
449///
450/// let rs_vec = RsVec::from_bit_vec(bit_vec);
451///
452/// let mut iter = rs_vec.iter0();
453///
454/// assert_eq!(iter.next(), Some(4));
455/// assert_eq!(iter.next(), None);
456/// ```
457///
458/// [`iter0`]: crate::RsVec::iter0
459/// [`iter1`]: crate::RsVec::iter1
460#[derive(Clone, Debug)]
461#[must_use]
462pub struct SelectIter<'a, const ZERO: bool> {
463 pub(crate) vec: &'a RsVec,
464 next_rank: usize,
465
466 // rank back is none, iff it points to element -1 (i.e. element 0 has been consumed by
467 // a call to next_back()). It can be Some(..) even if the iterator is empty
468 next_rank_back: Option<usize>,
469
470 /// the last index in the super block structure where we found a bit
471 last_super_block: usize,
472
473 last_super_block_back: usize,
474
475 /// the last index in the block structure where we found a bit
476 last_block: usize,
477
478 last_block_back: usize,
479}
480
481gen_iter_impl!('a, SelectIter);
482
483/// An iterator that iterates over 1-bits or 0-bits and returns their indices.
484/// It owns the iterated bit-vector.
485/// It uses the select data structures to speed up iteration.
486/// It is faster than iterating over all bits if the iterated bits are sparse.
487/// This is also faster than manually calling `select` on each rank,
488/// because the iterator exploits the linear access pattern for faster select queries.
489///
490/// The iterator can be constructed by calling [`into_iter0`] or [`into_iter1`].
491///
492/// # Example
493/// ```rust
494/// use vers_vecs::{BitVec, RsVec};
495///
496/// let mut bit_vec = BitVec::new();
497/// bit_vec.append_word(u64::MAX);
498/// bit_vec.append_word(u64::MAX);
499/// bit_vec.flip_bit(4);
500///
501/// let rs_vec = RsVec::from_bit_vec(bit_vec);
502///
503/// let mut iter = rs_vec.iter0();
504///
505/// assert_eq!(iter.next(), Some(4));
506/// assert_eq!(iter.next(), None);
507/// ```
508///
509/// [`into_iter0`]: crate::RsVec::into_iter0
510/// [`into_iter1`]: crate::RsVec::into_iter1
511#[derive(Clone, Debug)]
512#[must_use]
513// the naming convention of other iterators is broken here, because SelectIter existed before
514// this owning iterator became necessary
515pub struct SelectIntoIter<const ZERO: bool> {
516 pub(crate) vec: RsVec,
517 next_rank: usize,
518
519 // rank back is none, iff it points to element -1 (i.e. element 0 has been consumed by
520 // a call to next_back()). It can be Some(..) even if the iterator is empty
521 next_rank_back: Option<usize>,
522
523 /// the last index in the super block structure where we found a bit
524 last_super_block: usize,
525
526 last_super_block_back: usize,
527
528 /// the last index in the block structure where we found a bit
529 last_block: usize,
530
531 last_block_back: usize,
532}
533
534gen_iter_impl!(SelectIntoIter);