Skip to main content

vers_vecs/wavelet/
mod.rs

1//! This module contains the implementation of a [wavelet matrix].
2//! The wavelet matrix is a data structure that encodes a sequence of `n` `k`-bit words
3//! into a matrix of bit vectors, allowing for fast rank and select queries on the encoded sequence.
4//!
5//! This implementation further supports quantile queries, and range-predecessor and -successor queries.
6//! All operations are `O(k)` time complexity.
7//!
8//! See the struct documentation for more information.
9//!
10//! [wavelet matrix]: WaveletMatrix
11
12use crate::util::impl_vector_iterator;
13use crate::{BitVec, RsVec};
14use std::mem;
15use std::ops::Range;
16
17/// A wavelet matrix implementation implemented as described in
18/// [Navarro and Claude, 2021](http://dx.doi.org/10.1007/978-3-642-34109-0_18).
19/// The implementation is designed to allow for extremely large alphabet sizes, without
20/// sacrificing performance for small alphabets.
21///
22/// There are two constructor algorithms available:
23/// - [`from_bit_vec`] and [`from_slice`] construct the wavelet matrix by repeatedly sorting the elements.
24///   These constructors have linear space overhead and run in `O(kn * log n)` time complexity.
25/// - [`from_bit_vec_pc`] and [`from_slice_pc`] construct the wavelet matrix by counting the
26///   prefixes of the elements. These constructors have a space complexity of `O(2^k)` and run
27///   in `O(kn)`, which makes this constructor preferable for large sequences over small alphabets.
28///
29/// They encode a sequence of `n` `k`-bit words into a wavelet matrix which supports constant-time
30/// rank and select queries on elements of its `k`-bit alphabet.
31/// All query functions are mirrored for both `BitVec` and `u64` query elements, so
32/// if `k <= 64`, no heap allocation is needed for the query element.
33///
34/// Other than rank and select queries, the matrix also supports quantile queries (range select i), and
35/// range-predecessor and -successor queries, all of which are loosely based on
36/// [Külekci and Thankachan](https://doi.org/10.1016/j.jda.2017.01.002) with better time complexity.
37///
38/// All operations implemented on the matrix are `O(k)` time complexity.
39/// The space complexity of the wavelet matrix is `O(n * k)` with a small linear overhead
40/// (see [`RsVec`]).
41///
42/// # Examples
43/// ```
44/// use vers_vecs::{BitVec, WaveletMatrix};
45///
46/// // pack elements from a 3-bit alphabet into a bit vector and construct a wavelet matrix from them
47/// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
48/// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
49///
50/// // query the wavelet matrix
51/// assert_eq!(wavelet_matrix.get_u64(0), Some(1));
52/// assert_eq!(wavelet_matrix.get_u64(1), Some(4));
53///
54// // rank and select queries
55/// assert_eq!(wavelet_matrix.rank_u64(3, 4), Some(2));
56/// assert_eq!(wavelet_matrix.rank_u64(3, 1), Some(1));
57/// assert_eq!(wavelet_matrix.select_u64(0, 7), Some(5));
58///
59/// // statistics
60/// assert_eq!(wavelet_matrix.range_median_u64(0..3), Some(4));
61/// assert_eq!(wavelet_matrix.predecessor_u64(0..6, 3), Some(2));
62/// ```
63///
64/// [`RsVec`]: RsVec
65/// [`from_bit_vec`]: WaveletMatrix::from_bit_vec
66/// [`from_slice`]: WaveletMatrix::from_slice
67/// [`from_bit_vec_pc`]: WaveletMatrix::from_bit_vec_pc
68/// [`from_slice_pc`]: WaveletMatrix::from_slice_pc
69#[derive(Clone, Debug)]
70#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
71#[cfg_attr(feature = "mem_dbg", derive(mem_dbg::MemSize, mem_dbg::MemDbg))]
72pub struct WaveletMatrix {
73    data: Box<[RsVec]>,
74}
75
76impl WaveletMatrix {
77    /// Generic constructor that constructs the wavelet matrix by repeatedly sorting the elements.
78    /// The runtime complexity is `O(kn * log n)`.
79    ///
80    /// # Parameters
81    /// - `bits_per_element`: The number of bits in each word. Cannot exceed 1 << 16.
82    /// - `num_elements`: The number of elements in the sequence.
83    /// - `bit_lookup`: A closure that returns the `bit`-th bit of the `element`-th word.
84    #[inline(always)] // should get rid of closures in favor of static calls
85    fn permutation_sorting<LOOKUP: Fn(usize, usize) -> u64>(
86        bits_per_element: u16,
87        num_elements: usize,
88        bit_lookup: LOOKUP,
89    ) -> Self {
90        let element_len = bits_per_element as usize;
91
92        let mut data = vec![BitVec::from_zeros(num_elements); element_len];
93
94        // insert the first bit of each word into the first bit vector
95        // for each following level, insert the next bit of each word into the next bit vector
96        // sorted stably by the previous bit vector
97        let mut permutation = (0..num_elements).collect::<Vec<_>>();
98        let mut next_permutation = vec![0; num_elements];
99
100        for (level, data) in data.iter_mut().enumerate() {
101            let mut total_zeros = 0;
102            for (i, p) in permutation.iter().enumerate() {
103                if bit_lookup(*p, element_len - level - 1) == 0 {
104                    total_zeros += 1;
105                } else {
106                    data.set(i, 1).unwrap();
107                }
108            }
109
110            // scan through the generated bit array and move the elements to the correct position
111            // for the next permutation
112            if level < element_len - 1 {
113                let mut zero_boundary = 0;
114                let mut one_boundary = total_zeros;
115                for (i, p) in permutation.iter().enumerate() {
116                    if data.get_unchecked(i) == 0 {
117                        next_permutation[zero_boundary] = *p;
118                        zero_boundary += 1;
119                    } else {
120                        next_permutation[one_boundary] = *p;
121                        one_boundary += 1;
122                    }
123                }
124
125                mem::swap(&mut permutation, &mut next_permutation);
126            }
127        }
128
129        Self {
130            data: data.into_iter().map(BitVec::into).collect(),
131        }
132    }
133
134    /// Create a new wavelet matrix from a sequence of `n` `k`-bit words.
135    /// The constructor runs in `O(kn * log n)` time complexity.
136    ///
137    /// # Parameters
138    /// - `bit_vec`: A packed sequence of `n` `k`-bit words. The `i`-th word begins in the
139    ///   `bits_per_element * i`-th bit of the bit vector. Words are stored from least to most
140    ///   significant bit.
141    /// - `bits_per_element`: The number `k` of bits in each word. Cannot exceed 1 << 16.
142    ///
143    /// # Panics
144    /// Panics if the number of bits in the bit vector is not a multiple of the number of bits per element.
145    #[must_use]
146    pub fn from_bit_vec(bit_vec: &BitVec, bits_per_element: u16) -> Self {
147        assert_eq!(bit_vec.len() % bits_per_element as usize, 0, "The number of bits in the bit vector must be a multiple of the number of bits per element.");
148        let num_elements = bit_vec.len() / bits_per_element as usize;
149        Self::permutation_sorting(bits_per_element, num_elements, |element, bit| {
150            bit_vec.get_unchecked(element * bits_per_element as usize + bit)
151        })
152    }
153
154    /// Create a new wavelet matrix from a sequence of `n` `k`-bit words.
155    /// The constructor runs in `O(kn * log n)` time complexity.
156    ///
157    /// # Parameters
158    /// - `sequence`: A slice of `n` u64 values, each encoding a `k`-bit word.
159    /// - `bits_per_element`: The number `k` of bits in each word. Cannot exceed 64.
160    ///
161    /// # Panics
162    /// Panics if the number of bits per element exceeds 64.
163    #[must_use]
164    pub fn from_slice(sequence: &[u64], bits_per_element: u16) -> Self {
165        assert!(
166            bits_per_element <= 64,
167            "The number of bits per element cannot exceed 64."
168        );
169        Self::permutation_sorting(bits_per_element, sequence.len(), |element, bit| {
170            (sequence[element] >> bit) & 1
171        })
172    }
173
174    /// Generic constructor that constructs the wavelet matrix by counting the prefixes of the elements.
175    /// The runtime complexity is `O(kn)`.
176    /// This constructor is only recommended for small alphabets.
177    ///
178    /// # Parameters
179    /// - `bits_per_element`: The number of bits in each word. Cannot exceed 64.
180    /// - `num_elements`: The number of elements in the sequence.
181    /// - `bit_lookup`: A closure that returns the `bit`-th bit of the `element`-th word.
182    /// - `element_lookup`: A closure that returns the `element`-th word.
183    #[inline(always)] // should get rid of closures in favor of static calls
184    fn prefix_counting<LOOKUP: Fn(usize, usize) -> u64, ELEMENT: Fn(usize) -> u64>(
185        bits_per_element: u16,
186        num_elements: usize,
187        bit_lookup: LOOKUP,
188        element_lookup: ELEMENT,
189    ) -> Self {
190        let element_len = bits_per_element as usize;
191        let mut histogram = vec![0usize; 1 << bits_per_element];
192        let mut borders = vec![0usize; 1 << bits_per_element];
193        let mut data = vec![BitVec::from_zeros(num_elements); element_len];
194
195        for i in 0..num_elements {
196            histogram[element_lookup(i) as usize] += 1;
197            data[0].set_unchecked(i, bit_lookup(i, element_len - 1));
198        }
199
200        for level in (1..element_len).rev() {
201            // combine histograms of prefixes
202            for h in 0..1 << level {
203                histogram[h] = histogram[2 * h] + histogram[2 * h + 1];
204            }
205
206            // compute borders of current level, using bit reverse patterns because of the weird
207            // node ordering in wavelet matrices
208            borders[0] = 0;
209            for h in 1usize..1 << level {
210                let h_minus_1 = (h - 1).reverse_bits() >> (64 - level);
211                borders[h.reverse_bits() >> (64 - level)] =
212                    borders[h_minus_1] + histogram[h_minus_1];
213            }
214
215            for i in 0..num_elements {
216                let bit = bit_lookup(i, element_len - level - 1);
217                data[level].set_unchecked(
218                    borders[element_lookup(i) as usize >> (element_len - level)],
219                    bit,
220                );
221                borders[element_lookup(i) as usize >> (element_len - level)] += 1;
222            }
223        }
224
225        Self {
226            data: data.into_iter().map(BitVec::into).collect(),
227        }
228    }
229
230    /// Create a new wavelet matrix from a sequence of `n` `k`-bit words using the prefix counting
231    /// algorithm [Dinklage et al.](https://doi.org/10.1145/3457197)
232    /// The constructor runs in `O(kn)` time complexity but requires `O(2^k)` space during construction,
233    /// so it is only recommended for small alphabets.
234    /// Use the [`from_bit_vec`] or [`from_slice`] constructors for larger alphabets.
235    ///
236    /// # Parameters
237    /// - `bit_vec`: A packed sequence of `n` `k`-bit words. The `i`-th word begins in the
238    ///   `bits_per_element * i`-th bit of the bit vector. Words are stored from least to most
239    ///   significant bit.
240    /// - `bits_per_element`: The number `k` of bits in each word. Cannot exceed 1 << 16.
241    ///
242    /// # Panics
243    /// Panics if the number of bits in the bit vector is not a multiple of the number of bits per element,
244    /// or if the number of bits per element exceeds 64.
245    ///
246    /// [`from_bit_vec`]: WaveletMatrix::from_bit_vec
247    /// [`from_slice`]: WaveletMatrix::from_slice
248    #[must_use]
249    pub fn from_bit_vec_pc(bit_vec: &BitVec, bits_per_element: u16) -> Self {
250        assert_eq!(bit_vec.len() % bits_per_element as usize, 0, "The number of bits in the bit vector must be a multiple of the number of bits per element.");
251        assert!(
252            bits_per_element <= 64,
253            "The number of bits per element cannot exceed 64."
254        );
255        let num_elements = bit_vec.len() / bits_per_element as usize;
256        Self::prefix_counting(
257            bits_per_element,
258            num_elements,
259            |element, bit| bit_vec.get_unchecked(element * bits_per_element as usize + bit),
260            |element| {
261                bit_vec.get_bits_unchecked(
262                    element * bits_per_element as usize,
263                    bits_per_element as usize,
264                )
265            },
266        )
267    }
268
269    /// Create a new wavelet matrix from a sequence of `n` `k`-bit words using the prefix counting
270    /// algorithm [Dinklage et al.](https://doi.org/10.1145/3457197)
271    /// The constructor runs in `O(kn)` time complexity but requires `O(2^k)` space during construction,
272    /// so it is only recommended for small alphabets.
273    /// Use the [`from_bit_vec`] or [`from_slice`] constructors for larger alphabets.
274    ///
275    /// # Parameters
276    /// - `sequence`: A slice of `n` u64 values, each encoding a `k`-bit word.
277    /// - `bits_per_element`: The number `k` of bits in each word. Cannot exceed 64.
278    ///
279    /// # Panics
280    /// Panics if the number of bits per element exceeds 64.
281    ///
282    /// [`from_bit_vec`]: WaveletMatrix::from_bit_vec
283    /// [`from_slice`]: WaveletMatrix::from_slice
284    #[must_use]
285    pub fn from_slice_pc(sequence: &[u64], bits_per_element: u16) -> Self {
286        assert!(
287            bits_per_element <= 64,
288            "The number of bits per element cannot exceed 64."
289        );
290        Self::prefix_counting(
291            bits_per_element,
292            sequence.len(),
293            |element, bit| (sequence[element] >> bit) & 1,
294            |element| sequence[element],
295        )
296    }
297
298    /// Generic function to read a value from the wavelet matrix and consume it with a closure.
299    /// The function is used by the `get_value` and `get_u64` functions, deduplicating code.
300    #[inline(always)]
301    fn reconstruct_value_unchecked<F: FnMut(u64)>(&self, mut i: usize, mut target_func: F) {
302        for level in 0..self.bits_per_element() {
303            let bit = self.data[level].get_unchecked(i);
304            target_func(bit);
305            if bit == 0 {
306                i = self.data[level].rank0(i);
307            } else {
308                i = self.data[level].rank0 + self.data[level].rank1(i);
309            }
310        }
311    }
312
313    /// Get the `i`-th element of the encoded sequence in a `k`-bit word.
314    /// The `k`-bit word is returned as a `BitVec`.
315    /// The first element of the bit vector is the least significant bit.
316    ///
317    /// Returns `None` if the index is out of bounds.
318    ///
319    /// # Example
320    /// ```
321    /// use vers_vecs::{BitVec, WaveletMatrix};
322    ///
323    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
324    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
325    ///
326    /// assert_eq!(wavelet_matrix.get_value(0), Some(BitVec::pack_sequence_u8(&[1], 3)));
327    /// assert_eq!(wavelet_matrix.get_value(1), Some(BitVec::pack_sequence_u8(&[4], 3)));
328    /// assert_eq!(wavelet_matrix.get_value(100), None);
329    /// ```
330    #[must_use]
331    pub fn get_value(&self, i: usize) -> Option<BitVec> {
332        if self.data.is_empty() || i >= self.data[0].len() {
333            None
334        } else {
335            Some(self.get_value_unchecked(i))
336        }
337    }
338
339    /// Get the `i`-th element of the encoded sequence in a `k`-bit word.
340    /// The `k`-bit word is returned as a `BitVec`.
341    /// The first element of the bit vector is the least significant bit.
342    /// This function does not perform bounds checking.
343    /// Use [`get_value`] for a checked version.
344    ///
345    /// # Panics
346    /// May panic if the index is out of bounds. May instead return an empty bit vector.
347    ///
348    /// [`get_value`]: WaveletMatrix::get_value
349    #[must_use]
350    pub fn get_value_unchecked(&self, i: usize) -> BitVec {
351        let mut value = BitVec::from_zeros(self.bits_per_element());
352        let mut level = self.bits_per_element() - 1;
353        self.reconstruct_value_unchecked(i, |bit| {
354            value.set_unchecked(level, bit);
355            level = level.saturating_sub(1);
356        });
357        value
358    }
359
360    /// Get the `i`-th element of the encoded sequence as a `u64`.
361    /// The `u64` is constructed from the `k`-bit word stored in the wavelet matrix.
362    ///
363    /// Returns `None` if the index is out of bounds, or if the number of bits per element exceeds 64.
364    ///
365    /// # Example
366    /// ```
367    /// use vers_vecs::{BitVec, WaveletMatrix};
368    ///
369    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
370    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
371    ///
372    /// assert_eq!(wavelet_matrix.get_u64(0), Some(1));
373    /// assert_eq!(wavelet_matrix.get_u64(1), Some(4));
374    /// assert_eq!(wavelet_matrix.get_u64(100), None);
375    /// ```
376    #[must_use]
377    pub fn get_u64(&self, i: usize) -> Option<u64> {
378        if self.bits_per_element() > 64 || self.data.is_empty() || i >= self.data[0].len() {
379            None
380        } else {
381            Some(self.get_u64_unchecked(i))
382        }
383    }
384
385    /// Get the `i`-th element of the encoded sequence as a `u64` numeral.
386    /// The element is encoded in the lowest `k` bits of the numeral.
387    /// If the number of bits per element exceeds 64, the value is truncated.
388    /// This function does not perform bounds checking.
389    /// Use [`get_u64`] for a checked version.
390    ///
391    /// # Panic
392    /// May panic if the value of `i` is out of bounds. May instead return 0.
393    ///
394    /// [`get_u64`]: WaveletMatrix::get_u64
395    #[must_use]
396    pub fn get_u64_unchecked(&self, i: usize) -> u64 {
397        let mut value = 0;
398        self.reconstruct_value_unchecked(i, |bit| {
399            value <<= 1;
400            value |= bit;
401        });
402        value
403    }
404
405    /// Get the number of occurrences of the given `symbol` in the encoded sequence in the `range`.
406    /// The `symbol` is a `k`-bit word encoded in a [`BitVec`],
407    /// where the least significant bit is the first element, and `k` is the number of bits per element
408    /// in the wavelet matrix.
409    ///
410    /// This method does not perform bounds checking, nor does it check if the `symbol` is a valid
411    /// `k`-bit word.
412    /// Use [`rank_range`] for a checked version.
413    ///
414    /// # Panics
415    /// May panic if the `range` is out of bounds,
416    /// or if the number of bits in `symbol` is lower than `k`.
417    /// May instead return 0.
418    ///
419    /// [`BitVec`]: BitVec
420    /// [`rank_range`]: WaveletMatrix::rank_range
421    #[must_use]
422    pub fn rank_range_unchecked(&self, mut range: Range<usize>, symbol: &BitVec) -> usize {
423        for (level, data) in self.data.iter().enumerate() {
424            if symbol.get_unchecked((self.bits_per_element() - 1) - level) == 0 {
425                range.start = data.rank0(range.start);
426                range.end = data.rank0(range.end);
427            } else {
428                range.start = data.rank0 + data.rank1(range.start);
429                range.end = data.rank0 + data.rank1(range.end);
430            }
431        }
432
433        range.end - range.start
434    }
435
436    /// Get the number of occurrences of the given `symbol` in the encoded sequence in the `range`.
437    /// The `symbol` is a `k`-bit word encoded in a [`BitVec`],
438    /// where the least significant bit is the first element, and `k` is the number of bits per element
439    /// in the wavelet matrix.
440    ///
441    /// Returns `None` if the `range` is out of bounds (greater than the length of the encoded sequence,
442    /// but since it is exclusive, it may be equal to the length),
443    /// or if the number of bits in `symbol` is not equal to `k`.
444    ///
445    /// # Example
446    /// ```
447    /// use vers_vecs::{BitVec, WaveletMatrix};
448    ///
449    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
450    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
451    ///
452    /// assert_eq!(wavelet_matrix.rank_range(0..3, &BitVec::pack_sequence_u8(&[4], 3)), Some(2));
453    /// assert_eq!(wavelet_matrix.rank_range(2..4, &BitVec::pack_sequence_u8(&[4], 3)), Some(1));
454    /// ```
455    ///
456    /// [`BitVec`]: BitVec
457    #[must_use]
458    pub fn rank_range(&self, range: Range<usize>, symbol: &BitVec) -> Option<usize> {
459        if range.start >= self.len()
460            || range.end > self.len()
461            || symbol.len() != self.bits_per_element()
462        {
463            None
464        } else {
465            Some(self.rank_range_unchecked(range, symbol))
466        }
467    }
468
469    /// Get the number of occurrences of the given `symbol` in the encoded sequence in the `range`.
470    /// The `symbol` is a `k`-bit word encoded in a u64 numeral,
471    /// where k is less than or equal to 64.
472    /// The interval is half-open, meaning `rank_range_u64(0..0, symbol)` returns 0.
473    ///
474    /// This method does not perform bounds checking, nor does it check if the elements of the
475    /// wavelet matrix can be represented in a u64 numeral.
476    /// Use [`rank_range_u64`] for a checked version.
477    ///
478    /// # Panics
479    /// May panic if the `range` is out of bounds.
480    /// May instead return 0.
481    /// If the number of bits in wavelet matrix elements exceed `64`, the behavior is
482    /// platform-dependent.
483    ///
484    /// [`rank_range_u64`]: WaveletMatrix::rank_range_u64
485    #[must_use]
486    pub fn rank_range_u64_unchecked(&self, mut range: Range<usize>, symbol: u64) -> usize {
487        for (level, data) in self.data.iter().enumerate() {
488            if (symbol >> ((self.bits_per_element() - 1) - level)) & 1 == 0 {
489                range.start = data.rank0(range.start);
490                range.end = data.rank0(range.end);
491            } else {
492                range.start = data.rank0 + data.rank1(range.start);
493                range.end = data.rank0 + data.rank1(range.end);
494            }
495        }
496
497        range.end - range.start
498    }
499
500    /// Get the number of occurrences of the given `symbol` in the encoded sequence in the `range`.
501    /// The `symbol` is a `k`-bit word encoded in a u64 numeral,
502    /// where k is less than or equal to 64.
503    /// The interval is half-open, meaning `rank_range_u64(0..0, symbol)` returns 0.
504    ///
505    /// Returns `None` if the `range` is out of bounds (greater than the length of the encoded sequence,
506    /// but since it is exclusive, it may be equal to the length),
507    /// or if the number of bits in the wavelet matrix elements exceed `64`.
508    ///
509    /// # Example
510    /// ```
511    /// use vers_vecs::{BitVec, WaveletMatrix};
512    ///
513    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
514    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
515    ///
516    /// assert_eq!(wavelet_matrix.rank_range_u64(0..3, 4), Some(2));
517    /// assert_eq!(wavelet_matrix.rank_range_u64(2..4, 4), Some(1));
518    /// ```
519    #[must_use]
520    pub fn rank_range_u64(&self, range: Range<usize>, symbol: u64) -> Option<usize> {
521        if range.start >= self.len() || range.end > self.len() || self.bits_per_element() > 64 {
522            None
523        } else {
524            Some(self.rank_range_u64_unchecked(range, symbol))
525        }
526    }
527
528    /// Get the number of occurrences of the given `symbol` in the encoded sequence between the
529    /// `offset`-th and `i`-th element (exclusive).
530    /// This is equivalent to ```rank_range_unchecked(offset..i, symbol)```.
531    /// The interval is half-open, meaning ```rank_offset_unchecked(0, 0, symbol)``` returns 0.
532    ///
533    /// The `symbol` is a `k`-bit word encoded in a [`BitVec`],
534    /// where the least significant bit is the first element, and `k` is the number of bits per element
535    /// in the wavelet matrix.
536    ///
537    /// This method does not perform bounds checking, nor does it check if the `symbol` is a valid
538    /// `k`-bit word.
539    /// Use [`rank_offset`] for a checked version.
540    ///
541    /// # Panics
542    /// May panic if `offset` is out of bounds,
543    /// or if `offset + i` is larger than the length of the encoded sequence,
544    /// or if `offset` is greater than `i`,
545    /// or if the number of bits in `symbol` is lower than `k`.
546    /// May instead return 0.
547    ///
548    /// [`BitVec`]: BitVec
549    /// [`rank_offset`]: WaveletMatrix::rank_offset
550    #[must_use]
551    pub fn rank_offset_unchecked(&self, offset: usize, i: usize, symbol: &BitVec) -> usize {
552        self.rank_range_unchecked(offset..i, symbol)
553    }
554
555    /// Get the number of occurrences of the given `symbol` in the encoded sequence between the
556    /// `offset`-th and `i`-th element (exclusive).
557    /// This is equivalent to ``rank_range(offset..i, symbol)``.
558    /// The interval is half-open, meaning ``rank_offset(0, 0, symbol)`` returns 0,
559    /// because the interval is empty.
560    ///
561    /// The `symbol` is a `k`-bit word encoded in a [`BitVec`],
562    /// where the least significant bit is the first element, and `k` is the number of bits per element
563    /// in the wavelet matrix.
564    ///
565    /// Returns `None` if `offset` is out of bounds,
566    /// or if `i` is larger than the length of the encoded sequence,
567    /// or if `offset` is greater than `i`,
568    /// or if the number of bits in `symbol` is not equal to `k`.
569    /// `i` may equal the length of the encoded sequence,
570    /// which will return the number of occurrences of the symbol up to the end of the sequence.
571    ///
572    /// # Example
573    /// ```
574    /// use vers_vecs::{BitVec, WaveletMatrix};
575    ///
576    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
577    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
578    ///
579    /// assert_eq!(wavelet_matrix.rank_offset(0, 3, &BitVec::pack_sequence_u8(&[4], 3)), Some(2));
580    /// assert_eq!(wavelet_matrix.rank_offset(2, 4, &BitVec::pack_sequence_u8(&[4], 3)), Some(1));
581    /// ```
582    ///
583    /// [`BitVec`]: BitVec
584    #[must_use]
585    pub fn rank_offset(&self, offset: usize, i: usize, symbol: &BitVec) -> Option<usize> {
586        if offset > i
587            || offset >= self.len()
588            || i > self.len()
589            || symbol.len() != self.bits_per_element()
590        {
591            None
592        } else {
593            Some(self.rank_offset_unchecked(offset, i, symbol))
594        }
595    }
596
597    /// Get the number of occurrences of the given `symbol` in the encoded sequence between the
598    /// `offset`-th and `i`-th element (exclusive).
599    /// This is equivalent to ``rank_range(offset..i, symbol)``.
600    /// The interval is half-open, meaning ``rank_offset(0, 0, symbol)`` returns 0.
601    ///
602    /// The `symbol` is a `k`-bit word encoded in a u64 numeral,
603    /// where k is less than or equal to 64.
604    ///
605    /// This method does not perform bounds checking, nor does it check if the elements of the
606    /// wavelet matrix can be represented in a u64 numeral.
607    /// Use [`rank_offset_u64`] for a checked version.
608    ///
609    /// # Panics
610    /// May panic if `offset` is out of bounds,
611    /// or if `i` is larger than the length of the encoded sequence,
612    /// or if `offset` is greater than `i`,
613    /// or if the number of bits in wavelet matrix elements exceed `64`.
614    /// May instead return 0.
615    ///
616    /// [`rank_offset_u64`]: WaveletMatrix::rank_offset_u64
617    #[must_use]
618    pub fn rank_offset_u64_unchecked(&self, offset: usize, i: usize, symbol: u64) -> usize {
619        self.rank_range_u64_unchecked(offset..i, symbol)
620    }
621
622    /// Get the number of occurrences of the given `symbol` in the encoded sequence between the
623    /// `offset`-th and `i`-th element (exclusive).
624    /// This is equivalent to ``rank_range(offset..i, symbol)``.
625    /// The interval is half-open, meaning ``rank_offset(0, 0, symbol)`` returns 0.
626    ///
627    /// The `symbol` is a `k`-bit word encoded in a u64 numeral,
628    /// where k is less than or equal to 64.
629    ///
630    /// Returns `None` if `offset` is out of bounds,
631    /// or if `i` is larger than the length of the encoded sequence,
632    /// or if `offset` is greater than `i`,
633    /// or if the number of bits in the wavelet matrix elements exceed `64`.
634    /// `i` may equal the length of the encoded sequence,
635    /// which will return the number of occurrences of the symbol up to the end of the sequence.
636    ///
637    /// # Example
638    /// ```
639    /// use vers_vecs::{BitVec, WaveletMatrix};
640    ///
641    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
642    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
643    ///
644    /// assert_eq!(wavelet_matrix.rank_offset_u64(0, 3, 4), Some(2));
645    /// assert_eq!(wavelet_matrix.rank_offset_u64(2, 4, 4), Some(1));
646    /// ```
647    #[must_use]
648    pub fn rank_offset_u64(&self, offset: usize, i: usize, symbol: u64) -> Option<usize> {
649        if offset > i || offset >= self.len() || i > self.len() || self.bits_per_element() > 64 {
650            None
651        } else {
652            Some(self.rank_offset_u64_unchecked(offset, i, symbol))
653        }
654    }
655
656    /// Get the number of occurrences of the given `symbol` in the encoded sequence up to the `i`-th
657    /// element (exclusive).
658    /// The `symbol` is a `k`-bit word encoded in a [`BitVec`],
659    /// where the least significant bit is the first element, and `k` is the number of bits per element
660    /// in the wavelet matrix.
661    ///
662    /// This method does not perform bounds checking, nor does it check if the `symbol` is a valid
663    /// `k`-bit word.
664    /// Use [`rank`] for a checked version.
665    ///
666    /// # Panics
667    /// May panic if `i` is out of bounds, or if the number of bits in `symbol` is lower than `k`.
668    /// May instead return 0.
669    /// If the number of bits in `symbol` exceeds `k`, the remaining bits are ignored.
670    ///
671    /// [`BitVec`]: BitVec
672    /// [`rank`]: WaveletMatrix::rank
673    #[must_use]
674    pub fn rank_unchecked(&self, i: usize, symbol: &BitVec) -> usize {
675        self.rank_range_unchecked(0..i, symbol)
676    }
677
678    /// Get the number of occurrences of the given `symbol` in the encoded sequence up to the `i`-th
679    /// element (exclusive).
680    /// The `symbol` is a `k`-bit word encoded in a [`BitVec`],
681    /// where the least significant bit is the first element, and `k` is the number of bits per element
682    /// in the wavelet matrix.
683    ///
684    /// Returns `None` if `i` is out of bounds (greater than the length of the encoded sequence, but
685    /// since it is exclusive, it may be equal to the length),
686    /// or if the number of bits in `symbol` is not equal to `k`.
687    ///
688    /// # Example
689    /// ```
690    /// use vers_vecs::{BitVec, WaveletMatrix};
691    ///
692    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
693    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
694    ///
695    /// assert_eq!(wavelet_matrix.rank(3, &BitVec::pack_sequence_u8(&[4], 3)), Some(2));
696    /// assert_eq!(wavelet_matrix.rank(3, &BitVec::pack_sequence_u8(&[1], 3)), Some(1));
697    /// ```
698    ///
699    /// [`BitVec`]: BitVec
700    #[must_use]
701    pub fn rank(&self, i: usize, symbol: &BitVec) -> Option<usize> {
702        if i > self.len() || symbol.len() != self.bits_per_element() {
703            None
704        } else {
705            Some(self.rank_range_unchecked(0..i, symbol))
706        }
707    }
708
709    /// Get the number of occurrences of the given `symbol` in the encoded sequence up to the `i`-th
710    /// element (exclusive).
711    /// The `symbol` is a `k`-bit word encoded in a u64 numeral,
712    /// where k is less than or equal to 64.
713    ///
714    /// This method does not perform bounds checking, nor does it check if the elements of the
715    /// wavelet matrix can be represented in a u64 numeral.
716    /// Use [`rank_u64`] for a checked version.
717    ///
718    /// # Panics
719    /// May panic if `i` is out of bounds,
720    /// or if the number of bits in wavelet matrix elements exceed `64`.
721    /// May instead return 0.
722    ///
723    /// [`rank_u64`]: WaveletMatrix::rank_u64
724    #[must_use]
725    pub fn rank_u64_unchecked(&self, i: usize, symbol: u64) -> usize {
726        self.rank_range_u64_unchecked(0..i, symbol)
727    }
728
729    /// Get the number of occurrences of the given `symbol` in the encoded sequence up to the `i`-th
730    /// element (exclusive).
731    /// The `symbol` is a `k`-bit word encoded in a u64 numeral,
732    /// where k is less than or equal to 64.
733    ///
734    /// Returns `None` if `i` is out of bounds (greater than the length of the encoded sequence, but
735    /// since it is exclusive, it may be equal to the length),
736    /// or if the number of bits in the wavelet matrix elements exceed `64`.
737    ///
738    /// # Example
739    /// ```
740    /// use vers_vecs::{BitVec, WaveletMatrix};
741    ///
742    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
743    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
744    ///
745    /// assert_eq!(wavelet_matrix.rank_u64(3, 4), Some(2));
746    /// assert_eq!(wavelet_matrix.rank_u64(3, 1), Some(1));
747    /// ```
748    #[must_use]
749    pub fn rank_u64(&self, i: usize, symbol: u64) -> Option<usize> {
750        if i > self.len() || self.bits_per_element() > 64 {
751            None
752        } else {
753            Some(self.rank_range_u64_unchecked(0..i, symbol))
754        }
755    }
756
757    /// Get the index of the `rank`-th occurrence of the given `symbol` in the encoded sequence,
758    /// starting from the `offset`-th element.
759    /// The `symbol` is a `k`-bit word encoded in a [`BitVec`],
760    /// where the least significant bit is the first element, and `k` is the number of bits per element
761    /// in the wavelet matrix.
762    ///
763    /// This method does not perform bounds checking, nor does it check if the `symbol` is a valid
764    /// `k`-bit word.
765    /// Use [`select_offset`] for a checked version.
766    ///
767    /// Returns the index of the `rank`-th occurrence of the `symbol` in the encoded sequence,
768    /// or the length of the encoded sequence if the `rank`-th occurrence does not exist.
769    ///
770    /// # Panics
771    /// May panic if the `offset` is out of bounds,
772    /// or if the number of bits in `symbol` is lower than `k`.
773    /// May instead return the length of the encoded sequence.
774    ///
775    /// [`BitVec`]: BitVec
776    /// [`select_offset`]: WaveletMatrix::select_offset
777    #[must_use]
778    pub fn select_offset_unchecked(&self, offset: usize, rank: usize, symbol: &BitVec) -> usize {
779        let mut range_start = offset;
780
781        for (level, data) in self.data.iter().enumerate() {
782            if symbol.get_unchecked((self.bits_per_element() - 1) - level) == 0 {
783                range_start = data.rank0(range_start);
784            } else {
785                range_start = data.rank0 + data.rank1(range_start);
786            }
787        }
788
789        let mut range_end = range_start + rank;
790
791        for (level, data) in self.data.iter().enumerate().rev() {
792            if symbol.get_unchecked((self.bits_per_element() - 1) - level) == 0 {
793                range_end = data.select0(range_end);
794            } else {
795                range_end = data.select1(range_end - data.rank0);
796            }
797        }
798
799        range_end
800    }
801
802    /// Get the index of the `rank`-th occurrence of the given `symbol` in the encoded sequence,
803    /// starting from the `offset`-th element.
804    /// The `symbol` is a `k`-bit word encoded in a [`BitVec`],
805    /// where the least significant bit is the first element, and `k` is the number of bits per element
806    /// in the wavelet matrix.
807    ///
808    /// Returns `None` if `offset` is out of bounds, or if the number of bits in `symbol` is not equal to `k`,
809    /// or if the `rank`-th occurrence of the `symbol` does not exist.
810    ///
811    /// # Example
812    /// ```
813    /// use vers_vecs::{BitVec, WaveletMatrix};
814    ///
815    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
816    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
817    ///
818    /// assert_eq!(wavelet_matrix.select_offset(0, 0, &BitVec::pack_sequence_u8(&[4], 3)), Some(1));
819    /// assert_eq!(wavelet_matrix.select_offset(0, 1, &BitVec::pack_sequence_u8(&[4], 3)), Some(2));
820    /// assert_eq!(wavelet_matrix.select_offset(2, 0, &BitVec::pack_sequence_u8(&[4], 3)), Some(2));
821    /// assert_eq!(wavelet_matrix.select_offset(2, 1, &BitVec::pack_sequence_u8(&[4], 3)), None);
822    /// ```
823    ///
824    /// [`BitVec`]: BitVec
825    #[must_use]
826    pub fn select_offset(&self, offset: usize, rank: usize, symbol: &BitVec) -> Option<usize> {
827        if offset >= self.len() || symbol.len() != self.bits_per_element() {
828            None
829        } else {
830            let idx = self.select_offset_unchecked(offset, rank, symbol);
831            if idx < self.len() {
832                Some(idx)
833            } else {
834                None
835            }
836        }
837    }
838
839    /// Get the index of the `rank`-th occurrence of the given `symbol` in the encoded sequence,
840    /// starting from the `offset`-th element.
841    /// The `symbol` is a `k`-bit word encoded in a u64 numeral,
842    /// where k is less than or equal to 64.
843    ///
844    /// This method does not perform bounds checking, nor does it check if the elements of the
845    /// wavelet matrix can be represented in a u64 numeral.
846    /// Use [`select_offset_u64`] for a checked version.
847    ///
848    /// Returns the index of the `rank`-th occurrence of the `symbol` in the encoded sequence,
849    /// or the length of the encoded sequence if the `rank`-th occurrence does not exist.
850    ///
851    /// # Panics
852    /// May panic if the `offset` is out of bounds,
853    /// or if the number of bits in wavelet matrix elements exceed `64`.
854    /// May instead return the length of the encoded sequence.
855    ///
856    /// [`select_offset_u64`]: WaveletMatrix::select_offset_u64
857    #[must_use]
858    pub fn select_offset_u64_unchecked(&self, offset: usize, rank: usize, symbol: u64) -> usize {
859        let mut range_start = offset;
860
861        for (level, data) in self.data.iter().enumerate() {
862            if (symbol >> ((self.bits_per_element() - 1) - level)) & 1 == 0 {
863                range_start = data.rank0(range_start);
864            } else {
865                range_start = data.rank0 + data.rank1(range_start);
866            }
867        }
868
869        let mut range_end = range_start + rank;
870
871        for (level, data) in self.data.iter().enumerate().rev() {
872            if (symbol >> ((self.bits_per_element() - 1) - level)) & 1 == 0 {
873                range_end = data.select0(range_end);
874            } else {
875                range_end = data.select1(range_end - data.rank0);
876            }
877        }
878
879        range_end
880    }
881
882    /// Get the index of the `rank`-th occurrence of the given `symbol` in the encoded sequence,
883    /// starting from the `offset`-th element.
884    /// The `symbol` is a `k`-bit word encoded in a u64 numeral,
885    /// where k is less than or equal to 64.
886    ///
887    /// Returns `None` if `offset` is out of bounds, or if the number of bits in the wavelet matrix
888    /// elements exceed `64`, or if the `rank`-th occurrence of the `symbol` does not exist.
889    ///
890    /// # Example
891    /// ```
892    /// use vers_vecs::{BitVec, WaveletMatrix};
893    ///
894    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
895    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
896    ///
897    /// assert_eq!(wavelet_matrix.select_offset_u64(0, 0, 4), Some(1));
898    /// assert_eq!(wavelet_matrix.select_offset_u64(0, 1, 4), Some(2));
899    /// assert_eq!(wavelet_matrix.select_offset_u64(2, 0, 4), Some(2));
900    /// assert_eq!(wavelet_matrix.select_offset_u64(2, 1, 4), None);
901    /// ```
902    #[must_use]
903    pub fn select_offset_u64(&self, offset: usize, rank: usize, symbol: u64) -> Option<usize> {
904        if offset >= self.len() || self.bits_per_element() > 64 {
905            None
906        } else {
907            let idx = self.select_offset_u64_unchecked(offset, rank, symbol);
908            if idx < self.len() {
909                Some(idx)
910            } else {
911                None
912            }
913        }
914    }
915
916    /// Get the index of the `rank`-th occurrence of the given `symbol` in the encoded sequence.
917    /// The `symbol` is a `k`-bit word encoded in a [`BitVec`],
918    /// where the least significant bit is the first element, and `k` is the number of bits per element
919    /// in the wavelet matrix.
920    ///
921    /// This method does not perform bounds checking, nor does it check if the `symbol` is a valid
922    /// `k`-bit word.
923    /// Use [`select`] for a checked version.
924    ///
925    /// Returns the index of the `rank`-th occurrence of the `symbol` in the encoded sequence,
926    /// or the length of the encoded sequence if the `rank`-th occurrence does not exist.
927    ///
928    /// # Panics
929    /// May panic if the number of bits in `symbol` is not equal to `k`.
930    /// May instead return the length of the encoded sequence.
931    ///
932    /// [`BitVec`]: BitVec
933    /// [`select`]: WaveletMatrix::select
934    #[must_use]
935    pub fn select_unchecked(&self, rank: usize, symbol: &BitVec) -> usize {
936        self.select_offset_unchecked(0, rank, symbol)
937    }
938
939    /// Get the index of the `rank`-th occurrence of the given `symbol` in the encoded sequence.
940    /// The `symbol` is a `k`-bit word encoded in a [`BitVec`],
941    /// where the least significant bit is the first element, and `k` is the number of bits per element
942    /// in the wavelet matrix.
943    ///
944    /// Returns `None` if the number of bits in `symbol` is not equal to `k`,
945    /// or if the `rank`-th occurrence of the `symbol` does not exist.
946    ///
947    /// # Example
948    /// ```
949    /// use vers_vecs::{BitVec, WaveletMatrix};
950    ///
951    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
952    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
953    ///
954    /// assert_eq!(wavelet_matrix.select(0, &BitVec::pack_sequence_u8(&[4], 3)), Some(1));
955    /// assert_eq!(wavelet_matrix.select(1, &BitVec::pack_sequence_u8(&[4], 3)), Some(2));
956    /// ```
957    ///
958    /// [`BitVec`]: BitVec
959    #[must_use]
960    pub fn select(&self, rank: usize, symbol: &BitVec) -> Option<usize> {
961        if symbol.len() == self.bits_per_element() {
962            let idx = self.select_unchecked(rank, symbol);
963            if idx < self.len() {
964                Some(idx)
965            } else {
966                None
967            }
968        } else {
969            None
970        }
971    }
972
973    /// Get the index of the `rank`-th occurrence of the given `symbol` in the encoded sequence.
974    /// The `symbol` is a `k`-bit word encoded in a u64 numeral,
975    /// where k is less than or equal to 64.
976    ///
977    /// This method does not perform bounds checking, nor does it check if the elements of the
978    /// wavelet matrix can be represented in a u64 numeral.
979    /// Use [`select_u64`] for a checked version.
980    ///
981    /// Returns the index of the `rank`-th occurrence of the `symbol` in the encoded sequence,
982    /// or the length of the encoded sequence if the `rank`-th occurrence does not exist.
983    ///
984    /// # Panics
985    /// May panic if the number of bits in wavelet matrix elements exceed `64`.
986    /// May instead return the length of the encoded sequence.
987    ///
988    /// [`select_u64`]: WaveletMatrix::select_u64
989    #[must_use]
990    pub fn select_u64_unchecked(&self, rank: usize, symbol: u64) -> usize {
991        self.select_offset_u64_unchecked(0, rank, symbol)
992    }
993
994    /// Get the index of the `rank`-th occurrence of the given `symbol` in the encoded sequence.
995    /// The `symbol` is a `k`-bit word encoded in a u64 numeral,
996    /// where k is less than or equal to 64.
997    ///
998    /// Returns `None` if the number of bits in the wavelet matrix elements exceed `64`,
999    /// or if the `rank`-th occurrence of the `symbol` does not exist.
1000    ///
1001    /// # Example
1002    /// ```
1003    /// use vers_vecs::{BitVec, WaveletMatrix};
1004    ///
1005    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1006    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1007    ///
1008    /// assert_eq!(wavelet_matrix.select_u64(0, 4), Some(1));
1009    /// assert_eq!(wavelet_matrix.select_u64(1, 4), Some(2));
1010    /// ```
1011    #[must_use]
1012    pub fn select_u64(&self, rank: usize, symbol: u64) -> Option<usize> {
1013        if self.bits_per_element() > 64 {
1014            None
1015        } else {
1016            let idx = self.select_u64_unchecked(rank, symbol);
1017            if idx < self.len() {
1018                Some(idx)
1019            } else {
1020                None
1021            }
1022        }
1023    }
1024
1025    /// Get the `k`-th smallest element in the encoded sequence in the specified `range`,
1026    /// where `k = 0` returns the smallest element.
1027    /// The `range` is a half-open interval, meaning that the `end` index is exclusive.
1028    /// The `k`-th smallest element is returned as a `BitVec`,
1029    /// where the least significant bit is the first element.
1030    ///
1031    /// This method does not perform bounds checking.
1032    /// It returns a nonsensical result if the `k` is greater than the size of the range.
1033    /// Use [`quantile`] for a checked version.
1034    ///
1035    /// # Panics
1036    /// May panic if the `range` is out of bounds. May instead return an empty bit vector.
1037    ///
1038    /// [`quantile`]: WaveletMatrix::quantile
1039    #[must_use]
1040    pub fn quantile_unchecked(&self, range: Range<usize>, k: usize) -> BitVec {
1041        let result = BitVec::from_zeros(self.bits_per_element());
1042
1043        self.partial_quantile_search_unchecked(range, k, 0, result)
1044    }
1045
1046    /// Internal function to reuse the quantile code for the predecessor and successor search.
1047    /// This function performs the quantile search starting at the given level with the given prefix.
1048    ///
1049    /// The function does not perform any checks, so the caller must ensure that the range is valid,
1050    /// and that the prefix is a valid prefix for the given level.
1051    #[inline(always)]
1052    fn partial_quantile_search_unchecked(
1053        &self,
1054        mut range: Range<usize>,
1055        mut k: usize,
1056        start_level: usize,
1057        mut prefix: BitVec,
1058    ) -> BitVec {
1059        debug_assert!(prefix.len() == self.bits_per_element());
1060        debug_assert!(!range.is_empty());
1061        debug_assert!(range.end <= self.len());
1062
1063        for (level, data) in self.data.iter().enumerate().skip(start_level) {
1064            let zeros_start = data.rank0(range.start);
1065            let zeros_end = data.rank0(range.end);
1066            let zeros = zeros_end - zeros_start;
1067
1068            // if k < zeros, the element is among the zeros
1069            if k < zeros {
1070                range.start = zeros_start;
1071                range.end = zeros_end;
1072            } else {
1073                // the element is among the ones, so we set the bit to 1, and move the range
1074                // into the 1-partition of the next level
1075                prefix.set_unchecked((self.bits_per_element() - 1) - level, 1);
1076                k -= zeros;
1077                range.start = data.rank0 + (range.start - zeros_start); // range.start - zeros_start is the rank1 of range.start
1078                range.end = data.rank0 + (range.end - zeros_end); // same here
1079            }
1080        }
1081
1082        prefix
1083    }
1084
1085    /// Get the `k`-th smallest element in the encoded sequence in the specified `range`,
1086    /// where `k = 0` returns the smallest element.
1087    /// The `range` is a half-open interval, meaning that the `end` index is exclusive.
1088    /// The `k`-th smallest element is returned as a [`BitVec`],
1089    /// where the least significant bit is the first element.
1090    ///
1091    /// Returns `None` if the `range` is out of bounds, or if `k` is greater than the size of the range.
1092    ///
1093    /// # Example
1094    /// ```
1095    /// use vers_vecs::{BitVec, WaveletMatrix};
1096    ///
1097    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1098    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1099    ///
1100    /// assert_eq!(wavelet_matrix.quantile(0..3, 0), Some(BitVec::pack_sequence_u8(&[1], 3)));
1101    /// assert_eq!(wavelet_matrix.quantile(0..3, 1), Some(BitVec::pack_sequence_u8(&[4], 3)));
1102    /// assert_eq!(wavelet_matrix.quantile(1..4, 0), Some(BitVec::pack_sequence_u8(&[1], 3)));
1103    /// ```
1104    #[must_use]
1105    pub fn quantile(&self, range: Range<usize>, k: usize) -> Option<BitVec> {
1106        if range.start >= self.len() || range.end > self.len() || k >= range.end - range.start {
1107            None
1108        } else {
1109            Some(self.quantile_unchecked(range, k))
1110        }
1111    }
1112
1113    /// Get the `i`-th smallest element in the entire wavelet matrix.
1114    /// The `i`-th smallest element is returned as a `BitVec`,
1115    /// where the least significant bit is the first element.
1116    ///
1117    /// This method does not perform bounds checking.
1118    /// Use [`get_sorted`] for a checked version.
1119    ///
1120    /// # Panics
1121    /// May panic if the `i` is out of bounds, or returns an empty bit vector.
1122    ///
1123    /// [`get_sorted`]: Self::get_sorted
1124    #[must_use]
1125    pub fn get_sorted_unchecked(&self, i: usize) -> BitVec {
1126        self.quantile_unchecked(0..self.len(), i)
1127    }
1128
1129    /// Get the `i`-th smallest element in the wavelet matrix.
1130    /// The `i`-th smallest element is returned as a `BitVec`,
1131    /// where the least significant bit is the first element.
1132    /// This method call is equivalent to `self.quantile(0..self.len(), i)`.
1133    ///
1134    /// Returns `None` if the `i` is out of bounds.
1135    ///
1136    /// # Example
1137    /// ```
1138    /// use vers_vecs::{BitVec, WaveletMatrix};
1139    ///
1140    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1141    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1142    ///
1143    /// assert_eq!(wavelet_matrix.get_sorted(0), Some(BitVec::pack_sequence_u8(&[1], 3)));
1144    /// assert_eq!(wavelet_matrix.get_sorted(1), Some(BitVec::pack_sequence_u8(&[1], 3)));
1145    /// assert_eq!(wavelet_matrix.get_sorted(2), Some(BitVec::pack_sequence_u8(&[2], 3)));
1146    /// ```
1147    #[must_use]
1148    pub fn get_sorted(&self, i: usize) -> Option<BitVec> {
1149        if i >= self.len() {
1150            None
1151        } else {
1152            Some(self.get_sorted_unchecked(i))
1153        }
1154    }
1155
1156    /// Get the `k`-th smallest element in the encoded sequence in the specified `range`,
1157    /// where `k = 0` returns the smallest element.
1158    /// The `range` is a half-open interval, meaning that the `end` index is exclusive.
1159    /// The `k`-th smallest element is returned as a `u64` numeral.
1160    /// If the number of bits per element exceeds 64, the value is truncated.
1161    ///
1162    /// This method does not perform bounds checking.
1163    /// It returns a nonsensical result if the `k` is greater than the size of the range.
1164    /// Use [`quantile_u64`] for a checked version.
1165    ///
1166    /// # Panics
1167    /// May panic if the `range` is out of bounds.
1168    /// May instead return 0.
1169    ///
1170    /// [`quantile_u64`]: WaveletMatrix::quantile_u64
1171    #[must_use]
1172    pub fn quantile_u64_unchecked(&self, range: Range<usize>, k: usize) -> u64 {
1173        self.partial_quantile_search_u64_unchecked(range, k, 0, 0)
1174    }
1175
1176    /// Internal function to reuse the quantile code for the predecessor and successor search.
1177    /// This function performs the quantile search starting at the given level with the given prefix.
1178    ///
1179    /// The function does not perform any checks, so the caller must ensure that the range is valid,
1180    /// and that the prefix is a valid prefix for the given level (i.e. the prefix is not shifted
1181    /// to another level).
1182    #[inline(always)]
1183    fn partial_quantile_search_u64_unchecked(
1184        &self,
1185        mut range: Range<usize>,
1186        mut k: usize,
1187        start_level: usize,
1188        mut prefix: u64,
1189    ) -> u64 {
1190        debug_assert!(self.bits_per_element() <= 64);
1191        debug_assert!(!range.is_empty());
1192        debug_assert!(range.end <= self.len());
1193
1194        for data in self.data.iter().skip(start_level) {
1195            prefix <<= 1;
1196            let zeros_start = data.rank0(range.start);
1197            let zeros_end = data.rank0(range.end);
1198            let zeros = zeros_end - zeros_start;
1199
1200            if k < zeros {
1201                range.start = zeros_start;
1202                range.end = zeros_end;
1203            } else {
1204                prefix |= 1;
1205                k -= zeros;
1206                range.start = data.rank0 + (range.start - zeros_start);
1207                range.end = data.rank0 + (range.end - zeros_end);
1208            }
1209        }
1210
1211        prefix
1212    }
1213
1214    /// Get the `k`-th smallest element in the encoded sequence in the specified `range`,
1215    /// where `k = 0` returns the smallest element.
1216    /// The `range` is a half-open interval, meaning that the `end` index is exclusive.
1217    /// The `k`-th smallest element is returned as a `u64` numeral.
1218    ///
1219    /// Returns `None` if the `range` is out of bounds, or if the number of bits per element exceeds 64,
1220    /// or if `k` is greater than the size of the range.
1221    ///
1222    /// # Example
1223    /// ```
1224    /// use vers_vecs::{BitVec, WaveletMatrix};
1225    ///
1226    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1227    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1228    ///
1229    /// assert_eq!(wavelet_matrix.quantile_u64(0..3, 0), Some(1));
1230    /// assert_eq!(wavelet_matrix.quantile_u64(0..3, 1), Some(4));
1231    /// assert_eq!(wavelet_matrix.quantile_u64(1..4, 0), Some(1));
1232    /// ```
1233    #[must_use]
1234    pub fn quantile_u64(&self, range: Range<usize>, k: usize) -> Option<u64> {
1235        if range.start >= self.len()
1236            || range.end > self.len()
1237            || self.bits_per_element() > 64
1238            || k >= range.end - range.start
1239        {
1240            None
1241        } else {
1242            Some(self.quantile_u64_unchecked(range, k))
1243        }
1244    }
1245
1246    /// Get the `i`-th smallest element in the wavelet matrix.
1247    /// The `i`-th smallest element is returned as a u64 numeral.
1248    ///
1249    /// If the number of bits per element exceeds 64, the value is truncated.
1250    ///
1251    /// This method does not perform bounds checking.
1252    /// Use [`get_sorted_u64`] for a checked version.
1253    ///
1254    /// # Panics
1255    /// May panic if the `i` is out of bounds, or returns an empty bit vector.
1256    ///
1257    /// [`get_sorted_u64`]: WaveletMatrix::get_sorted_u64
1258    #[must_use]
1259    pub fn get_sorted_u64_unchecked(&self, i: usize) -> u64 {
1260        self.quantile_u64_unchecked(0..self.len(), i)
1261    }
1262
1263    /// Get the `i`-th smallest element in the entire wavelet matrix.
1264    /// The `i`-th smallest element is returned as a u64 numeral.
1265    ///
1266    /// Returns `None` if the `i` is out of bounds, or if the number of bits per element exceeds 64.
1267    ///
1268    /// # Example
1269    /// ```
1270    /// use vers_vecs::{BitVec, WaveletMatrix};
1271    ///
1272    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1273    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1274    ///
1275    /// assert_eq!(wavelet_matrix.get_sorted_u64(0), Some(1));
1276    /// assert_eq!(wavelet_matrix.get_sorted_u64(1), Some(1));
1277    /// assert_eq!(wavelet_matrix.get_sorted_u64(2), Some(2));
1278    /// ```
1279    #[must_use]
1280    pub fn get_sorted_u64(&self, i: usize) -> Option<u64> {
1281        if i >= self.len() || self.bits_per_element() > 64 {
1282            None
1283        } else {
1284            Some(self.get_sorted_u64_unchecked(i))
1285        }
1286    }
1287
1288    /// Get the smallest element in the encoded sequence in the specified `range`.
1289    /// The range is a half-open interval, meaning that the `end` index is exclusive.
1290    /// The smallest element is returned as a `BitVec`,
1291    ///
1292    /// This method does not perform bounds checking.
1293    /// Use [`range_min`] for a checked version.
1294    ///
1295    /// # Panics
1296    /// May panic if the `range` is out of bounds or if the range is empty.
1297    /// May instead return an empty bit vector.
1298    ///
1299    /// [`range_min`]: WaveletMatrix::range_min
1300    #[must_use]
1301    pub fn range_min_unchecked(&self, range: Range<usize>) -> BitVec {
1302        self.quantile_unchecked(range, 0)
1303    }
1304
1305    /// Get the smallest element in the encoded sequence in the specified `range`.
1306    /// The range is a half-open interval, meaning that the `end` index is exclusive.
1307    /// The smallest element is returned as a `BitVec`,
1308    ///
1309    /// Returns `None` if the `range` is out of bounds or if the range is empty.
1310    ///
1311    /// # Example
1312    /// ```
1313    /// use vers_vecs::{BitVec, WaveletMatrix};
1314    ///
1315    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1316    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1317    ///
1318    /// assert_eq!(wavelet_matrix.range_min(0..3), Some(BitVec::pack_sequence_u8(&[1], 3)));
1319    /// assert_eq!(wavelet_matrix.range_min(1..4), Some(BitVec::pack_sequence_u8(&[1], 3)));
1320    /// assert_eq!(wavelet_matrix.range_min(1..3), Some(BitVec::pack_sequence_u8(&[4], 3)));
1321    /// ```
1322    #[must_use]
1323    pub fn range_min(&self, range: Range<usize>) -> Option<BitVec> {
1324        self.quantile(range, 0)
1325    }
1326
1327    /// Get the smallest element in the encoded sequence in the specified `range`.
1328    /// The range is a half-open interval, meaning that the `end` index is exclusive.
1329    /// The smallest element is returned as a `u64` numeral.
1330    /// If the number of bits per element exceeds 64, the value is truncated.
1331    ///
1332    /// This method does not perform bounds checking.
1333    /// Use [`range_min_u64`] for a checked version.
1334    ///
1335    /// # Panics
1336    /// May panic if the `range` is out of bounds or if the range is empty.
1337    /// May instead return 0.
1338    ///
1339    /// [`range_min_u64`]: WaveletMatrix::range_min_u64
1340    #[must_use]
1341    pub fn range_min_u64_unchecked(&self, range: Range<usize>) -> u64 {
1342        self.quantile_u64_unchecked(range, 0)
1343    }
1344
1345    /// Get the smallest element in the encoded sequence in the specified `range`.
1346    /// The range is a half-open interval, meaning that the `end` index is exclusive.
1347    /// The smallest element is returned as a `u64` numeral.
1348    ///
1349    /// Returns `None` if the `range` is out of bounds, if the range is empty, or if the number of bits
1350    /// per element exceeds 64.
1351    ///
1352    /// # Example
1353    /// ```
1354    /// use vers_vecs::{BitVec, WaveletMatrix};
1355    ///
1356    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1357    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1358    ///
1359    /// assert_eq!(wavelet_matrix.range_min_u64(0..3), Some(1));
1360    /// assert_eq!(wavelet_matrix.range_min_u64(1..4), Some(1));
1361    /// assert_eq!(wavelet_matrix.range_min_u64(1..3), Some(4));
1362    /// ```
1363    #[must_use]
1364    pub fn range_min_u64(&self, range: Range<usize>) -> Option<u64> {
1365        self.quantile_u64(range, 0)
1366    }
1367
1368    /// Get the largest element in the encoded sequence in the specified `range`.
1369    /// The range is a half-open interval, meaning that the `end` index is exclusive.
1370    /// The largest element is returned as a `BitVec`,
1371    /// where the least significant bit is the first element.
1372    ///
1373    /// This method does not perform bounds checking.
1374    /// Use [`range_max`] for a checked version.
1375    ///
1376    /// # Panics
1377    /// May panic if the `range` is out of bounds or if the range is empty.
1378    /// May instead return an empty bit vector.
1379    ///
1380    /// [`range_max`]: WaveletMatrix::range_max
1381    #[must_use]
1382    pub fn range_max_unchecked(&self, range: Range<usize>) -> BitVec {
1383        let k = range.end - range.start - 1;
1384        self.quantile_unchecked(range, k)
1385    }
1386
1387    /// Get the largest element in the encoded sequence in the specified `range`.
1388    /// The range is a half-open interval, meaning that the `end` index is exclusive.
1389    /// The largest element is returned as a `BitVec`,
1390    /// where the least significant bit is the first element.
1391    ///
1392    /// Returns `None` if the `range` is out of bounds or if the range is empty.
1393    ///
1394    /// # Example
1395    /// ```
1396    /// use vers_vecs::{BitVec, WaveletMatrix};
1397    ///
1398    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1399    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1400    ///
1401    /// assert_eq!(wavelet_matrix.range_max(0..3), Some(BitVec::pack_sequence_u8(&[4], 3)));
1402    /// assert_eq!(wavelet_matrix.range_max(3..6), Some(BitVec::pack_sequence_u8(&[7], 3)));
1403    /// ```
1404    #[must_use]
1405    pub fn range_max(&self, range: Range<usize>) -> Option<BitVec> {
1406        if range.is_empty() {
1407            None
1408        } else {
1409            let k = range.end - range.start - 1;
1410            self.quantile(range, k)
1411        }
1412    }
1413
1414    /// Get the largest element in the encoded sequence in the specified `range`.
1415    /// The range is a half-open interval, meaning that the `end` index is exclusive.
1416    /// The largest element is returned as a `u64` numeral.
1417    /// If the number of bits per element exceeds 64, the value is truncated.
1418    ///
1419    /// This method does not perform bounds checking.
1420    /// Use [`range_max_u64`] for a checked version.
1421    ///
1422    /// # Panics
1423    /// May panic if the `range` is out of bounds or if the range is empty.
1424    /// May instead return 0.
1425    ///
1426    /// [`range_max_u64`]: WaveletMatrix::range_max_u64
1427    #[must_use]
1428    pub fn range_max_u64_unchecked(&self, range: Range<usize>) -> u64 {
1429        let k = range.end - range.start - 1;
1430        self.quantile_u64_unchecked(range, k)
1431    }
1432
1433    /// Get the largest element in the encoded sequence in the specified `range`.
1434    /// The range is a half-open interval, meaning that the `end` index is exclusive.
1435    /// The largest element is returned as a `u64` numeral.
1436    ///
1437    /// Returns `None` if the `range` is out of bounds, if the range is empty, or if the number of bits
1438    /// per element exceeds 64.
1439    ///
1440    /// # Example
1441    /// ```
1442    /// use vers_vecs::{BitVec, WaveletMatrix};
1443    ///
1444    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1445    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1446    ///
1447    /// assert_eq!(wavelet_matrix.range_max_u64(0..3), Some(4));
1448    /// assert_eq!(wavelet_matrix.range_max_u64(3..6), Some(7));
1449    /// ```
1450    #[must_use]
1451    pub fn range_max_u64(&self, range: Range<usize>) -> Option<u64> {
1452        if range.is_empty() {
1453            None
1454        } else {
1455            let k = range.end - range.start - 1;
1456            self.quantile_u64(range, k)
1457        }
1458    }
1459
1460    /// Get the median element in the encoded sequence in the specified `range`.
1461    /// The range is a half-open interval, meaning that the `end` index is exclusive.
1462    /// The median element is returned as a `BitVec`,
1463    /// where the least significant bit is the first element.
1464    ///
1465    /// If the range does not contain an odd number of elements, the position is rounded down.
1466    ///
1467    /// This method does not perform bounds checking.
1468    /// Use [`range_median`] for a checked version.
1469    ///
1470    /// # Panics
1471    /// May panic if the `range` is out of bounds or if the range is empty.
1472    /// May instead return an empty bit vector.
1473    ///
1474    /// [`range_median`]: WaveletMatrix::range_median
1475    #[must_use]
1476    pub fn range_median_unchecked(&self, range: Range<usize>) -> BitVec {
1477        let k = (range.end - 1 - range.start) / 2;
1478        self.quantile_unchecked(range, k)
1479    }
1480
1481    /// Get the median element in the encoded sequence in the specified `range`.
1482    /// The range is a half-open interval, meaning that the `end` index is exclusive.
1483    /// The median element is returned as a `BitVec`,
1484    /// where the least significant bit is the first element.
1485    ///
1486    /// If the range does not contain an odd number of elements, the position is rounded down.
1487    ///
1488    /// Returns `None` if the `range` is out of bounds or if the range is empty.
1489    ///
1490    /// # Example
1491    /// ```
1492    /// use vers_vecs::{BitVec, WaveletMatrix};
1493    ///
1494    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1495    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1496    ///
1497    /// assert_eq!(wavelet_matrix.range_median(0..3), Some(BitVec::pack_sequence_u8(&[4], 3)));
1498    /// assert_eq!(wavelet_matrix.range_median(1..4), Some(BitVec::pack_sequence_u8(&[4], 3)));
1499    /// assert_eq!(wavelet_matrix.range_median(0..6), Some(BitVec::pack_sequence_u8(&[2], 3)));
1500    /// ```
1501    #[must_use]
1502    pub fn range_median(&self, range: Range<usize>) -> Option<BitVec> {
1503        if range.is_empty() {
1504            None
1505        } else {
1506            let k = (range.end - 1 - range.start) / 2;
1507            self.quantile(range, k)
1508        }
1509    }
1510
1511    /// Get the median element in the encoded sequence in the specified `range`.
1512    /// The range is a half-open interval, meaning that the `end` index is exclusive.
1513    /// The median element is returned as a `u64` numeral.
1514    /// If the number of bits per element exceeds 64, the value is truncated.
1515    ///
1516    /// If the range does not contain an odd number of elements, the position is rounded down.
1517    ///
1518    /// This method does not perform bounds checking.
1519    /// Use [`range_median_u64`] for a checked version.
1520    ///
1521    /// # Panics
1522    /// May panic if the `range` is out of bounds or if the range is empty.
1523    /// May instead return 0.
1524    ///
1525    /// [`range_median_u64`]: WaveletMatrix::range_median_u64
1526    #[must_use]
1527    pub fn range_median_u64_unchecked(&self, range: Range<usize>) -> u64 {
1528        let k = (range.end - 1 - range.start) / 2;
1529        self.quantile_u64_unchecked(range, k)
1530    }
1531
1532    /// Get the median element in the encoded sequence in the specified `range`.
1533    /// The range is a half-open interval, meaning that the `end` index is exclusive.
1534    /// The median element is returned as a `u64` numeral.
1535    ///
1536    /// If the range does not contain an odd number of elements, the position is rounded down.
1537    ///
1538    /// Returns `None` if the `range` is out of bounds, if the range is empty, or if the number of bits
1539    /// per element exceeds 64.
1540    ///
1541    /// # Example
1542    /// ```
1543    /// use vers_vecs::{BitVec, WaveletMatrix};
1544    ///
1545    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1546    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1547    ///
1548    /// assert_eq!(wavelet_matrix.range_median_u64(0..3), Some(4));
1549    /// assert_eq!(wavelet_matrix.range_median_u64(1..4), Some(4));
1550    /// assert_eq!(wavelet_matrix.range_median_u64(0..6), Some(2));
1551    /// ```
1552    #[must_use]
1553    pub fn range_median_u64(&self, range: Range<usize>) -> Option<u64> {
1554        if range.is_empty() || self.bits_per_element() > 64 || range.end > self.len() {
1555            None
1556        } else {
1557            let k = (range.end - 1 - range.start) / 2;
1558            self.quantile_u64(range, k)
1559        }
1560    }
1561
1562    /// Get the predecessor of the given `symbol` in the given `range`.
1563    /// This is a private generic helper function to implement the public `predecessor` functions.
1564    ///
1565    /// The read and write access to the query and result values are abstracted by the `Reader` and `Writer` closures.
1566    #[inline(always)] // even though the function is pretty large, inlining probably gets rid of the closure calls in favor of static calls
1567    fn predecessor_generic_unchecked<
1568        T: Clone,
1569        Reader: Fn(usize, &T) -> u64,
1570        Writer: Fn(u64, usize, &mut T),
1571        Quantile: Fn(&Self, Range<usize>, usize, usize, T) -> T,
1572    >(
1573        &self,
1574        mut range: Range<usize>,
1575        symbol: &T,
1576        mut result_value: T,
1577        bit_reader: Reader,
1578        result_writer: Writer,
1579        quantile_search: Quantile,
1580    ) -> Option<T> {
1581        // the bit-prefix at the last node where we could go to an interval with smaller elements,
1582        // i.e. where we need to go if the current prefix has no elements smaller than the query
1583        let mut last_smaller_prefix = result_value.clone();
1584        // the level of the last node where we could go to an interval with smaller elements
1585        let mut last_one_level: Option<usize> = None;
1586        // the range of the last node where we could go to an interval with smaller elements
1587        let mut next_smaller_range: Option<Range<usize>> = None;
1588
1589        for (level, data) in self.data.iter().enumerate() {
1590            let query_bit = bit_reader(level, symbol);
1591
1592            // read the amount of elements with the current result-prefix plus one 0 bit.
1593            // if the query_bit is 1, we can calculate the amount of elements with the result-prefix
1594            // plus one 1 from there
1595            let zeros_start = data.rank0(range.start);
1596            let zeros_end = data.rank0(range.end);
1597            let elements_zero = zeros_end - zeros_start;
1598
1599            if query_bit == 0 {
1600                if elements_zero == 0 {
1601                    // if our query bit is zero in this level and suddenly our new interval is empty,
1602                    // all elements that were in the previous interval are bigger than the query element,
1603                    // because they all have a 1 in the current level.
1604                    // this means the predecessor is the largest element in the last smaller interval,
1605                    // i.e. the interval that has a 0 bit at the last level where our prefix had a 1 bit.
1606
1607                    return next_smaller_range.map(|r| {
1608                        let idx = r.end - r.start - 1;
1609                        result_writer(0, last_one_level.unwrap(), &mut last_smaller_prefix);
1610                        quantile_search(
1611                            self,
1612                            r,
1613                            idx,
1614                            last_one_level.unwrap() + 1,
1615                            last_smaller_prefix,
1616                        )
1617                    });
1618                }
1619
1620                // update the prefix
1621                result_writer(0, level, &mut result_value);
1622
1623                // update the range to the interval of the new prefix
1624                range.start = zeros_start;
1625                range.end = zeros_end;
1626            } else {
1627                if elements_zero == range.end - range.start {
1628                    // if our query element is 1 in this level and suddenly our new interval is empty,
1629                    // all elements that were in the previous interval are smaller than the query element,
1630                    // because they all have a 0 in the current level.
1631                    // this means the predecessor is the largest element in the last level's interval.
1632
1633                    let idx = range.end - range.start - 1;
1634                    return Some(quantile_search(self, range, idx, level, result_value));
1635                }
1636
1637                // if the other interval is not empty, we update the last smaller interval to the last interval where we can switch to a 0 prefix
1638                if !(zeros_start..zeros_end).is_empty() {
1639                    last_one_level = Some(level);
1640                    next_smaller_range = Some(zeros_start..zeros_end);
1641                    last_smaller_prefix = result_value.clone();
1642                }
1643
1644                // update the prefix
1645                result_writer(1, level, &mut result_value);
1646
1647                // update the range to the interval of the new prefix
1648                range.start = data.rank0 + (range.start - zeros_start);
1649                range.end = data.rank0 + (range.end - zeros_end);
1650            }
1651        }
1652
1653        Some(result_value)
1654    }
1655
1656    /// Get the predecessor of the given `symbol` in the given `range`.
1657    /// The `symbol` is a `k`-bit word encoded in a [`BitVec`],
1658    /// where the least significant bit is the first element, and `k` is the number of bits per element
1659    /// in the wavelet matrix.
1660    /// The `symbol` does not have to be in the encoded sequence.
1661    /// The predecessor is the largest element in the `range` that is smaller than or equal
1662    /// to the `symbol`.
1663    ///
1664    /// Returns `None` if the number of bits in the `symbol` is not equal to `k`,
1665    /// if the range is empty, if the wavelet matrix is empty, if the range is out of bounds,
1666    /// or if the `symbol` is smaller than all elements in the range.
1667    ///
1668    /// # Example
1669    /// ```
1670    /// use vers_vecs::{BitVec, WaveletMatrix};
1671    ///
1672    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1673    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1674    ///
1675    /// assert_eq!(wavelet_matrix.predecessor(0..3, &BitVec::pack_sequence_u8(&[7], 3)), Some(BitVec::pack_sequence_u8(&[4], 3)));
1676    /// assert_eq!(wavelet_matrix.predecessor(0..3, &BitVec::pack_sequence_u8(&[4], 3)), Some(BitVec::pack_sequence_u8(&[4], 3)));
1677    /// assert_eq!(wavelet_matrix.predecessor(0..6, &BitVec::pack_sequence_u8(&[7], 3)), Some(BitVec::pack_sequence_u8(&[7], 3)));
1678    /// ```
1679    ///
1680    /// [`BitVec`]: BitVec
1681    #[must_use]
1682    pub fn predecessor(&self, range: Range<usize>, symbol: &BitVec) -> Option<BitVec> {
1683        if symbol.len() != self.bits_per_element()
1684            || range.is_empty()
1685            || self.is_empty()
1686            || range.end > self.len()
1687        {
1688            return None;
1689        }
1690
1691        self.predecessor_generic_unchecked(
1692            range,
1693            symbol,
1694            BitVec::from_zeros(self.bits_per_element()),
1695            |level, symbol| symbol.get_unchecked((self.bits_per_element() - 1) - level),
1696            |bit, level, result| {
1697                result.set_unchecked((self.bits_per_element() - 1) - level, bit);
1698            },
1699            Self::partial_quantile_search_unchecked,
1700        )
1701    }
1702
1703    /// Get the predecessor of the given `symbol` in the given `range`.
1704    /// The `symbol` is a `k`-bit word encoded in a `u64` numeral,
1705    /// where k is less than or equal to 64.
1706    /// The `symbol` does not have to be in the encoded sequence.
1707    /// The predecessor is the largest element in the `range` that is smaller than or equal
1708    /// to the `symbol`.
1709    ///
1710    /// Returns `None` if the number of bits in the matrix is greater than 64,
1711    /// if the range is empty, if the wavelet matrix is empty, if the range is out of bounds,
1712    /// or if the `symbol` is smaller than all elements in the range.
1713    ///
1714    /// # Example
1715    /// ```
1716    /// use vers_vecs::{BitVec, WaveletMatrix};
1717    ///
1718    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1719    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1720    ///
1721    /// assert_eq!(wavelet_matrix.predecessor_u64(0..3, 7), Some(4));
1722    /// assert_eq!(wavelet_matrix.predecessor_u64(0..3, 4), Some(4));
1723    /// assert_eq!(wavelet_matrix.predecessor_u64(0..6, 7), Some(7));
1724    /// ```
1725    #[must_use]
1726    pub fn predecessor_u64(&self, range: Range<usize>, symbol: u64) -> Option<u64> {
1727        if self.bits_per_element() > 64
1728            || range.is_empty()
1729            || self.is_empty()
1730            || range.end > self.len()
1731        {
1732            return None;
1733        }
1734
1735        self.predecessor_generic_unchecked(
1736            range,
1737            &symbol,
1738            0,
1739            |level, symbol| (symbol >> ((self.bits_per_element() - 1) - level)) & 1,
1740            |bit, _level, result| {
1741                // we ignore the level here, and instead rely on the fact that the bits are set in order.
1742                // we have to do that, because the quantile_search_u64 does the same.
1743                *result <<= 1;
1744                *result |= bit;
1745            },
1746            Self::partial_quantile_search_u64_unchecked,
1747        )
1748    }
1749
1750    #[inline(always)]
1751    fn successor_generic_unchecked<
1752        T: Clone,
1753        Reader: Fn(usize, &T) -> u64,
1754        Writer: Fn(u64, usize, &mut T),
1755        Quantile: Fn(&Self, Range<usize>, usize, usize, T) -> T,
1756    >(
1757        &self,
1758        mut range: Range<usize>,
1759        symbol: &T,
1760        mut result_value: T,
1761        bit_reader: Reader,
1762        result_writer: Writer,
1763        quantile_search: Quantile,
1764    ) -> Option<T> {
1765        // the bit-prefix at the last node where we could go to an interval with larger elements,
1766        // i.e. where we need to go if the current prefix has no elements larger than the query
1767        let mut last_larger_prefix = result_value.clone();
1768        // the level of the last node where we could go to an interval with larger elements
1769        let mut last_zero_level: Option<usize> = None;
1770        // the range of the last node where we could go to an interval with larger elements
1771        let mut next_larger_range: Option<Range<usize>> = None;
1772
1773        for (level, data) in self.data.iter().enumerate() {
1774            let query_bit = bit_reader(level, symbol);
1775
1776            // read the amount of elements with the current result-prefix plus one 0 bit.
1777            // if the query_bit is 1, we can calculate the amount of elements with the result-prefix
1778            // plus one 1 from there
1779            let zeros_start = data.rank0(range.start);
1780            let zeros_end = data.rank0(range.end);
1781            let elements_zero = zeros_end - zeros_start;
1782
1783            if query_bit == 0 {
1784                if elements_zero == 0 {
1785                    // if our query element is 0 in this level and suddenly our new interval is empty,
1786                    // all elements that were in the previous interval are larger than the query element,
1787                    // because they all have a 1 in the current level.
1788                    // this means the successor is the smallest element in the last level's interval.
1789
1790                    return Some(quantile_search(self, range, 0, level, result_value));
1791                }
1792
1793                // if the other interval is not empty, we update the last interval where we can switch to a prefix with a 1
1794                if !(data.rank0 + (range.start - zeros_start)..data.rank0 + (range.end - zeros_end))
1795                    .is_empty()
1796                {
1797                    last_zero_level = Some(level);
1798                    next_larger_range = Some(
1799                        data.rank0 + (range.start - zeros_start)
1800                            ..data.rank0 + (range.end - zeros_end),
1801                    );
1802                    last_larger_prefix = result_value.clone();
1803                }
1804
1805                // update the prefix
1806                result_writer(0, level, &mut result_value);
1807
1808                // update the range to the interval of the new prefix
1809                range.start = zeros_start;
1810                range.end = zeros_end;
1811            } else {
1812                if elements_zero == range.end - range.start {
1813                    // if our query bit is 1 in this level and suddenly our new interval is empty,
1814                    // all elements that were in the previous interval are smaller than the query element,
1815                    // because they all have a 0 in the current level.
1816                    // this means the successor is the smallest element in the last interval with a larger prefix,
1817                    // i.e. the interval that has a 1 bit at the last level where our prefix had a 0 bit.
1818
1819                    return next_larger_range.map(|r| {
1820                        result_writer(1, last_zero_level.unwrap(), &mut last_larger_prefix);
1821                        quantile_search(
1822                            self,
1823                            r,
1824                            0,
1825                            last_zero_level.unwrap() + 1,
1826                            last_larger_prefix,
1827                        )
1828                    });
1829                }
1830
1831                // update the prefix
1832                result_writer(1, level, &mut result_value);
1833
1834                // update the range to the interval of the new prefix
1835                range.start = data.rank0 + (range.start - zeros_start);
1836                range.end = data.rank0 + (range.end - zeros_end);
1837            }
1838        }
1839
1840        Some(result_value)
1841    }
1842
1843    /// Get the successor of the given `symbol` in the given range.
1844    /// The `symbol` is a `k`-bit word encoded in a [`BitVec`],
1845    /// where the least significant bit is the first element, and `k` is the number of bits per element
1846    /// in the wavelet matrix.
1847    /// The `symbol` does not have to be in the encoded sequence.
1848    /// The successor is the smallest element in the range that is greater than or equal
1849    /// to the `symbol`.
1850    ///
1851    /// Returns `None` if the number of bits in the `symbol` is not equal to `k`,
1852    /// if the range is empty, if the wavelet matrix is empty, if the range is out of bounds,
1853    /// or if the `symbol` is greater than all elements in the range.
1854    ///
1855    /// # Example
1856    /// ```
1857    /// use vers_vecs::{BitVec, WaveletMatrix};
1858    ///
1859    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1860    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1861    ///
1862    /// assert_eq!(wavelet_matrix.successor(0..3, &BitVec::pack_sequence_u8(&[2], 3)), Some(BitVec::pack_sequence_u8(&[4], 3)));
1863    /// assert_eq!(wavelet_matrix.successor(0..3, &BitVec::pack_sequence_u8(&[5], 3)), None);
1864    /// assert_eq!(wavelet_matrix.successor(0..6, &BitVec::pack_sequence_u8(&[2], 3)), Some(BitVec::pack_sequence_u8(&[2], 3)));
1865    /// ```
1866    ///
1867    /// [`BitVec`]: BitVec
1868    #[must_use]
1869    pub fn successor(&self, range: Range<usize>, symbol: &BitVec) -> Option<BitVec> {
1870        if symbol.len() != self.bits_per_element()
1871            || range.is_empty()
1872            || self.is_empty()
1873            || range.end > self.len()
1874        {
1875            return None;
1876        }
1877
1878        self.successor_generic_unchecked(
1879            range,
1880            symbol,
1881            BitVec::from_zeros(self.bits_per_element()),
1882            |level, symbol| symbol.get_unchecked((self.bits_per_element() - 1) - level),
1883            |bit, level, result| {
1884                result.set_unchecked((self.bits_per_element() - 1) - level, bit);
1885            },
1886            Self::partial_quantile_search_unchecked,
1887        )
1888    }
1889
1890    /// Get the successor of the given `symbol` in the given range.
1891    /// The `symbol` is a `k`-bit word encoded in a `u64` numeral,
1892    /// where k is less than or equal to 64.
1893    /// The `symbol` does not have to be in the encoded sequence.
1894    /// The successor is the smallest element in the range that is greater than or equal
1895    /// to the `symbol`.
1896    ///
1897    /// Returns `None` if the number of bits in the matrix is greater than 64,
1898    /// if the range is empty, if the wavelet matrix is empty, if the range is out of bounds,
1899    /// or if the `symbol` is greater than all elements in the range.
1900    ///
1901    /// # Example
1902    /// ```
1903    /// use vers_vecs::{BitVec, WaveletMatrix};
1904    ///
1905    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1906    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1907    ///
1908    /// assert_eq!(wavelet_matrix.successor_u64(0..3, 2), Some(4));
1909    /// assert_eq!(wavelet_matrix.successor_u64(0..3, 5), None);
1910    /// assert_eq!(wavelet_matrix.successor_u64(0..6, 2), Some(2));
1911    /// ```
1912    #[must_use]
1913    pub fn successor_u64(&self, range: Range<usize>, symbol: u64) -> Option<u64> {
1914        if self.bits_per_element() > 64
1915            || range.is_empty()
1916            || self.is_empty()
1917            || range.end > self.len()
1918        {
1919            return None;
1920        }
1921
1922        self.successor_generic_unchecked(
1923            range,
1924            &symbol,
1925            0,
1926            |level, symbol| (symbol >> ((self.bits_per_element() - 1) - level)) & 1,
1927            |bit, _level, result| {
1928                // we ignore the level here, and instead rely on the fact that the bits are set in order.
1929                // we have to do that, because the quantile_search_u64 does the same.
1930                *result <<= 1;
1931                *result |= bit;
1932            },
1933            Self::partial_quantile_search_u64_unchecked,
1934        )
1935    }
1936
1937    /// Get an iterator over the elements of the encoded sequence.
1938    /// The iterator yields `u64` elements.
1939    /// If the number of bits per element exceeds 64, `None` is returned.
1940    ///
1941    /// # Example
1942    /// ```
1943    /// use vers_vecs::{BitVec, WaveletMatrix};
1944    ///
1945    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1946    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1947    ///
1948    /// let mut iter = wavelet_matrix.iter_u64().unwrap();
1949    /// assert_eq!(iter.collect::<Vec<_>>(), vec![1, 4, 4, 1, 2, 7]);
1950    /// ```
1951    #[must_use]
1952    pub fn iter_u64(&self) -> Option<WaveletNumRefIter<'_>> {
1953        if self.bits_per_element() > 64 {
1954            None
1955        } else {
1956            Some(WaveletNumRefIter::new(self))
1957        }
1958    }
1959
1960    /// Turn the encoded sequence into an iterator.
1961    /// The iterator yields `u64` elements.
1962    /// If the number of bits per element exceeds 64, `None` is returned.
1963    #[must_use]
1964    pub fn into_iter_u64(self) -> Option<WaveletNumIter> {
1965        if self.bits_per_element() > 64 {
1966            None
1967        } else {
1968            Some(WaveletNumIter::new(self))
1969        }
1970    }
1971
1972    /// Get an iterator over the sorted elements of the encoded sequence.
1973    /// The iterator yields `BitVec` elements.
1974    ///
1975    /// See also [`iter_sorted_u64`] for an iterator that yields `u64` elements.
1976    ///
1977    /// [`iter_sorted_u64`]: Self::iter_sorted_u64
1978    #[must_use]
1979    pub fn iter_sorted(&self) -> WaveletSortedRefIter<'_> {
1980        WaveletSortedRefIter::new(self)
1981    }
1982
1983    /// Turn the encoded sequence into an iterator over the sorted sequence.
1984    /// The iterator yields `BitVec` elements.
1985    #[must_use]
1986    pub fn into_iter_sorted(self) -> WaveletSortedIter {
1987        WaveletSortedIter::new(self)
1988    }
1989
1990    /// Get an iterator over the sorted elements of the encoded sequence.
1991    /// The iterator yields `u64` elements.
1992    /// If the number of bits per element exceeds 64, `None` is returned.
1993    ///
1994    /// # Example
1995    /// ```
1996    /// use vers_vecs::{BitVec, WaveletMatrix};
1997    ///
1998    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1999    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
2000    ///
2001    /// let mut iter = wavelet_matrix.iter_sorted_u64().unwrap();
2002    /// assert_eq!(iter.collect::<Vec<_>>(), vec![1, 1, 2, 4, 4, 7]);
2003    /// ```
2004    #[must_use]
2005    pub fn iter_sorted_u64(&self) -> Option<WaveletSortedNumRefIter<'_>> {
2006        if self.bits_per_element() > 64 {
2007            None
2008        } else {
2009            Some(WaveletSortedNumRefIter::new(self))
2010        }
2011    }
2012
2013    /// Turn the encoded sequence into an iterator over the sorted sequence.
2014    /// The iterator yields `u64` elements.
2015    /// If the number of bits per element exceeds 64, `None` is returned.
2016    #[must_use]
2017    pub fn into_iter_sorted_u64(self) -> Option<WaveletSortedNumIter> {
2018        if self.bits_per_element() > 64 {
2019            None
2020        } else {
2021            Some(WaveletSortedNumIter::new(self))
2022        }
2023    }
2024
2025    /// Get the number of bits per element in the alphabet of the encoded sequence.
2026    #[must_use]
2027    #[inline(always)]
2028    pub fn bits_per_element(&self) -> usize {
2029        self.data.len()
2030    }
2031
2032    /// Get the number of bits per element in the alphabet of the encoded sequence.
2033    #[must_use]
2034    #[deprecated(since = "1.5.1", note = "please use `bits_per_element` instead")]
2035    #[allow(clippy::cast_possible_truncation)]
2036    pub fn bit_len(&self) -> u16 {
2037        self.bits_per_element() as u16
2038    }
2039
2040    /// Get the number of elements stored in the encoded sequence.
2041    #[must_use]
2042    pub fn len(&self) -> usize {
2043        if self.data.is_empty() {
2044            0
2045        } else {
2046            self.data[0].len()
2047        }
2048    }
2049
2050    /// Check if the wavelet matrix is empty.
2051    #[must_use]
2052    pub fn is_empty(&self) -> bool {
2053        self.len() == 0
2054    }
2055
2056    /// Get the number of bytes allocated on the heap for the wavelet matrix.
2057    /// This does not include memory that is allocated but unused due to allocation policies of
2058    /// internal data structures.
2059    #[must_use]
2060    pub fn heap_size(&self) -> usize {
2061        self.data.iter().map(RsVec::heap_size).sum::<usize>()
2062    }
2063}
2064
2065impl_vector_iterator!(
2066    WaveletMatrix,
2067    WaveletIter,
2068    WaveletRefIter,
2069    get_value_unchecked,
2070    get_value,
2071    BitVec
2072);
2073
2074impl_vector_iterator!(
2075    WaveletMatrix,
2076    WaveletNumIter,
2077    WaveletNumRefIter,
2078    get_u64_unchecked,
2079    get_u64,
2080    u64,
2081    special
2082);
2083
2084impl_vector_iterator!(
2085    WaveletMatrix,
2086    WaveletSortedIter,
2087    WaveletSortedRefIter,
2088    get_sorted_unchecked,
2089    get_sorted,
2090    BitVec,
2091    special
2092);
2093
2094impl_vector_iterator!(
2095    WaveletMatrix,
2096    WaveletSortedNumIter,
2097    WaveletSortedNumRefIter,
2098    get_sorted_u64_unchecked,
2099    get_sorted_u64,
2100    u64,
2101    special
2102);
2103
2104#[cfg(test)]
2105mod tests;