vers_vecs/wavelet/
mod.rs

1//! This module contains the implementation of a [wavelet matrix].
2//! The wavelet matrix is a data structure that encodes a sequence of `n` `k`-bit words
3//! into a matrix of bit vectors, allowing for fast rank and select queries on the encoded sequence.
4//!
5//! This implementation further supports quantile queries, and range-predecessor and -successor queries.
6//! All operations are `O(k)` time complexity.
7//!
8//! See the struct documentation for more information.
9//!
10//! [wavelet matrix]: WaveletMatrix
11
12use crate::util::impl_vector_iterator;
13use crate::{BitVec, RsVec};
14use std::mem;
15use std::ops::Range;
16
17/// A wavelet matrix implementation implemented as described in
18/// [Navarro and Claude, 2021](http://dx.doi.org/10.1007/978-3-642-34109-0_18).
19/// The implementation is designed to allow for extremely large alphabet sizes, without
20/// sacrificing performance for small alphabets.
21///
22/// There are two constructor algorithms available:
23/// - [`from_bit_vec`] and [`from_slice`] construct the wavelet matrix by repeatedly sorting the elements.
24///   These constructors have linear space overhead and run in `O(kn * log n)` time complexity.
25/// - [`from_bit_vec_pc`] and [`from_slice_pc`] construct the wavelet matrix by counting the
26///   prefixes of the elements. These constructors have a space complexity of `O(2^k)` and run
27///   in `O(kn)`, which makes this constructor preferable for large sequences over small alphabets.
28///
29/// They encode a sequence of `n` `k`-bit words into a wavelet matrix which supports constant-time
30/// rank and select queries on elements of its `k`-bit alphabet.
31/// All query functions are mirrored for both `BitVec` and `u64` query elements, so
32/// if `k <= 64`, no heap allocation is needed for the query element.
33///
34/// Other than rank and select queries, the matrix also supports quantile queries (range select i), and
35/// range-predecessor and -successor queries, all of which are loosely based on
36/// [Külekci and Thankachan](https://doi.org/10.1016/j.jda.2017.01.002) with better time complexity.
37///
38/// All operations implemented on the matrix are `O(k)` time complexity.
39/// The space complexity of the wavelet matrix is `O(n * k)` with a small linear overhead
40/// (see [`RsVec`]).
41///
42/// # Examples
43/// ```
44/// use vers_vecs::{BitVec, WaveletMatrix};
45///
46/// // pack elements from a 3-bit alphabet into a bit vector and construct a wavelet matrix from them
47/// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
48/// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
49///
50/// // query the wavelet matrix
51/// assert_eq!(wavelet_matrix.get_u64(0), Some(1));
52/// assert_eq!(wavelet_matrix.get_u64(1), Some(4));
53///
54// // rank and select queries
55/// assert_eq!(wavelet_matrix.rank_u64(3, 4), Some(2));
56/// assert_eq!(wavelet_matrix.rank_u64(3, 1), Some(1));
57/// assert_eq!(wavelet_matrix.select_u64(0, 7), Some(5));
58///
59/// // statistics
60/// assert_eq!(wavelet_matrix.range_median_u64(0..3), Some(4));
61/// assert_eq!(wavelet_matrix.predecessor_u64(0..6, 3), Some(2));
62/// ```
63///
64/// [`RsVec`]: RsVec
65#[derive(Clone, Debug)]
66#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
67pub struct WaveletMatrix {
68    data: Box<[RsVec]>,
69}
70
71impl WaveletMatrix {
72    /// Generic constructor that constructs the wavelet matrix by repeatedly sorting the elements.
73    /// The runtime complexity is `O(kn * log n)`.
74    ///
75    /// # Parameters
76    /// - `bits_per_element`: The number of bits in each word. Cannot exceed 1 << 16.
77    /// - `num_elements`: The number of elements in the sequence.
78    /// - `bit_lookup`: A closure that returns the `bit`-th bit of the `element`-th word.
79    #[inline(always)] // should get rid of closures in favor of static calls
80    fn permutation_sorting<LOOKUP: Fn(usize, usize) -> u64>(
81        bits_per_element: u16,
82        num_elements: usize,
83        bit_lookup: LOOKUP,
84    ) -> Self {
85        let element_len = bits_per_element as usize;
86
87        let mut data = vec![BitVec::from_zeros(num_elements); element_len];
88
89        // insert the first bit of each word into the first bit vector
90        // for each following level, insert the next bit of each word into the next bit vector
91        // sorted stably by the previous bit vector
92        let mut permutation = (0..num_elements).collect::<Vec<_>>();
93        let mut next_permutation = vec![0; num_elements];
94
95        for (level, data) in data.iter_mut().enumerate() {
96            let mut total_zeros = 0;
97            for (i, p) in permutation.iter().enumerate() {
98                if bit_lookup(*p, element_len - level - 1) == 0 {
99                    total_zeros += 1;
100                } else {
101                    data.set(i, 1).unwrap();
102                }
103            }
104
105            // scan through the generated bit array and move the elements to the correct position
106            // for the next permutation
107            if level < element_len - 1 {
108                let mut zero_boundary = 0;
109                let mut one_boundary = total_zeros;
110                for (i, p) in permutation.iter().enumerate() {
111                    if data.get_unchecked(i) == 0 {
112                        next_permutation[zero_boundary] = *p;
113                        zero_boundary += 1;
114                    } else {
115                        next_permutation[one_boundary] = *p;
116                        one_boundary += 1;
117                    }
118                }
119
120                mem::swap(&mut permutation, &mut next_permutation);
121            }
122        }
123
124        Self {
125            data: data.into_iter().map(BitVec::into).collect(),
126        }
127    }
128
129    /// Create a new wavelet matrix from a sequence of `n` `k`-bit words.
130    /// The constructor runs in `O(kn * log n)` time complexity.
131    ///
132    /// # Parameters
133    /// - `bit_vec`: A packed sequence of `n` `k`-bit words. The `i`-th word begins in the
134    ///   `bits_per_element * i`-th bit of the bit vector. Words are stored from least to most
135    ///   significant bit.
136    /// - `bits_per_element`: The number `k` of bits in each word. Cannot exceed 1 << 16.
137    ///
138    /// # Panics
139    /// Panics if the number of bits in the bit vector is not a multiple of the number of bits per element.
140    #[must_use]
141    pub fn from_bit_vec(bit_vec: &BitVec, bits_per_element: u16) -> Self {
142        assert_eq!(bit_vec.len() % bits_per_element as usize, 0, "The number of bits in the bit vector must be a multiple of the number of bits per element.");
143        let num_elements = bit_vec.len() / bits_per_element as usize;
144        Self::permutation_sorting(bits_per_element, num_elements, |element, bit| {
145            bit_vec.get_unchecked(element * bits_per_element as usize + bit)
146        })
147    }
148
149    /// Create a new wavelet matrix from a sequence of `n` `k`-bit words.
150    /// The constructor runs in `O(kn * log n)` time complexity.
151    ///
152    /// # Parameters
153    /// - `sequence`: A slice of `n` u64 values, each encoding a `k`-bit word.
154    /// - `bits_per_element`: The number `k` of bits in each word. Cannot exceed 64.
155    ///
156    /// # Panics
157    /// Panics if the number of bits per element exceeds 64.
158    #[must_use]
159    pub fn from_slice(sequence: &[u64], bits_per_element: u16) -> Self {
160        assert!(
161            bits_per_element <= 64,
162            "The number of bits per element cannot exceed 64."
163        );
164        Self::permutation_sorting(bits_per_element, sequence.len(), |element, bit| {
165            (sequence[element] >> bit) & 1
166        })
167    }
168
169    /// Generic constructor that constructs the wavelet matrix by counting the prefixes of the elements.
170    /// The runtime complexity is `O(kn)`.
171    /// This constructor is only recommended for small alphabets.
172    ///
173    /// # Parameters
174    /// - `bits_per_element`: The number of bits in each word. Cannot exceed 64.
175    /// - `num_elements`: The number of elements in the sequence.
176    /// - `bit_lookup`: A closure that returns the `bit`-th bit of the `element`-th word.
177    /// - `element_lookup`: A closure that returns the `element`-th word.
178    #[inline(always)] // should get rid of closures in favor of static calls
179    fn prefix_counting<LOOKUP: Fn(usize, usize) -> u64, ELEMENT: Fn(usize) -> u64>(
180        bits_per_element: u16,
181        num_elements: usize,
182        bit_lookup: LOOKUP,
183        element_lookup: ELEMENT,
184    ) -> Self {
185        let element_len = bits_per_element as usize;
186        let mut histogram = vec![0usize; 1 << bits_per_element];
187        let mut borders = vec![0usize; 1 << bits_per_element];
188        let mut data = vec![BitVec::from_zeros(num_elements); element_len];
189
190        for i in 0..num_elements {
191            histogram[element_lookup(i) as usize] += 1;
192            data[0].set_unchecked(i, bit_lookup(i, element_len - 1));
193        }
194
195        for level in (1..element_len).rev() {
196            // combine histograms of prefixes
197            for h in 0..1 << level {
198                histogram[h] = histogram[2 * h] + histogram[2 * h + 1];
199            }
200
201            // compute borders of current level, using bit reverse patterns because of the weird
202            // node ordering in wavelet matrices
203            borders[0] = 0;
204            for h in 1usize..1 << level {
205                let h_minus_1 = (h - 1).reverse_bits() >> (64 - level);
206                borders[h.reverse_bits() >> (64 - level)] =
207                    borders[h_minus_1] + histogram[h_minus_1];
208            }
209
210            for i in 0..num_elements {
211                let bit = bit_lookup(i, element_len - level - 1);
212                data[level].set_unchecked(
213                    borders[element_lookup(i) as usize >> (element_len - level)],
214                    bit,
215                );
216                borders[element_lookup(i) as usize >> (element_len - level)] += 1;
217            }
218        }
219
220        Self {
221            data: data.into_iter().map(BitVec::into).collect(),
222        }
223    }
224
225    /// Create a new wavelet matrix from a sequence of `n` `k`-bit words using the prefix counting
226    /// algorithm [Dinklage et al.](https://doi.org/10.1145/3457197)
227    /// The constructor runs in `O(kn)` time complexity but requires `O(2^k)` space during construction,
228    /// so it is only recommended for small alphabets.
229    /// Use the [`from_bit_vec`] or [`from_slice`] constructors for larger alphabets.
230    ///
231    /// # Parameters
232    /// - `bit_vec`: A packed sequence of `n` `k`-bit words. The `i`-th word begins in the
233    ///   `bits_per_element * i`-th bit of the bit vector. Words are stored from least to most
234    ///   significant bit.
235    /// - `bits_per_element`: The number `k` of bits in each word. Cannot exceed 1 << 16.
236    ///
237    /// # Panics
238    /// Panics if the number of bits in the bit vector is not a multiple of the number of bits per element,
239    /// or if the number of bits per element exceeds 64.
240    ///
241    /// [`from_bit_vec`]: WaveletMatrix::from_bit_vec
242    /// [`from_slice`]: WaveletMatrix::from_slice
243    #[must_use]
244    pub fn from_bit_vec_pc(bit_vec: &BitVec, bits_per_element: u16) -> Self {
245        assert_eq!(bit_vec.len() % bits_per_element as usize, 0, "The number of bits in the bit vector must be a multiple of the number of bits per element.");
246        assert!(
247            bits_per_element <= 64,
248            "The number of bits per element cannot exceed 64."
249        );
250        let num_elements = bit_vec.len() / bits_per_element as usize;
251        Self::prefix_counting(
252            bits_per_element,
253            num_elements,
254            |element, bit| bit_vec.get_unchecked(element * bits_per_element as usize + bit),
255            |element| {
256                bit_vec.get_bits_unchecked(
257                    element * bits_per_element as usize,
258                    bits_per_element as usize,
259                )
260            },
261        )
262    }
263
264    /// Create a new wavelet matrix from a sequence of `n` `k`-bit words using the prefix counting
265    /// algorithm [Dinklage et al.](https://doi.org/10.1145/3457197)
266    /// The constructor runs in `O(kn)` time complexity but requires `O(2^k)` space during construction,
267    /// so it is only recommended for small alphabets.
268    /// Use the [`from_bit_vec`] or [`from_slice`] constructors for larger alphabets.
269    ///
270    /// # Parameters
271    /// - `sequence`: A slice of `n` u64 values, each encoding a `k`-bit word.
272    /// - `bits_per_element`: The number `k` of bits in each word. Cannot exceed 64.
273    ///
274    /// # Panics
275    /// Panics if the number of bits per element exceeds 64.
276    ///
277    /// [`from_bit_vec`]: WaveletMatrix::from_bit_vec
278    /// [`from_slice`]: WaveletMatrix::from_slice
279    #[must_use]
280    pub fn from_slice_pc(sequence: &[u64], bits_per_element: u16) -> Self {
281        assert!(
282            bits_per_element <= 64,
283            "The number of bits per element cannot exceed 64."
284        );
285        Self::prefix_counting(
286            bits_per_element,
287            sequence.len(),
288            |element, bit| (sequence[element] >> bit) & 1,
289            |element| sequence[element],
290        )
291    }
292
293    /// Generic function to read a value from the wavelet matrix and consume it with a closure.
294    /// The function is used by the `get_value` and `get_u64` functions, deduplicating code.
295    #[inline(always)]
296    fn reconstruct_value_unchecked<F: FnMut(u64)>(&self, mut i: usize, mut target_func: F) {
297        for level in 0..self.bits_per_element() {
298            let bit = self.data[level].get_unchecked(i);
299            target_func(bit);
300            if bit == 0 {
301                i = self.data[level].rank0(i);
302            } else {
303                i = self.data[level].rank0 + self.data[level].rank1(i);
304            }
305        }
306    }
307
308    /// Get the `i`-th element of the encoded sequence in a `k`-bit word.
309    /// The `k`-bit word is returned as a `BitVec`.
310    /// The first element of the bit vector is the least significant bit.
311    ///
312    /// Returns `None` if the index is out of bounds.
313    ///
314    /// # Example
315    /// ```
316    /// use vers_vecs::{BitVec, WaveletMatrix};
317    ///
318    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
319    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
320    ///
321    /// assert_eq!(wavelet_matrix.get_value(0), Some(BitVec::pack_sequence_u8(&[1], 3)));
322    /// assert_eq!(wavelet_matrix.get_value(1), Some(BitVec::pack_sequence_u8(&[4], 3)));
323    /// assert_eq!(wavelet_matrix.get_value(100), None);
324    /// ```
325    #[must_use]
326    pub fn get_value(&self, i: usize) -> Option<BitVec> {
327        if self.data.is_empty() || i >= self.data[0].len() {
328            None
329        } else {
330            Some(self.get_value_unchecked(i))
331        }
332    }
333
334    /// Get the `i`-th element of the encoded sequence in a `k`-bit word.
335    /// The `k`-bit word is returned as a `BitVec`.
336    /// The first element of the bit vector is the least significant bit.
337    /// This function does not perform bounds checking.
338    /// Use [`get_value`] for a checked version.
339    ///
340    /// # Panics
341    /// May panic if the index is out of bounds. May instead return an empty bit vector.
342    ///
343    /// [`get_value`]: WaveletMatrix::get_value
344    #[must_use]
345    pub fn get_value_unchecked(&self, i: usize) -> BitVec {
346        let mut value = BitVec::from_zeros(self.bits_per_element());
347        let mut level = self.bits_per_element() - 1;
348        self.reconstruct_value_unchecked(i, |bit| {
349            value.set_unchecked(level, bit);
350            level = level.saturating_sub(1);
351        });
352        value
353    }
354
355    /// Get the `i`-th element of the encoded sequence as a `u64`.
356    /// The `u64` is constructed from the `k`-bit word stored in the wavelet matrix.
357    ///
358    /// Returns `None` if the index is out of bounds, or if the number of bits per element exceeds 64.
359    ///
360    /// # Example
361    /// ```
362    /// use vers_vecs::{BitVec, WaveletMatrix};
363    ///
364    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
365    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
366    ///
367    /// assert_eq!(wavelet_matrix.get_u64(0), Some(1));
368    /// assert_eq!(wavelet_matrix.get_u64(1), Some(4));
369    /// assert_eq!(wavelet_matrix.get_u64(100), None);
370    /// ```
371    #[must_use]
372    pub fn get_u64(&self, i: usize) -> Option<u64> {
373        if self.bits_per_element() > 64 || self.data.is_empty() || i >= self.data[0].len() {
374            None
375        } else {
376            Some(self.get_u64_unchecked(i))
377        }
378    }
379
380    /// Get the `i`-th element of the encoded sequence as a `u64` numeral.
381    /// The element is encoded in the lowest `k` bits of the numeral.
382    /// If the number of bits per element exceeds 64, the value is truncated.
383    /// This function does not perform bounds checking.
384    /// Use [`get_u64`] for a checked version.
385    ///
386    /// # Panic
387    /// May panic if the value of `i` is out of bounds. May instead return 0.
388    ///
389    /// [`get_u64`]: WaveletMatrix::get_u64
390    #[must_use]
391    pub fn get_u64_unchecked(&self, i: usize) -> u64 {
392        let mut value = 0;
393        self.reconstruct_value_unchecked(i, |bit| {
394            value <<= 1;
395            value |= bit;
396        });
397        value
398    }
399
400    /// Get the number of occurrences of the given `symbol` in the encoded sequence in the `range`.
401    /// The `symbol` is a `k`-bit word encoded in a [`BitVec`],
402    /// where the least significant bit is the first element, and `k` is the number of bits per element
403    /// in the wavelet matrix.
404    ///
405    /// This method does not perform bounds checking, nor does it check if the `symbol` is a valid
406    /// `k`-bit word.
407    /// Use [`rank_range`] for a checked version.
408    ///
409    /// # Panics
410    /// May panic if the `range` is out of bounds,
411    /// or if the number of bits in `symbol` is lower than `k`.
412    /// May instead return 0.
413    ///
414    /// [`BitVec`]: BitVec
415    /// [`rank_range`]: WaveletMatrix::rank_range
416    #[must_use]
417    pub fn rank_range_unchecked(&self, mut range: Range<usize>, symbol: &BitVec) -> usize {
418        for (level, data) in self.data.iter().enumerate() {
419            if symbol.get_unchecked((self.bits_per_element() - 1) - level) == 0 {
420                range.start = data.rank0(range.start);
421                range.end = data.rank0(range.end);
422            } else {
423                range.start = data.rank0 + data.rank1(range.start);
424                range.end = data.rank0 + data.rank1(range.end);
425            }
426        }
427
428        range.end - range.start
429    }
430
431    /// Get the number of occurrences of the given `symbol` in the encoded sequence in the `range`.
432    /// The `symbol` is a `k`-bit word encoded in a [`BitVec`],
433    /// where the least significant bit is the first element, and `k` is the number of bits per element
434    /// in the wavelet matrix.
435    ///
436    /// Returns `None` if the `range` is out of bounds (greater than the length of the encoded sequence,
437    /// but since it is exclusive, it may be equal to the length),
438    /// or if the number of bits in `symbol` is not equal to `k`.
439    ///
440    /// # Example
441    /// ```
442    /// use vers_vecs::{BitVec, WaveletMatrix};
443    ///
444    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
445    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
446    ///
447    /// assert_eq!(wavelet_matrix.rank_range(0..3, &BitVec::pack_sequence_u8(&[4], 3)), Some(2));
448    /// assert_eq!(wavelet_matrix.rank_range(2..4, &BitVec::pack_sequence_u8(&[4], 3)), Some(1));
449    /// ```
450    ///
451    /// [`BitVec`]: BitVec
452    #[must_use]
453    pub fn rank_range(&self, range: Range<usize>, symbol: &BitVec) -> Option<usize> {
454        if range.start >= self.len()
455            || range.end > self.len()
456            || symbol.len() != self.bits_per_element()
457        {
458            None
459        } else {
460            Some(self.rank_range_unchecked(range, symbol))
461        }
462    }
463
464    /// Get the number of occurrences of the given `symbol` in the encoded sequence in the `range`.
465    /// The `symbol` is a `k`-bit word encoded in a u64 numeral,
466    /// where k is less than or equal to 64.
467    /// The interval is half-open, meaning `rank_range_u64(0..0, symbol)` returns 0.
468    ///
469    /// This method does not perform bounds checking, nor does it check if the elements of the
470    /// wavelet matrix can be represented in a u64 numeral.
471    /// Use [`rank_range_u64`] for a checked version.
472    ///
473    /// # Panics
474    /// May panic if the `range` is out of bounds.
475    /// May instead return 0.
476    /// If the number of bits in wavelet matrix elements exceed `64`, the behavior is
477    /// platform-dependent.
478    ///
479    /// [`rank_range_u64`]: WaveletMatrix::rank_range_u64
480    #[must_use]
481    pub fn rank_range_u64_unchecked(&self, mut range: Range<usize>, symbol: u64) -> usize {
482        for (level, data) in self.data.iter().enumerate() {
483            if (symbol >> ((self.bits_per_element() - 1) - level)) & 1 == 0 {
484                range.start = data.rank0(range.start);
485                range.end = data.rank0(range.end);
486            } else {
487                range.start = data.rank0 + data.rank1(range.start);
488                range.end = data.rank0 + data.rank1(range.end);
489            }
490        }
491
492        range.end - range.start
493    }
494
495    /// Get the number of occurrences of the given `symbol` in the encoded sequence in the `range`.
496    /// The `symbol` is a `k`-bit word encoded in a u64 numeral,
497    /// where k is less than or equal to 64.
498    /// The interval is half-open, meaning `rank_range_u64(0..0, symbol)` returns 0.
499    ///
500    /// Returns `None` if the `range` is out of bounds (greater than the length of the encoded sequence,
501    /// but since it is exclusive, it may be equal to the length),
502    /// or if the number of bits in the wavelet matrix elements exceed `64`.
503    ///
504    /// # Example
505    /// ```
506    /// use vers_vecs::{BitVec, WaveletMatrix};
507    ///
508    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
509    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
510    ///
511    /// assert_eq!(wavelet_matrix.rank_range_u64(0..3, 4), Some(2));
512    /// assert_eq!(wavelet_matrix.rank_range_u64(2..4, 4), Some(1));
513    /// ```
514    #[must_use]
515    pub fn rank_range_u64(&self, range: Range<usize>, symbol: u64) -> Option<usize> {
516        if range.start >= self.len() || range.end > self.len() || self.bits_per_element() > 64 {
517            None
518        } else {
519            Some(self.rank_range_u64_unchecked(range, symbol))
520        }
521    }
522
523    /// Get the number of occurrences of the given `symbol` in the encoded sequence between the
524    /// `offset`-th and `i`-th element (exclusive).
525    /// This is equivalent to ```rank_range_unchecked(offset..i, symbol)```.
526    /// The interval is half-open, meaning ```rank_offset_unchecked(0, 0, symbol)``` returns 0.
527    ///
528    /// The `symbol` is a `k`-bit word encoded in a [`BitVec`],
529    /// where the least significant bit is the first element, and `k` is the number of bits per element
530    /// in the wavelet matrix.
531    ///
532    /// This method does not perform bounds checking, nor does it check if the `symbol` is a valid
533    /// `k`-bit word.
534    /// Use [`rank_offset`] for a checked version.
535    ///
536    /// # Panics
537    /// May panic if `offset` is out of bounds,
538    /// or if `offset + i` is larger than the length of the encoded sequence,
539    /// or if `offset` is greater than `i`,
540    /// or if the number of bits in `symbol` is lower than `k`.
541    /// May instead return 0.
542    ///
543    /// [`BitVec`]: BitVec
544    /// [`rank_offset`]: WaveletMatrix::rank_offset
545    #[must_use]
546    pub fn rank_offset_unchecked(&self, offset: usize, i: usize, symbol: &BitVec) -> usize {
547        self.rank_range_unchecked(offset..i, symbol)
548    }
549
550    /// Get the number of occurrences of the given `symbol` in the encoded sequence between the
551    /// `offset`-th and `i`-th element (exclusive).
552    /// This is equivalent to ``rank_range(offset..i, symbol)``.
553    /// The interval is half-open, meaning ``rank_offset(0, 0, symbol)`` returns 0,
554    /// because the interval is empty.
555    ///
556    /// The `symbol` is a `k`-bit word encoded in a [`BitVec`],
557    /// where the least significant bit is the first element, and `k` is the number of bits per element
558    /// in the wavelet matrix.
559    ///
560    /// Returns `None` if `offset` is out of bounds,
561    /// or if `i` is larger than the length of the encoded sequence,
562    /// or if `offset` is greater than `i`,
563    /// or if the number of bits in `symbol` is not equal to `k`.
564    /// `i` may equal the length of the encoded sequence,
565    /// which will return the number of occurrences of the symbol up to the end of the sequence.
566    ///
567    /// # Example
568    /// ```
569    /// use vers_vecs::{BitVec, WaveletMatrix};
570    ///
571    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
572    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
573    ///
574    /// assert_eq!(wavelet_matrix.rank_offset(0, 3, &BitVec::pack_sequence_u8(&[4], 3)), Some(2));
575    /// assert_eq!(wavelet_matrix.rank_offset(2, 4, &BitVec::pack_sequence_u8(&[4], 3)), Some(1));
576    /// ```
577    ///
578    /// [`BitVec`]: BitVec
579    #[must_use]
580    pub fn rank_offset(&self, offset: usize, i: usize, symbol: &BitVec) -> Option<usize> {
581        if offset > i
582            || offset >= self.len()
583            || i > self.len()
584            || symbol.len() != self.bits_per_element()
585        {
586            None
587        } else {
588            Some(self.rank_offset_unchecked(offset, i, symbol))
589        }
590    }
591
592    /// Get the number of occurrences of the given `symbol` in the encoded sequence between the
593    /// `offset`-th and `i`-th element (exclusive).
594    /// This is equivalent to ``rank_range(offset..i, symbol)``.
595    /// The interval is half-open, meaning ``rank_offset(0, 0, symbol)`` returns 0.
596    ///
597    /// The `symbol` is a `k`-bit word encoded in a u64 numeral,
598    /// where k is less than or equal to 64.
599    ///
600    /// This method does not perform bounds checking, nor does it check if the elements of the
601    /// wavelet matrix can be represented in a u64 numeral.
602    /// Use [`rank_offset_u64`] for a checked version.
603    ///
604    /// # Panics
605    /// May panic if `offset` is out of bounds,
606    /// or if `i` is larger than the length of the encoded sequence,
607    /// or if `offset` is greater than `i`,
608    /// or if the number of bits in wavelet matrix elements exceed `64`.
609    /// May instead return 0.
610    ///
611    /// [`rank_offset_u64`]: WaveletMatrix::rank_offset_u64
612    #[must_use]
613    pub fn rank_offset_u64_unchecked(&self, offset: usize, i: usize, symbol: u64) -> usize {
614        self.rank_range_u64_unchecked(offset..i, symbol)
615    }
616
617    /// Get the number of occurrences of the given `symbol` in the encoded sequence between the
618    /// `offset`-th and `i`-th element (exclusive).
619    /// This is equivalent to ``rank_range(offset..i, symbol)``.
620    /// The interval is half-open, meaning ``rank_offset(0, 0, symbol)`` returns 0.
621    ///
622    /// The `symbol` is a `k`-bit word encoded in a u64 numeral,
623    /// where k is less than or equal to 64.
624    ///
625    /// Returns `None` if `offset` is out of bounds,
626    /// or if `i` is larger than the length of the encoded sequence,
627    /// or if `offset` is greater than `i`,
628    /// or if the number of bits in the wavelet matrix elements exceed `64`.
629    /// `i` may equal the length of the encoded sequence,
630    /// which will return the number of occurrences of the symbol up to the end of the sequence.
631    ///
632    /// # Example
633    /// ```
634    /// use vers_vecs::{BitVec, WaveletMatrix};
635    ///
636    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
637    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
638    ///
639    /// assert_eq!(wavelet_matrix.rank_offset_u64(0, 3, 4), Some(2));
640    /// assert_eq!(wavelet_matrix.rank_offset_u64(2, 4, 4), Some(1));
641    /// ```
642    #[must_use]
643    pub fn rank_offset_u64(&self, offset: usize, i: usize, symbol: u64) -> Option<usize> {
644        if offset > i || offset >= self.len() || i > self.len() || self.bits_per_element() > 64 {
645            None
646        } else {
647            Some(self.rank_offset_u64_unchecked(offset, i, symbol))
648        }
649    }
650
651    /// Get the number of occurrences of the given `symbol` in the encoded sequence up to the `i`-th
652    /// element (exclusive).
653    /// The `symbol` is a `k`-bit word encoded in a [`BitVec`],
654    /// where the least significant bit is the first element, and `k` is the number of bits per element
655    /// in the wavelet matrix.
656    ///
657    /// This method does not perform bounds checking, nor does it check if the `symbol` is a valid
658    /// `k`-bit word.
659    /// Use [`rank`] for a checked version.
660    ///
661    /// # Panics
662    /// May panic if `i` is out of bounds, or if the number of bits in `symbol` is lower than `k`.
663    /// May instead return 0.
664    /// If the number of bits in `symbol` exceeds `k`, the remaining bits are ignored.
665    ///
666    /// [`BitVec`]: BitVec
667    /// [`rank`]: WaveletMatrix::rank
668    #[must_use]
669    pub fn rank_unchecked(&self, i: usize, symbol: &BitVec) -> usize {
670        self.rank_range_unchecked(0..i, symbol)
671    }
672
673    /// Get the number of occurrences of the given `symbol` in the encoded sequence up to the `i`-th
674    /// element (exclusive).
675    /// The `symbol` is a `k`-bit word encoded in a [`BitVec`],
676    /// where the least significant bit is the first element, and `k` is the number of bits per element
677    /// in the wavelet matrix.
678    ///
679    /// Returns `None` if `i` is out of bounds (greater than the length of the encoded sequence, but
680    /// since it is exclusive, it may be equal to the length),
681    /// or if the number of bits in `symbol` is not equal to `k`.
682    ///
683    /// # Example
684    /// ```
685    /// use vers_vecs::{BitVec, WaveletMatrix};
686    ///
687    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
688    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
689    ///
690    /// assert_eq!(wavelet_matrix.rank(3, &BitVec::pack_sequence_u8(&[4], 3)), Some(2));
691    /// assert_eq!(wavelet_matrix.rank(3, &BitVec::pack_sequence_u8(&[1], 3)), Some(1));
692    /// ```
693    ///
694    /// [`BitVec`]: BitVec
695    #[must_use]
696    pub fn rank(&self, i: usize, symbol: &BitVec) -> Option<usize> {
697        if i > self.len() || symbol.len() != self.bits_per_element() {
698            None
699        } else {
700            Some(self.rank_range_unchecked(0..i, symbol))
701        }
702    }
703
704    /// Get the number of occurrences of the given `symbol` in the encoded sequence up to the `i`-th
705    /// element (exclusive).
706    /// The `symbol` is a `k`-bit word encoded in a u64 numeral,
707    /// where k is less than or equal to 64.
708    ///
709    /// This method does not perform bounds checking, nor does it check if the elements of the
710    /// wavelet matrix can be represented in a u64 numeral.
711    /// Use [`rank_u64`] for a checked version.
712    ///
713    /// # Panics
714    /// May panic if `i` is out of bounds,
715    /// or if the number of bits in wavelet matrix elements exceed `64`.
716    /// May instead return 0.
717    ///
718    /// [`rank_u64`]: WaveletMatrix::rank_u64
719    #[must_use]
720    pub fn rank_u64_unchecked(&self, i: usize, symbol: u64) -> usize {
721        self.rank_range_u64_unchecked(0..i, symbol)
722    }
723
724    /// Get the number of occurrences of the given `symbol` in the encoded sequence up to the `i`-th
725    /// element (exclusive).
726    /// The `symbol` is a `k`-bit word encoded in a u64 numeral,
727    /// where k is less than or equal to 64.
728    ///
729    /// Returns `None` if `i` is out of bounds (greater than the length of the encoded sequence, but
730    /// since it is exclusive, it may be equal to the length),
731    /// or if the number of bits in the wavelet matrix elements exceed `64`.
732    ///
733    /// # Example
734    /// ```
735    /// use vers_vecs::{BitVec, WaveletMatrix};
736    ///
737    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
738    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
739    ///
740    /// assert_eq!(wavelet_matrix.rank_u64(3, 4), Some(2));
741    /// assert_eq!(wavelet_matrix.rank_u64(3, 1), Some(1));
742    /// ```
743    #[must_use]
744    pub fn rank_u64(&self, i: usize, symbol: u64) -> Option<usize> {
745        if i > self.len() || self.bits_per_element() > 64 {
746            None
747        } else {
748            Some(self.rank_range_u64_unchecked(0..i, symbol))
749        }
750    }
751
752    /// Get the index of the `rank`-th occurrence of the given `symbol` in the encoded sequence,
753    /// starting from the `offset`-th element.
754    /// The `symbol` is a `k`-bit word encoded in a [`BitVec`],
755    /// where the least significant bit is the first element, and `k` is the number of bits per element
756    /// in the wavelet matrix.
757    ///
758    /// This method does not perform bounds checking, nor does it check if the `symbol` is a valid
759    /// `k`-bit word.
760    /// Use [`select_offset`] for a checked version.
761    ///
762    /// Returns the index of the `rank`-th occurrence of the `symbol` in the encoded sequence,
763    /// or the length of the encoded sequence if the `rank`-th occurrence does not exist.
764    ///
765    /// # Panics
766    /// May panic if the `offset` is out of bounds,
767    /// or if the number of bits in `symbol` is lower than `k`.
768    /// May instead return the length of the encoded sequence.
769    ///
770    /// [`BitVec`]: BitVec
771    /// [`select_offset`]: WaveletMatrix::select_offset
772    #[must_use]
773    pub fn select_offset_unchecked(&self, offset: usize, rank: usize, symbol: &BitVec) -> usize {
774        let mut range_start = offset;
775
776        for (level, data) in self.data.iter().enumerate() {
777            if symbol.get_unchecked((self.bits_per_element() - 1) - level) == 0 {
778                range_start = data.rank0(range_start);
779            } else {
780                range_start = data.rank0 + data.rank1(range_start);
781            }
782        }
783
784        let mut range_end = range_start + rank;
785
786        for (level, data) in self.data.iter().enumerate().rev() {
787            if symbol.get_unchecked((self.bits_per_element() - 1) - level) == 0 {
788                range_end = data.select0(range_end);
789            } else {
790                range_end = data.select1(range_end - data.rank0);
791            }
792        }
793
794        range_end
795    }
796
797    /// Get the index of the `rank`-th occurrence of the given `symbol` in the encoded sequence,
798    /// starting from the `offset`-th element.
799    /// The `symbol` is a `k`-bit word encoded in a [`BitVec`],
800    /// where the least significant bit is the first element, and `k` is the number of bits per element
801    /// in the wavelet matrix.
802    ///
803    /// Returns `None` if `offset` is out of bounds, or if the number of bits in `symbol` is not equal to `k`,
804    /// or if the `rank`-th occurrence of the `symbol` does not exist.
805    ///
806    /// # Example
807    /// ```
808    /// use vers_vecs::{BitVec, WaveletMatrix};
809    ///
810    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
811    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
812    ///
813    /// assert_eq!(wavelet_matrix.select_offset(0, 0, &BitVec::pack_sequence_u8(&[4], 3)), Some(1));
814    /// assert_eq!(wavelet_matrix.select_offset(0, 1, &BitVec::pack_sequence_u8(&[4], 3)), Some(2));
815    /// assert_eq!(wavelet_matrix.select_offset(2, 0, &BitVec::pack_sequence_u8(&[4], 3)), Some(2));
816    /// assert_eq!(wavelet_matrix.select_offset(2, 1, &BitVec::pack_sequence_u8(&[4], 3)), None);
817    /// ```
818    ///
819    /// [`BitVec`]: BitVec
820    #[must_use]
821    pub fn select_offset(&self, offset: usize, rank: usize, symbol: &BitVec) -> Option<usize> {
822        if offset >= self.len() || symbol.len() != self.bits_per_element() {
823            None
824        } else {
825            let idx = self.select_offset_unchecked(offset, rank, symbol);
826            if idx < self.len() {
827                Some(idx)
828            } else {
829                None
830            }
831        }
832    }
833
834    /// Get the index of the `rank`-th occurrence of the given `symbol` in the encoded sequence,
835    /// starting from the `offset`-th element.
836    /// The `symbol` is a `k`-bit word encoded in a u64 numeral,
837    /// where k is less than or equal to 64.
838    ///
839    /// This method does not perform bounds checking, nor does it check if the elements of the
840    /// wavelet matrix can be represented in a u64 numeral.
841    /// Use [`select_offset_u64`] for a checked version.
842    ///
843    /// Returns the index of the `rank`-th occurrence of the `symbol` in the encoded sequence,
844    /// or the length of the encoded sequence if the `rank`-th occurrence does not exist.
845    ///
846    /// # Panics
847    /// May panic if the `offset` is out of bounds,
848    /// or if the number of bits in wavelet matrix elements exceed `64`.
849    /// May instead return the length of the encoded sequence.
850    ///
851    /// [`select_offset_u64`]: WaveletMatrix::select_offset_u64
852    #[must_use]
853    pub fn select_offset_u64_unchecked(&self, offset: usize, rank: usize, symbol: u64) -> usize {
854        let mut range_start = offset;
855
856        for (level, data) in self.data.iter().enumerate() {
857            if (symbol >> ((self.bits_per_element() - 1) - level)) & 1 == 0 {
858                range_start = data.rank0(range_start);
859            } else {
860                range_start = data.rank0 + data.rank1(range_start);
861            }
862        }
863
864        let mut range_end = range_start + rank;
865
866        for (level, data) in self.data.iter().enumerate().rev() {
867            if (symbol >> ((self.bits_per_element() - 1) - level)) & 1 == 0 {
868                range_end = data.select0(range_end);
869            } else {
870                range_end = data.select1(range_end - data.rank0);
871            }
872        }
873
874        range_end
875    }
876
877    /// Get the index of the `rank`-th occurrence of the given `symbol` in the encoded sequence,
878    /// starting from the `offset`-th element.
879    /// The `symbol` is a `k`-bit word encoded in a u64 numeral,
880    /// where k is less than or equal to 64.
881    ///
882    /// Returns `None` if `offset` is out of bounds, or if the number of bits in the wavelet matrix
883    /// elements exceed `64`, or if the `rank`-th occurrence of the `symbol` does not exist.
884    ///
885    /// # Example
886    /// ```
887    /// use vers_vecs::{BitVec, WaveletMatrix};
888    ///
889    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
890    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
891    ///
892    /// assert_eq!(wavelet_matrix.select_offset_u64(0, 0, 4), Some(1));
893    /// assert_eq!(wavelet_matrix.select_offset_u64(0, 1, 4), Some(2));
894    /// assert_eq!(wavelet_matrix.select_offset_u64(2, 0, 4), Some(2));
895    /// assert_eq!(wavelet_matrix.select_offset_u64(2, 1, 4), None);
896    /// ```
897    #[must_use]
898    pub fn select_offset_u64(&self, offset: usize, rank: usize, symbol: u64) -> Option<usize> {
899        if offset >= self.len() || self.bits_per_element() > 64 {
900            None
901        } else {
902            let idx = self.select_offset_u64_unchecked(offset, rank, symbol);
903            if idx < self.len() {
904                Some(idx)
905            } else {
906                None
907            }
908        }
909    }
910
911    /// Get the index of the `rank`-th occurrence of the given `symbol` in the encoded sequence.
912    /// The `symbol` is a `k`-bit word encoded in a [`BitVec`],
913    /// where the least significant bit is the first element, and `k` is the number of bits per element
914    /// in the wavelet matrix.
915    ///
916    /// This method does not perform bounds checking, nor does it check if the `symbol` is a valid
917    /// `k`-bit word.
918    /// Use [`select`] for a checked version.
919    ///
920    /// Returns the index of the `rank`-th occurrence of the `symbol` in the encoded sequence,
921    /// or the length of the encoded sequence if the `rank`-th occurrence does not exist.
922    ///
923    /// # Panics
924    /// May panic if the number of bits in `symbol` is not equal to `k`.
925    /// May instead return the length of the encoded sequence.
926    ///
927    /// [`BitVec`]: BitVec
928    /// [`select`]: WaveletMatrix::select
929    #[must_use]
930    pub fn select_unchecked(&self, rank: usize, symbol: &BitVec) -> usize {
931        self.select_offset_unchecked(0, rank, symbol)
932    }
933
934    /// Get the index of the `rank`-th occurrence of the given `symbol` in the encoded sequence.
935    /// The `symbol` is a `k`-bit word encoded in a [`BitVec`],
936    /// where the least significant bit is the first element, and `k` is the number of bits per element
937    /// in the wavelet matrix.
938    ///
939    /// Returns `None` if the number of bits in `symbol` is not equal to `k`,
940    /// or if the `rank`-th occurrence of the `symbol` does not exist.
941    ///
942    /// # Example
943    /// ```
944    /// use vers_vecs::{BitVec, WaveletMatrix};
945    ///
946    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
947    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
948    ///
949    /// assert_eq!(wavelet_matrix.select(0, &BitVec::pack_sequence_u8(&[4], 3)), Some(1));
950    /// assert_eq!(wavelet_matrix.select(1, &BitVec::pack_sequence_u8(&[4], 3)), Some(2));
951    /// ```
952    ///
953    /// [`BitVec`]: BitVec
954    #[must_use]
955    pub fn select(&self, rank: usize, symbol: &BitVec) -> Option<usize> {
956        if symbol.len() == self.bits_per_element() {
957            let idx = self.select_unchecked(rank, symbol);
958            if idx < self.len() {
959                Some(idx)
960            } else {
961                None
962            }
963        } else {
964            None
965        }
966    }
967
968    /// Get the index of the `rank`-th occurrence of the given `symbol` in the encoded sequence.
969    /// The `symbol` is a `k`-bit word encoded in a u64 numeral,
970    /// where k is less than or equal to 64.
971    ///
972    /// This method does not perform bounds checking, nor does it check if the elements of the
973    /// wavelet matrix can be represented in a u64 numeral.
974    /// Use [`select_u64`] for a checked version.
975    ///
976    /// Returns the index of the `rank`-th occurrence of the `symbol` in the encoded sequence,
977    /// or the length of the encoded sequence if the `rank`-th occurrence does not exist.
978    ///
979    /// # Panics
980    /// May panic if the number of bits in wavelet matrix elements exceed `64`.
981    /// May instead return the length of the encoded sequence.
982    ///
983    /// [`select_u64`]: WaveletMatrix::select_u64
984    #[must_use]
985    pub fn select_u64_unchecked(&self, rank: usize, symbol: u64) -> usize {
986        self.select_offset_u64_unchecked(0, rank, symbol)
987    }
988
989    /// Get the index of the `rank`-th occurrence of the given `symbol` in the encoded sequence.
990    /// The `symbol` is a `k`-bit word encoded in a u64 numeral,
991    /// where k is less than or equal to 64.
992    ///
993    /// Returns `None` if the number of bits in the wavelet matrix elements exceed `64`,
994    /// or if the `rank`-th occurrence of the `symbol` does not exist.
995    ///
996    /// # Example
997    /// ```
998    /// use vers_vecs::{BitVec, WaveletMatrix};
999    ///
1000    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1001    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1002    ///
1003    /// assert_eq!(wavelet_matrix.select_u64(0, 4), Some(1));
1004    /// assert_eq!(wavelet_matrix.select_u64(1, 4), Some(2));
1005    /// ```
1006    #[must_use]
1007    pub fn select_u64(&self, rank: usize, symbol: u64) -> Option<usize> {
1008        if self.bits_per_element() > 64 {
1009            None
1010        } else {
1011            let idx = self.select_u64_unchecked(rank, symbol);
1012            if idx < self.len() {
1013                Some(idx)
1014            } else {
1015                None
1016            }
1017        }
1018    }
1019
1020    /// Get the `k`-th smallest element in the encoded sequence in the specified `range`,
1021    /// where `k = 0` returns the smallest element.
1022    /// The `range` is a half-open interval, meaning that the `end` index is exclusive.
1023    /// The `k`-th smallest element is returned as a `BitVec`,
1024    /// where the least significant bit is the first element.
1025    ///
1026    /// This method does not perform bounds checking.
1027    /// It returns a nonsensical result if the `k` is greater than the size of the range.
1028    /// Use [`quantile`] for a checked version.
1029    ///
1030    /// # Panics
1031    /// May panic if the `range` is out of bounds. May instead return an empty bit vector.
1032    ///
1033    /// [`quantile`]: WaveletMatrix::quantile
1034    #[must_use]
1035    pub fn quantile_unchecked(&self, range: Range<usize>, k: usize) -> BitVec {
1036        let result = BitVec::from_zeros(self.bits_per_element());
1037
1038        self.partial_quantile_search_unchecked(range, k, 0, result)
1039    }
1040
1041    /// Internal function to reuse the quantile code for the predecessor and successor search.
1042    /// This function performs the quantile search starting at the given level with the given prefix.
1043    ///
1044    /// The function does not perform any checks, so the caller must ensure that the range is valid,
1045    /// and that the prefix is a valid prefix for the given level.
1046    #[inline(always)]
1047    fn partial_quantile_search_unchecked(
1048        &self,
1049        mut range: Range<usize>,
1050        mut k: usize,
1051        start_level: usize,
1052        mut prefix: BitVec,
1053    ) -> BitVec {
1054        debug_assert!(prefix.len() == self.bits_per_element());
1055        debug_assert!(!range.is_empty());
1056        debug_assert!(range.end <= self.len());
1057
1058        for (level, data) in self.data.iter().enumerate().skip(start_level) {
1059            let zeros_start = data.rank0(range.start);
1060            let zeros_end = data.rank0(range.end);
1061            let zeros = zeros_end - zeros_start;
1062
1063            // if k < zeros, the element is among the zeros
1064            if k < zeros {
1065                range.start = zeros_start;
1066                range.end = zeros_end;
1067            } else {
1068                // the element is among the ones, so we set the bit to 1, and move the range
1069                // into the 1-partition of the next level
1070                prefix.set_unchecked((self.bits_per_element() - 1) - level, 1);
1071                k -= zeros;
1072                range.start = data.rank0 + (range.start - zeros_start); // range.start - zeros_start is the rank1 of range.start
1073                range.end = data.rank0 + (range.end - zeros_end); // same here
1074            }
1075        }
1076
1077        prefix
1078    }
1079
1080    /// Get the `k`-th smallest element in the encoded sequence in the specified `range`,
1081    /// where `k = 0` returns the smallest element.
1082    /// The `range` is a half-open interval, meaning that the `end` index is exclusive.
1083    /// The `k`-th smallest element is returned as a `BitVec`,
1084    /// where the least significant bit is the first element.
1085    ///
1086    /// Returns `None` if the `range` is out of bounds, or if `k` is greater than the size of the range.
1087    ///
1088    /// # Example
1089    /// ```
1090    /// use vers_vecs::{BitVec, WaveletMatrix};
1091    ///
1092    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1093    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1094    ///
1095    /// assert_eq!(wavelet_matrix.quantile(0..3, 0), Some(BitVec::pack_sequence_u8(&[1], 3)));
1096    /// assert_eq!(wavelet_matrix.quantile(0..3, 1), Some(BitVec::pack_sequence_u8(&[4], 3)));
1097    /// assert_eq!(wavelet_matrix.quantile(1..4, 0), Some(BitVec::pack_sequence_u8(&[1], 3)));
1098    /// ```
1099    #[must_use]
1100    pub fn quantile(&self, range: Range<usize>, k: usize) -> Option<BitVec> {
1101        if range.start >= self.len() || range.end > self.len() || k >= range.end - range.start {
1102            None
1103        } else {
1104            Some(self.quantile_unchecked(range, k))
1105        }
1106    }
1107
1108    /// Get the `i`-th smallest element in the entire wavelet matrix.
1109    /// The `i`-th smallest element is returned as a `BitVec`,
1110    /// where the least significant bit is the first element.
1111    ///
1112    /// This method does not perform bounds checking.
1113    /// Use [`get_sorted`] for a checked version.
1114    ///
1115    /// # Panics
1116    /// May panic if the `i` is out of bounds, or returns an empty bit vector.
1117    #[must_use]
1118    pub fn get_sorted_unchecked(&self, i: usize) -> BitVec {
1119        self.quantile_unchecked(0..self.len(), i)
1120    }
1121
1122    /// Get the `i`-th smallest element in the wavelet matrix.
1123    /// The `i`-th smallest element is returned as a `BitVec`,
1124    /// where the least significant bit is the first element.
1125    /// This method call is equivalent to `self.quantile(0..self.len(), i)`.
1126    ///
1127    /// Returns `None` if the `i` is out of bounds.
1128    ///
1129    /// # Example
1130    /// ```
1131    /// use vers_vecs::{BitVec, WaveletMatrix};
1132    ///
1133    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1134    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1135    ///
1136    /// assert_eq!(wavelet_matrix.get_sorted(0), Some(BitVec::pack_sequence_u8(&[1], 3)));
1137    /// assert_eq!(wavelet_matrix.get_sorted(1), Some(BitVec::pack_sequence_u8(&[1], 3)));
1138    /// assert_eq!(wavelet_matrix.get_sorted(2), Some(BitVec::pack_sequence_u8(&[2], 3)));
1139    /// ```
1140    #[must_use]
1141    pub fn get_sorted(&self, i: usize) -> Option<BitVec> {
1142        if i >= self.len() {
1143            None
1144        } else {
1145            Some(self.get_sorted_unchecked(i))
1146        }
1147    }
1148
1149    /// Get the `k`-th smallest element in the encoded sequence in the specified `range`,
1150    /// where `k = 0` returns the smallest element.
1151    /// The `range` is a half-open interval, meaning that the `end` index is exclusive.
1152    /// The `k`-th smallest element is returned as a `u64` numeral.
1153    /// If the number of bits per element exceeds 64, the value is truncated.
1154    ///
1155    /// This method does not perform bounds checking.
1156    /// It returns a nonsensical result if the `k` is greater than the size of the range.
1157    /// Use [`quantile_u64`] for a checked version.
1158    ///
1159    /// # Panics
1160    /// May panic if the `range` is out of bounds.
1161    /// May instead return 0.
1162    ///
1163    /// [`quantile_u64`]: WaveletMatrix::quantile_u64
1164    #[must_use]
1165    pub fn quantile_u64_unchecked(&self, range: Range<usize>, k: usize) -> u64 {
1166        self.partial_quantile_search_u64_unchecked(range, k, 0, 0)
1167    }
1168
1169    /// Internal function to reuse the quantile code for the predecessor and successor search.
1170    /// This function performs the quantile search starting at the given level with the given prefix.
1171    ///
1172    /// The function does not perform any checks, so the caller must ensure that the range is valid,
1173    /// and that the prefix is a valid prefix for the given level (i.e. the prefix is not shifted
1174    /// to another level).
1175    #[inline(always)]
1176    fn partial_quantile_search_u64_unchecked(
1177        &self,
1178        mut range: Range<usize>,
1179        mut k: usize,
1180        start_level: usize,
1181        mut prefix: u64,
1182    ) -> u64 {
1183        debug_assert!(self.bits_per_element() <= 64);
1184        debug_assert!(!range.is_empty());
1185        debug_assert!(range.end <= self.len());
1186
1187        for data in self.data.iter().skip(start_level) {
1188            prefix <<= 1;
1189            let zeros_start = data.rank0(range.start);
1190            let zeros_end = data.rank0(range.end);
1191            let zeros = zeros_end - zeros_start;
1192
1193            if k < zeros {
1194                range.start = zeros_start;
1195                range.end = zeros_end;
1196            } else {
1197                prefix |= 1;
1198                k -= zeros;
1199                range.start = data.rank0 + (range.start - zeros_start);
1200                range.end = data.rank0 + (range.end - zeros_end);
1201            }
1202        }
1203
1204        prefix
1205    }
1206
1207    /// Get the `k`-th smallest element in the encoded sequence in the specified `range`,
1208    /// where `k = 0` returns the smallest element.
1209    /// The `range` is a half-open interval, meaning that the `end` index is exclusive.
1210    /// The `k`-th smallest element is returned as a `u64` numeral.
1211    ///
1212    /// Returns `None` if the `range` is out of bounds, or if the number of bits per element exceeds 64,
1213    /// or if `k` is greater than the size of the range.
1214    ///
1215    /// # Example
1216    /// ```
1217    /// use vers_vecs::{BitVec, WaveletMatrix};
1218    ///
1219    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1220    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1221    ///
1222    /// assert_eq!(wavelet_matrix.quantile_u64(0..3, 0), Some(1));
1223    /// assert_eq!(wavelet_matrix.quantile_u64(0..3, 1), Some(4));
1224    /// assert_eq!(wavelet_matrix.quantile_u64(1..4, 0), Some(1));
1225    /// ```
1226    #[must_use]
1227    pub fn quantile_u64(&self, range: Range<usize>, k: usize) -> Option<u64> {
1228        if range.start >= self.len()
1229            || range.end > self.len()
1230            || self.bits_per_element() > 64
1231            || k >= range.end - range.start
1232        {
1233            None
1234        } else {
1235            Some(self.quantile_u64_unchecked(range, k))
1236        }
1237    }
1238
1239    /// Get the `i`-th smallest element in the wavelet matrix.
1240    /// The `i`-th smallest element is returned as a u64 numeral.
1241    ///
1242    /// If the number of bits per element exceeds 64, the value is truncated.
1243    ///
1244    /// This method does not perform bounds checking.
1245    /// Use [`get_sorted_u64`] for a checked version.
1246    ///
1247    /// # Panics
1248    /// May panic if the `i` is out of bounds, or returns an empty bit vector.
1249    ///
1250    /// [`get_sorted_u64`]: WaveletMatrix::get_sorted_u64
1251    #[must_use]
1252    pub fn get_sorted_u64_unchecked(&self, i: usize) -> u64 {
1253        self.quantile_u64_unchecked(0..self.len(), i)
1254    }
1255
1256    /// Get the `i`-th smallest element in the entire wavelet matrix.
1257    /// The `i`-th smallest element is returned as a u64 numeral.
1258    ///
1259    /// Returns `None` if the `i` is out of bounds, or if the number of bits per element exceeds 64.
1260    ///
1261    /// # Example
1262    /// ```
1263    /// use vers_vecs::{BitVec, WaveletMatrix};
1264    ///
1265    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1266    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1267    ///
1268    /// assert_eq!(wavelet_matrix.get_sorted_u64(0), Some(1));
1269    /// assert_eq!(wavelet_matrix.get_sorted_u64(1), Some(1));
1270    /// assert_eq!(wavelet_matrix.get_sorted_u64(2), Some(2));
1271    /// ```
1272    #[must_use]
1273    pub fn get_sorted_u64(&self, i: usize) -> Option<u64> {
1274        if i >= self.len() || self.bits_per_element() > 64 {
1275            None
1276        } else {
1277            Some(self.get_sorted_u64_unchecked(i))
1278        }
1279    }
1280
1281    /// Get the smallest element in the encoded sequence in the specified `range`.
1282    /// The range is a half-open interval, meaning that the `end` index is exclusive.
1283    /// The smallest element is returned as a `BitVec`,
1284    ///
1285    /// This method does not perform bounds checking.
1286    /// Use [`range_min`] for a checked version.
1287    ///
1288    /// # Panics
1289    /// May panic if the `range` is out of bounds or if the range is empty.
1290    /// May instead return an empty bit vector.
1291    ///
1292    /// [`range_min`]: WaveletMatrix::range_min
1293    #[must_use]
1294    pub fn range_min_unchecked(&self, range: Range<usize>) -> BitVec {
1295        self.quantile_unchecked(range, 0)
1296    }
1297
1298    /// Get the smallest element in the encoded sequence in the specified `range`.
1299    /// The range is a half-open interval, meaning that the `end` index is exclusive.
1300    /// The smallest element is returned as a `BitVec`,
1301    ///
1302    /// Returns `None` if the `range` is out of bounds or if the range is empty.
1303    ///
1304    /// # Example
1305    /// ```
1306    /// use vers_vecs::{BitVec, WaveletMatrix};
1307    ///
1308    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1309    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1310    ///
1311    /// assert_eq!(wavelet_matrix.range_min(0..3), Some(BitVec::pack_sequence_u8(&[1], 3)));
1312    /// assert_eq!(wavelet_matrix.range_min(1..4), Some(BitVec::pack_sequence_u8(&[1], 3)));
1313    /// assert_eq!(wavelet_matrix.range_min(1..3), Some(BitVec::pack_sequence_u8(&[4], 3)));
1314    /// ```
1315    #[must_use]
1316    pub fn range_min(&self, range: Range<usize>) -> Option<BitVec> {
1317        self.quantile(range, 0)
1318    }
1319
1320    /// Get the smallest element in the encoded sequence in the specified `range`.
1321    /// The range is a half-open interval, meaning that the `end` index is exclusive.
1322    /// The smallest element is returned as a `u64` numeral.
1323    /// If the number of bits per element exceeds 64, the value is truncated.
1324    ///
1325    /// This method does not perform bounds checking.
1326    /// Use [`range_min_u64`] for a checked version.
1327    ///
1328    /// # Panics
1329    /// May panic if the `range` is out of bounds or if the range is empty.
1330    /// May instead return 0.
1331    ///
1332    /// [`range_min_u64`]: WaveletMatrix::range_min_u64
1333    #[must_use]
1334    pub fn range_min_u64_unchecked(&self, range: Range<usize>) -> u64 {
1335        self.quantile_u64_unchecked(range, 0)
1336    }
1337
1338    /// Get the smallest element in the encoded sequence in the specified `range`.
1339    /// The range is a half-open interval, meaning that the `end` index is exclusive.
1340    /// The smallest element is returned as a `u64` numeral.
1341    ///
1342    /// Returns `None` if the `range` is out of bounds, if the range is empty, or if the number of bits
1343    /// per element exceeds 64.
1344    ///
1345    /// # Example
1346    /// ```
1347    /// use vers_vecs::{BitVec, WaveletMatrix};
1348    ///
1349    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1350    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1351    ///
1352    /// assert_eq!(wavelet_matrix.range_min_u64(0..3), Some(1));
1353    /// assert_eq!(wavelet_matrix.range_min_u64(1..4), Some(1));
1354    /// assert_eq!(wavelet_matrix.range_min_u64(1..3), Some(4));
1355    /// ```
1356    #[must_use]
1357    pub fn range_min_u64(&self, range: Range<usize>) -> Option<u64> {
1358        self.quantile_u64(range, 0)
1359    }
1360
1361    /// Get the largest element in the encoded sequence in the specified `range`.
1362    /// The range is a half-open interval, meaning that the `end` index is exclusive.
1363    /// The largest element is returned as a `BitVec`,
1364    /// where the least significant bit is the first element.
1365    ///
1366    /// This method does not perform bounds checking.
1367    /// Use [`range_max`] for a checked version.
1368    ///
1369    /// # Panics
1370    /// May panic if the `range` is out of bounds or if the range is empty.
1371    /// May instead return an empty bit vector.
1372    ///
1373    /// [`range_max`]: WaveletMatrix::range_max
1374    #[must_use]
1375    pub fn range_max_unchecked(&self, range: Range<usize>) -> BitVec {
1376        let k = range.end - range.start - 1;
1377        self.quantile_unchecked(range, k)
1378    }
1379
1380    /// Get the largest element in the encoded sequence in the specified `range`.
1381    /// The range is a half-open interval, meaning that the `end` index is exclusive.
1382    /// The largest element is returned as a `BitVec`,
1383    /// where the least significant bit is the first element.
1384    ///
1385    /// Returns `None` if the `range` is out of bounds or if the range is empty.
1386    ///
1387    /// # Example
1388    /// ```
1389    /// use vers_vecs::{BitVec, WaveletMatrix};
1390    ///
1391    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1392    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1393    ///
1394    /// assert_eq!(wavelet_matrix.range_max(0..3), Some(BitVec::pack_sequence_u8(&[4], 3)));
1395    /// assert_eq!(wavelet_matrix.range_max(3..6), Some(BitVec::pack_sequence_u8(&[7], 3)));
1396    /// ```
1397    #[must_use]
1398    pub fn range_max(&self, range: Range<usize>) -> Option<BitVec> {
1399        if range.is_empty() {
1400            None
1401        } else {
1402            let k = range.end - range.start - 1;
1403            self.quantile(range, k)
1404        }
1405    }
1406
1407    /// Get the largest element in the encoded sequence in the specified `range`.
1408    /// The range is a half-open interval, meaning that the `end` index is exclusive.
1409    /// The largest element is returned as a `u64` numeral.
1410    /// If the number of bits per element exceeds 64, the value is truncated.
1411    ///
1412    /// This method does not perform bounds checking.
1413    /// Use [`range_max_u64`] for a checked version.
1414    ///
1415    /// # Panics
1416    /// May panic if the `range` is out of bounds or if the range is empty.
1417    /// May instead return 0.
1418    ///
1419    /// [`range_max_u64`]: WaveletMatrix::range_max_u64
1420    #[must_use]
1421    pub fn range_max_u64_unchecked(&self, range: Range<usize>) -> u64 {
1422        let k = range.end - range.start - 1;
1423        self.quantile_u64_unchecked(range, k)
1424    }
1425
1426    /// Get the largest element in the encoded sequence in the specified `range`.
1427    /// The range is a half-open interval, meaning that the `end` index is exclusive.
1428    /// The largest element is returned as a `u64` numeral.
1429    ///
1430    /// Returns `None` if the `range` is out of bounds, if the range is empty, or if the number of bits
1431    /// per element exceeds 64.
1432    ///
1433    /// # Example
1434    /// ```
1435    /// use vers_vecs::{BitVec, WaveletMatrix};
1436    ///
1437    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1438    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1439    ///
1440    /// assert_eq!(wavelet_matrix.range_max_u64(0..3), Some(4));
1441    /// assert_eq!(wavelet_matrix.range_max_u64(3..6), Some(7));
1442    /// ```
1443    #[must_use]
1444    pub fn range_max_u64(&self, range: Range<usize>) -> Option<u64> {
1445        if range.is_empty() {
1446            None
1447        } else {
1448            let k = range.end - range.start - 1;
1449            self.quantile_u64(range, k)
1450        }
1451    }
1452
1453    /// Get the median element in the encoded sequence in the specified `range`.
1454    /// The range is a half-open interval, meaning that the `end` index is exclusive.
1455    /// The median element is returned as a `BitVec`,
1456    /// where the least significant bit is the first element.
1457    ///
1458    /// If the range does not contain an odd number of elements, the position is rounded down.
1459    ///
1460    /// This method does not perform bounds checking.
1461    /// Use [`range_median`] for a checked version.
1462    ///
1463    /// # Panics
1464    /// May panic if the `range` is out of bounds or if the range is empty.
1465    /// May instead return an empty bit vector.
1466    ///
1467    /// [`range_median`]: WaveletMatrix::range_median
1468    #[must_use]
1469    pub fn range_median_unchecked(&self, range: Range<usize>) -> BitVec {
1470        let k = (range.end - 1 - range.start) / 2;
1471        self.quantile_unchecked(range, k)
1472    }
1473
1474    /// Get the median element in the encoded sequence in the specified `range`.
1475    /// The range is a half-open interval, meaning that the `end` index is exclusive.
1476    /// The median element is returned as a `BitVec`,
1477    /// where the least significant bit is the first element.
1478    ///
1479    /// If the range does not contain an odd number of elements, the position is rounded down.
1480    ///
1481    /// Returns `None` if the `range` is out of bounds or if the range is empty.
1482    ///
1483    /// # Example
1484    /// ```
1485    /// use vers_vecs::{BitVec, WaveletMatrix};
1486    ///
1487    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1488    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1489    ///
1490    /// assert_eq!(wavelet_matrix.range_median(0..3), Some(BitVec::pack_sequence_u8(&[4], 3)));
1491    /// assert_eq!(wavelet_matrix.range_median(1..4), Some(BitVec::pack_sequence_u8(&[4], 3)));
1492    /// assert_eq!(wavelet_matrix.range_median(0..6), Some(BitVec::pack_sequence_u8(&[2], 3)));
1493    /// ```
1494    #[must_use]
1495    pub fn range_median(&self, range: Range<usize>) -> Option<BitVec> {
1496        if range.is_empty() {
1497            None
1498        } else {
1499            let k = (range.end - 1 - range.start) / 2;
1500            self.quantile(range, k)
1501        }
1502    }
1503
1504    /// Get the median element in the encoded sequence in the specified `range`.
1505    /// The range is a half-open interval, meaning that the `end` index is exclusive.
1506    /// The median element is returned as a `u64` numeral.
1507    /// If the number of bits per element exceeds 64, the value is truncated.
1508    ///
1509    /// If the range does not contain an odd number of elements, the position is rounded down.
1510    ///
1511    /// This method does not perform bounds checking.
1512    /// Use [`range_median_u64`] for a checked version.
1513    ///
1514    /// # Panics
1515    /// May panic if the `range` is out of bounds or if the range is empty.
1516    /// May instead return 0.
1517    ///
1518    /// [`range_median_u64`]: WaveletMatrix::range_median_u64
1519    #[must_use]
1520    pub fn range_median_u64_unchecked(&self, range: Range<usize>) -> u64 {
1521        let k = (range.end - 1 - range.start) / 2;
1522        self.quantile_u64_unchecked(range, k)
1523    }
1524
1525    /// Get the median element in the encoded sequence in the specified `range`.
1526    /// The range is a half-open interval, meaning that the `end` index is exclusive.
1527    /// The median element is returned as a `u64` numeral.
1528    ///
1529    /// If the range does not contain an odd number of elements, the position is rounded down.
1530    ///
1531    /// Returns `None` if the `range` is out of bounds, if the range is empty, or if the number of bits
1532    /// per element exceeds 64.
1533    ///
1534    /// # Example
1535    /// ```
1536    /// use vers_vecs::{BitVec, WaveletMatrix};
1537    ///
1538    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1539    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1540    ///
1541    /// assert_eq!(wavelet_matrix.range_median_u64(0..3), Some(4));
1542    /// assert_eq!(wavelet_matrix.range_median_u64(1..4), Some(4));
1543    /// assert_eq!(wavelet_matrix.range_median_u64(0..6), Some(2));
1544    /// ```
1545    #[must_use]
1546    pub fn range_median_u64(&self, range: Range<usize>) -> Option<u64> {
1547        if range.is_empty() || self.bits_per_element() > 64 || range.end > self.len() {
1548            None
1549        } else {
1550            let k = (range.end - 1 - range.start) / 2;
1551            self.quantile_u64(range, k)
1552        }
1553    }
1554
1555    /// Get the predecessor of the given `symbol` in the given `range`.
1556    /// This is a private generic helper function to implement the public `predecessor` functions.
1557    ///
1558    /// The read and write access to the query and result values are abstracted by the `Reader` and `Writer` closures.
1559    #[inline(always)] // even though the function is pretty large, inlining probably gets rid of the closure calls in favor of static calls
1560    fn predecessor_generic_unchecked<
1561        T: Clone,
1562        Reader: Fn(usize, &T) -> u64,
1563        Writer: Fn(u64, usize, &mut T),
1564        Quantile: Fn(&Self, Range<usize>, usize, usize, T) -> T,
1565    >(
1566        &self,
1567        mut range: Range<usize>,
1568        symbol: &T,
1569        mut result_value: T,
1570        bit_reader: Reader,
1571        result_writer: Writer,
1572        quantile_search: Quantile,
1573    ) -> Option<T> {
1574        // the bit-prefix at the last node where we could go to an interval with smaller elements,
1575        // i.e. where we need to go if the current prefix has no elements smaller than the query
1576        let mut last_smaller_prefix = result_value.clone();
1577        // the level of the last node where we could go to an interval with smaller elements
1578        let mut last_one_level: Option<usize> = None;
1579        // the range of the last node where we could go to an interval with smaller elements
1580        let mut next_smaller_range: Option<Range<usize>> = None;
1581
1582        for (level, data) in self.data.iter().enumerate() {
1583            let query_bit = bit_reader(level, symbol);
1584
1585            // read the amount of elements with the current result-prefix plus one 0 bit.
1586            // if the query_bit is 1, we can calculate the amount of elements with the result-prefix
1587            // plus one 1 from there
1588            let zeros_start = data.rank0(range.start);
1589            let zeros_end = data.rank0(range.end);
1590            let elements_zero = zeros_end - zeros_start;
1591
1592            if query_bit == 0 {
1593                if elements_zero == 0 {
1594                    // if our query bit is zero in this level and suddenly our new interval is empty,
1595                    // all elements that were in the previous interval are bigger than the query element,
1596                    // because they all have a 1 in the current level.
1597                    // this means the predecessor is the largest element in the last smaller interval,
1598                    // i.e. the interval that has a 0 bit at the last level where our prefix had a 1 bit.
1599
1600                    return next_smaller_range.map(|r| {
1601                        let idx = r.end - r.start - 1;
1602                        result_writer(0, last_one_level.unwrap(), &mut last_smaller_prefix);
1603                        quantile_search(
1604                            self,
1605                            r,
1606                            idx,
1607                            last_one_level.unwrap() + 1,
1608                            last_smaller_prefix,
1609                        )
1610                    });
1611                }
1612
1613                // update the prefix
1614                result_writer(0, level, &mut result_value);
1615
1616                // update the range to the interval of the new prefix
1617                range.start = zeros_start;
1618                range.end = zeros_end;
1619            } else {
1620                if elements_zero == range.end - range.start {
1621                    // if our query element is 1 in this level and suddenly our new interval is empty,
1622                    // all elements that were in the previous interval are smaller than the query element,
1623                    // because they all have a 0 in the current level.
1624                    // this means the predecessor is the largest element in the last level's interval.
1625
1626                    let idx = range.end - range.start - 1;
1627                    return Some(quantile_search(self, range, idx, level, result_value));
1628                }
1629
1630                // if the other interval is not empty, we update the last smaller interval to the last interval where we can switch to a 0 prefix
1631                if !(zeros_start..zeros_end).is_empty() {
1632                    last_one_level = Some(level);
1633                    next_smaller_range = Some(zeros_start..zeros_end);
1634                    last_smaller_prefix = result_value.clone();
1635                }
1636
1637                // update the prefix
1638                result_writer(1, level, &mut result_value);
1639
1640                // update the range to the interval of the new prefix
1641                range.start = data.rank0 + (range.start - zeros_start);
1642                range.end = data.rank0 + (range.end - zeros_end);
1643            }
1644        }
1645
1646        Some(result_value)
1647    }
1648
1649    /// Get the predecessor of the given `symbol` in the given `range`.
1650    /// The `symbol` is a `k`-bit word encoded in a [`BitVec`],
1651    /// where the least significant bit is the first element, and `k` is the number of bits per element
1652    /// in the wavelet matrix.
1653    /// The `symbol` does not have to be in the encoded sequence.
1654    /// The predecessor is the largest element in the `range` that is smaller than or equal
1655    /// to the `symbol`.
1656    ///
1657    /// Returns `None` if the number of bits in the `symbol` is not equal to `k`,
1658    /// if the range is empty, if the wavelet matrix is empty, if the range is out of bounds,
1659    /// or if the `symbol` is smaller than all elements in the range.
1660    ///
1661    /// # Example
1662    /// ```
1663    /// use vers_vecs::{BitVec, WaveletMatrix};
1664    ///
1665    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1666    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1667    ///
1668    /// assert_eq!(wavelet_matrix.predecessor(0..3, &BitVec::pack_sequence_u8(&[7], 3)), Some(BitVec::pack_sequence_u8(&[4], 3)));
1669    /// assert_eq!(wavelet_matrix.predecessor(0..3, &BitVec::pack_sequence_u8(&[4], 3)), Some(BitVec::pack_sequence_u8(&[4], 3)));
1670    /// assert_eq!(wavelet_matrix.predecessor(0..6, &BitVec::pack_sequence_u8(&[7], 3)), Some(BitVec::pack_sequence_u8(&[7], 3)));
1671    /// ```
1672    ///
1673    /// [`BitVec`]: BitVec
1674    #[must_use]
1675    pub fn predecessor(&self, range: Range<usize>, symbol: &BitVec) -> Option<BitVec> {
1676        if symbol.len() != self.bits_per_element()
1677            || range.is_empty()
1678            || self.is_empty()
1679            || range.end > self.len()
1680        {
1681            return None;
1682        }
1683
1684        self.predecessor_generic_unchecked(
1685            range,
1686            symbol,
1687            BitVec::from_zeros(self.bits_per_element()),
1688            |level, symbol| symbol.get_unchecked((self.bits_per_element() - 1) - level),
1689            |bit, level, result| {
1690                result.set_unchecked((self.bits_per_element() - 1) - level, bit);
1691            },
1692            Self::partial_quantile_search_unchecked,
1693        )
1694    }
1695
1696    /// Get the predecessor of the given `symbol` in the given `range`.
1697    /// The `symbol` is a `k`-bit word encoded in a `u64` numeral,
1698    /// where k is less than or equal to 64.
1699    /// The `symbol` does not have to be in the encoded sequence.
1700    /// The predecessor is the largest element in the `range` that is smaller than or equal
1701    /// to the `symbol`.
1702    ///
1703    /// Returns `None` if the number of bits in the matrix is greater than 64,
1704    /// if the range is empty, if the wavelet matrix is empty, if the range is out of bounds,
1705    /// or if the `symbol` is smaller than all elements in the range.
1706    ///
1707    /// # Example
1708    /// ```
1709    /// use vers_vecs::{BitVec, WaveletMatrix};
1710    ///
1711    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1712    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1713    ///
1714    /// assert_eq!(wavelet_matrix.predecessor_u64(0..3, 7), Some(4));
1715    /// assert_eq!(wavelet_matrix.predecessor_u64(0..3, 4), Some(4));
1716    /// assert_eq!(wavelet_matrix.predecessor_u64(0..6, 7), Some(7));
1717    /// ```
1718    #[must_use]
1719    pub fn predecessor_u64(&self, range: Range<usize>, symbol: u64) -> Option<u64> {
1720        if self.bits_per_element() > 64
1721            || range.is_empty()
1722            || self.is_empty()
1723            || range.end > self.len()
1724        {
1725            return None;
1726        }
1727
1728        self.predecessor_generic_unchecked(
1729            range,
1730            &symbol,
1731            0,
1732            |level, symbol| (symbol >> ((self.bits_per_element() - 1) - level)) & 1,
1733            |bit, _level, result| {
1734                // we ignore the level here, and instead rely on the fact that the bits are set in order.
1735                // we have to do that, because the quantile_search_u64 does the same.
1736                *result <<= 1;
1737                *result |= bit;
1738            },
1739            Self::partial_quantile_search_u64_unchecked,
1740        )
1741    }
1742
1743    #[inline(always)]
1744    fn successor_generic_unchecked<
1745        T: Clone,
1746        Reader: Fn(usize, &T) -> u64,
1747        Writer: Fn(u64, usize, &mut T),
1748        Quantile: Fn(&Self, Range<usize>, usize, usize, T) -> T,
1749    >(
1750        &self,
1751        mut range: Range<usize>,
1752        symbol: &T,
1753        mut result_value: T,
1754        bit_reader: Reader,
1755        result_writer: Writer,
1756        quantile_search: Quantile,
1757    ) -> Option<T> {
1758        // the bit-prefix at the last node where we could go to an interval with larger elements,
1759        // i.e. where we need to go if the current prefix has no elements larger than the query
1760        let mut last_larger_prefix = result_value.clone();
1761        // the level of the last node where we could go to an interval with larger elements
1762        let mut last_zero_level: Option<usize> = None;
1763        // the range of the last node where we could go to an interval with larger elements
1764        let mut next_larger_range: Option<Range<usize>> = None;
1765
1766        for (level, data) in self.data.iter().enumerate() {
1767            let query_bit = bit_reader(level, symbol);
1768
1769            // read the amount of elements with the current result-prefix plus one 0 bit.
1770            // if the query_bit is 1, we can calculate the amount of elements with the result-prefix
1771            // plus one 1 from there
1772            let zeros_start = data.rank0(range.start);
1773            let zeros_end = data.rank0(range.end);
1774            let elements_zero = zeros_end - zeros_start;
1775
1776            if query_bit == 0 {
1777                if elements_zero == 0 {
1778                    // if our query element is 0 in this level and suddenly our new interval is empty,
1779                    // all elements that were in the previous interval are larger than the query element,
1780                    // because they all have a 1 in the current level.
1781                    // this means the successor is the smallest element in the last level's interval.
1782
1783                    return Some(quantile_search(self, range, 0, level, result_value));
1784                }
1785
1786                // if the other interval is not empty, we update the last interval where we can switch to a prefix with a 1
1787                if !(data.rank0 + (range.start - zeros_start)..data.rank0 + (range.end - zeros_end))
1788                    .is_empty()
1789                {
1790                    last_zero_level = Some(level);
1791                    next_larger_range = Some(
1792                        data.rank0 + (range.start - zeros_start)
1793                            ..data.rank0 + (range.end - zeros_end),
1794                    );
1795                    last_larger_prefix = result_value.clone();
1796                }
1797
1798                // update the prefix
1799                result_writer(0, level, &mut result_value);
1800
1801                // update the range to the interval of the new prefix
1802                range.start = zeros_start;
1803                range.end = zeros_end;
1804            } else {
1805                if elements_zero == range.end - range.start {
1806                    // if our query bit is 1 in this level and suddenly our new interval is empty,
1807                    // all elements that were in the previous interval are smaller than the query element,
1808                    // because they all have a 0 in the current level.
1809                    // this means the successor is the smallest element in the last interval with a larger prefix,
1810                    // i.e. the interval that has a 1 bit at the last level where our prefix had a 0 bit.
1811
1812                    return next_larger_range.map(|r| {
1813                        result_writer(1, last_zero_level.unwrap(), &mut last_larger_prefix);
1814                        quantile_search(
1815                            self,
1816                            r,
1817                            0,
1818                            last_zero_level.unwrap() + 1,
1819                            last_larger_prefix,
1820                        )
1821                    });
1822                }
1823
1824                // update the prefix
1825                result_writer(1, level, &mut result_value);
1826
1827                // update the range to the interval of the new prefix
1828                range.start = data.rank0 + (range.start - zeros_start);
1829                range.end = data.rank0 + (range.end - zeros_end);
1830            }
1831        }
1832
1833        Some(result_value)
1834    }
1835
1836    /// Get the successor of the given `symbol` in the given range.
1837    /// The `symbol` is a `k`-bit word encoded in a [`BitVec`],
1838    /// where the least significant bit is the first element, and `k` is the number of bits per element
1839    /// in the wavelet matrix.
1840    /// The `symbol` does not have to be in the encoded sequence.
1841    /// The successor is the smallest element in the range that is greater than or equal
1842    /// to the `symbol`.
1843    ///
1844    /// Returns `None` if the number of bits in the `symbol` is not equal to `k`,
1845    /// if the range is empty, if the wavelet matrix is empty, if the range is out of bounds,
1846    /// or if the `symbol` is greater than all elements in the range.
1847    ///
1848    /// # Example
1849    /// ```
1850    /// use vers_vecs::{BitVec, WaveletMatrix};
1851    ///
1852    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1853    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1854    ///
1855    /// assert_eq!(wavelet_matrix.successor(0..3, &BitVec::pack_sequence_u8(&[2], 3)), Some(BitVec::pack_sequence_u8(&[4], 3)));
1856    /// assert_eq!(wavelet_matrix.successor(0..3, &BitVec::pack_sequence_u8(&[5], 3)), None);
1857    /// assert_eq!(wavelet_matrix.successor(0..6, &BitVec::pack_sequence_u8(&[2], 3)), Some(BitVec::pack_sequence_u8(&[2], 3)));
1858    /// ```
1859    ///
1860    /// [`BitVec`]: BitVec
1861    #[must_use]
1862    pub fn successor(&self, range: Range<usize>, symbol: &BitVec) -> Option<BitVec> {
1863        if symbol.len() != self.bits_per_element()
1864            || range.is_empty()
1865            || self.is_empty()
1866            || range.end > self.len()
1867        {
1868            return None;
1869        }
1870
1871        self.successor_generic_unchecked(
1872            range,
1873            symbol,
1874            BitVec::from_zeros(self.bits_per_element()),
1875            |level, symbol| symbol.get_unchecked((self.bits_per_element() - 1) - level),
1876            |bit, level, result| {
1877                result.set_unchecked((self.bits_per_element() - 1) - level, bit);
1878            },
1879            Self::partial_quantile_search_unchecked,
1880        )
1881    }
1882
1883    /// Get the successor of the given `symbol` in the given range.
1884    /// The `symbol` is a `k`-bit word encoded in a `u64` numeral,
1885    /// where k is less than or equal to 64.
1886    /// The `symbol` does not have to be in the encoded sequence.
1887    /// The successor is the smallest element in the range that is greater than or equal
1888    /// to the `symbol`.
1889    ///
1890    /// Returns `None` if the number of bits in the matrix is greater than 64,
1891    /// if the range is empty, if the wavelet matrix is empty, if the range is out of bounds,
1892    /// or if the `symbol` is greater than all elements in the range.
1893    ///
1894    /// # Example
1895    /// ```
1896    /// use vers_vecs::{BitVec, WaveletMatrix};
1897    ///
1898    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1899    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1900    ///
1901    /// assert_eq!(wavelet_matrix.successor_u64(0..3, 2), Some(4));
1902    /// assert_eq!(wavelet_matrix.successor_u64(0..3, 5), None);
1903    /// assert_eq!(wavelet_matrix.successor_u64(0..6, 2), Some(2));
1904    /// ```
1905    #[must_use]
1906    pub fn successor_u64(&self, range: Range<usize>, symbol: u64) -> Option<u64> {
1907        if self.bits_per_element() > 64
1908            || range.is_empty()
1909            || self.is_empty()
1910            || range.end > self.len()
1911        {
1912            return None;
1913        }
1914
1915        self.successor_generic_unchecked(
1916            range,
1917            &symbol,
1918            0,
1919            |level, symbol| (symbol >> ((self.bits_per_element() - 1) - level)) & 1,
1920            |bit, _level, result| {
1921                // we ignore the level here, and instead rely on the fact that the bits are set in order.
1922                // we have to do that, because the quantile_search_u64 does the same.
1923                *result <<= 1;
1924                *result |= bit;
1925            },
1926            Self::partial_quantile_search_u64_unchecked,
1927        )
1928    }
1929
1930    /// Get an iterator over the elements of the encoded sequence.
1931    /// The iterator yields `u64` elements.
1932    /// If the number of bits per element exceeds 64, `None` is returned.
1933    ///
1934    /// # Example
1935    /// ```
1936    /// use vers_vecs::{BitVec, WaveletMatrix};
1937    ///
1938    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1939    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1940    ///
1941    /// let mut iter = wavelet_matrix.iter_u64().unwrap();
1942    /// assert_eq!(iter.collect::<Vec<_>>(), vec![1, 4, 4, 1, 2, 7]);
1943    /// ```
1944    #[must_use]
1945    pub fn iter_u64(&self) -> Option<WaveletNumRefIter> {
1946        if self.bits_per_element() > 64 {
1947            None
1948        } else {
1949            Some(WaveletNumRefIter::new(self))
1950        }
1951    }
1952
1953    /// Turn the encoded sequence into an iterator.
1954    /// The iterator yields `u64` elements.
1955    /// If the number of bits per element exceeds 64, `None` is returned.
1956    #[must_use]
1957    pub fn into_iter_u64(self) -> Option<WaveletNumIter> {
1958        if self.bits_per_element() > 64 {
1959            None
1960        } else {
1961            Some(WaveletNumIter::new(self))
1962        }
1963    }
1964
1965    /// Get an iterator over the sorted elements of the encoded sequence.
1966    /// The iterator yields `BitVec` elements.
1967    ///
1968    /// See also [`iter_sorted_u64`] for an iterator that yields `u64` elements.
1969    #[must_use]
1970    pub fn iter_sorted(&self) -> WaveletSortedRefIter {
1971        WaveletSortedRefIter::new(self)
1972    }
1973
1974    /// Turn the encoded sequence into an iterator over the sorted sequence.
1975    /// The iterator yields `BitVec` elements.
1976    #[must_use]
1977    pub fn into_iter_sorted(self) -> WaveletSortedIter {
1978        WaveletSortedIter::new(self)
1979    }
1980
1981    /// Get an iterator over the sorted elements of the encoded sequence.
1982    /// The iterator yields `u64` elements.
1983    /// If the number of bits per element exceeds 64, `None` is returned.
1984    ///
1985    /// # Example
1986    /// ```
1987    /// use vers_vecs::{BitVec, WaveletMatrix};
1988    ///
1989    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1990    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1991    ///
1992    /// let mut iter = wavelet_matrix.iter_sorted_u64().unwrap();
1993    /// assert_eq!(iter.collect::<Vec<_>>(), vec![1, 1, 2, 4, 4, 7]);
1994    /// ```
1995    #[must_use]
1996    pub fn iter_sorted_u64(&self) -> Option<WaveletSortedNumRefIter> {
1997        if self.bits_per_element() > 64 {
1998            None
1999        } else {
2000            Some(WaveletSortedNumRefIter::new(self))
2001        }
2002    }
2003
2004    /// Turn the encoded sequence into an iterator over the sorted sequence.
2005    /// The iterator yields `u64` elements.
2006    /// If the number of bits per element exceeds 64, `None` is returned.
2007    #[must_use]
2008    pub fn into_iter_sorted_u64(self) -> Option<WaveletSortedNumIter> {
2009        if self.bits_per_element() > 64 {
2010            None
2011        } else {
2012            Some(WaveletSortedNumIter::new(self))
2013        }
2014    }
2015
2016    /// Get the number of bits per element in the alphabet of the encoded sequence.
2017    #[must_use]
2018    #[inline(always)]
2019    pub fn bits_per_element(&self) -> usize {
2020        self.data.len()
2021    }
2022
2023    /// Get the number of bits per element in the alphabet of the encoded sequence.
2024    #[must_use]
2025    #[deprecated(since = "1.5.1", note = "please use `bits_per_element` instead")]
2026    #[allow(clippy::cast_possible_truncation)]
2027    pub fn bit_len(&self) -> u16 {
2028        self.bits_per_element() as u16
2029    }
2030
2031    /// Get the number of elements stored in the encoded sequence.
2032    #[must_use]
2033    pub fn len(&self) -> usize {
2034        if self.data.is_empty() {
2035            0
2036        } else {
2037            self.data[0].len()
2038        }
2039    }
2040
2041    /// Check if the wavelet matrix is empty.
2042    #[must_use]
2043    pub fn is_empty(&self) -> bool {
2044        self.len() == 0
2045    }
2046
2047    /// Get the number of bytes allocated on the heap for the wavelet matrix.
2048    /// This does not include memory that is allocated but unused due to allocation policies of
2049    /// internal data structures.
2050    #[must_use]
2051    pub fn heap_size(&self) -> usize {
2052        self.data.iter().map(RsVec::heap_size).sum::<usize>()
2053    }
2054}
2055
2056impl_vector_iterator!(
2057    WaveletMatrix,
2058    WaveletIter,
2059    WaveletRefIter,
2060    get_value_unchecked,
2061    get_value,
2062    BitVec
2063);
2064
2065impl_vector_iterator!(
2066    WaveletMatrix,
2067    WaveletNumIter,
2068    WaveletNumRefIter,
2069    get_u64_unchecked,
2070    get_u64,
2071    u64,
2072    special
2073);
2074
2075impl_vector_iterator!(
2076    WaveletMatrix,
2077    WaveletSortedIter,
2078    WaveletSortedRefIter,
2079    get_sorted_unchecked,
2080    get_sorted,
2081    BitVec,
2082    special
2083);
2084
2085impl_vector_iterator!(
2086    WaveletMatrix,
2087    WaveletSortedNumIter,
2088    WaveletSortedNumRefIter,
2089    get_sorted_u64_unchecked,
2090    get_sorted_u64,
2091    u64,
2092    special
2093);
2094
2095#[cfg(test)]
2096mod tests;