vers_vecs/wavelet/
mod.rs

1//! This module contains the implementation of a [wavelet matrix].
2//! The wavelet matrix is a data structure that encodes a sequence of `n` `k`-bit words
3//! into a matrix of bit vectors, allowing for fast rank and select queries on the encoded sequence.
4//!
5//! This implementation further supports quantile queries, and range-predecessor and -successor queries.
6//! All operations are `O(k)` time complexity.
7//!
8//! See the struct documentation for more information.
9//!
10//! [wavelet matrix]: WaveletMatrix
11
12use crate::util::impl_vector_iterator;
13use crate::{BitVec, RsVec};
14use std::mem;
15use std::ops::Range;
16
17/// A wavelet matrix implementation implemented as described in
18/// [Navarro and Claude, 2021](http://dx.doi.org/10.1007/978-3-642-34109-0_18).
19/// The implementation is designed to allow for extremely large alphabet sizes, without
20/// sacrificing performance for small alphabets.
21///
22/// There are two constructor algorithms available:
23/// - [`from_bit_vec`] and [`from_slice`] construct the wavelet matrix by repeatedly sorting the elements.
24///   These constructors have linear space overhead and run in `O(kn * log n)` time complexity.
25/// - [`from_bit_vec_pc`] and [`from_slice_pc`] construct the wavelet matrix by counting the
26///   prefixes of the elements. These constructors have a space complexity of `O(2^k)` and run
27///   in `O(kn)`, which makes this constructor preferable for large sequences over small alphabets.
28///
29/// They encode a sequence of `n` `k`-bit words into a wavelet matrix which supports constant-time
30/// rank and select queries on elements of its `k`-bit alphabet.
31/// All query functions are mirrored for both `BitVec` and `u64` query elements, so
32/// if `k <= 64`, no heap allocation is needed for the query element.
33///
34/// Other than rank and select queries, the matrix also supports quantile queries (range select i), and
35/// range-predecessor and -successor queries, all of which are loosely based on
36/// [Külekci and Thankachan](https://doi.org/10.1016/j.jda.2017.01.002) with better time complexity.
37///
38/// All operations implemented on the matrix are `O(k)` time complexity.
39/// The space complexity of the wavelet matrix is `O(n * k)` with a small linear overhead
40/// (see [`RsVec`]).
41///
42/// # Examples
43/// ```
44/// use vers_vecs::{BitVec, WaveletMatrix};
45///
46/// // pack elements from a 3-bit alphabet into a bit vector and construct a wavelet matrix from them
47/// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
48/// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
49///
50/// // query the wavelet matrix
51/// assert_eq!(wavelet_matrix.get_u64(0), Some(1));
52/// assert_eq!(wavelet_matrix.get_u64(1), Some(4));
53///
54// // rank and select queries
55/// assert_eq!(wavelet_matrix.rank_u64(3, 4), Some(2));
56/// assert_eq!(wavelet_matrix.rank_u64(3, 1), Some(1));
57/// assert_eq!(wavelet_matrix.select_u64(0, 7), Some(5));
58///
59/// // statistics
60/// assert_eq!(wavelet_matrix.range_median_u64(0..3), Some(4));
61/// assert_eq!(wavelet_matrix.predecessor_u64(0..6, 3), Some(2));
62/// ```
63///
64/// [`RsVec`]: RsVec
65/// [`from_bit_vec`]: WaveletMatrix::from_bit_vec
66/// [`from_slice`]: WaveletMatrix::from_slice
67/// [`from_bit_vec_pc`]: WaveletMatrix::from_bit_vec_pc
68/// [`from_slice_pc`]: WaveletMatrix::from_slice_pc
69#[derive(Clone, Debug)]
70#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
71pub struct WaveletMatrix {
72    data: Box<[RsVec]>,
73}
74
75impl WaveletMatrix {
76    /// Generic constructor that constructs the wavelet matrix by repeatedly sorting the elements.
77    /// The runtime complexity is `O(kn * log n)`.
78    ///
79    /// # Parameters
80    /// - `bits_per_element`: The number of bits in each word. Cannot exceed 1 << 16.
81    /// - `num_elements`: The number of elements in the sequence.
82    /// - `bit_lookup`: A closure that returns the `bit`-th bit of the `element`-th word.
83    #[inline(always)] // should get rid of closures in favor of static calls
84    fn permutation_sorting<LOOKUP: Fn(usize, usize) -> u64>(
85        bits_per_element: u16,
86        num_elements: usize,
87        bit_lookup: LOOKUP,
88    ) -> Self {
89        let element_len = bits_per_element as usize;
90
91        let mut data = vec![BitVec::from_zeros(num_elements); element_len];
92
93        // insert the first bit of each word into the first bit vector
94        // for each following level, insert the next bit of each word into the next bit vector
95        // sorted stably by the previous bit vector
96        let mut permutation = (0..num_elements).collect::<Vec<_>>();
97        let mut next_permutation = vec![0; num_elements];
98
99        for (level, data) in data.iter_mut().enumerate() {
100            let mut total_zeros = 0;
101            for (i, p) in permutation.iter().enumerate() {
102                if bit_lookup(*p, element_len - level - 1) == 0 {
103                    total_zeros += 1;
104                } else {
105                    data.set(i, 1).unwrap();
106                }
107            }
108
109            // scan through the generated bit array and move the elements to the correct position
110            // for the next permutation
111            if level < element_len - 1 {
112                let mut zero_boundary = 0;
113                let mut one_boundary = total_zeros;
114                for (i, p) in permutation.iter().enumerate() {
115                    if data.get_unchecked(i) == 0 {
116                        next_permutation[zero_boundary] = *p;
117                        zero_boundary += 1;
118                    } else {
119                        next_permutation[one_boundary] = *p;
120                        one_boundary += 1;
121                    }
122                }
123
124                mem::swap(&mut permutation, &mut next_permutation);
125            }
126        }
127
128        Self {
129            data: data.into_iter().map(BitVec::into).collect(),
130        }
131    }
132
133    /// Create a new wavelet matrix from a sequence of `n` `k`-bit words.
134    /// The constructor runs in `O(kn * log n)` time complexity.
135    ///
136    /// # Parameters
137    /// - `bit_vec`: A packed sequence of `n` `k`-bit words. The `i`-th word begins in the
138    ///   `bits_per_element * i`-th bit of the bit vector. Words are stored from least to most
139    ///   significant bit.
140    /// - `bits_per_element`: The number `k` of bits in each word. Cannot exceed 1 << 16.
141    ///
142    /// # Panics
143    /// Panics if the number of bits in the bit vector is not a multiple of the number of bits per element.
144    #[must_use]
145    pub fn from_bit_vec(bit_vec: &BitVec, bits_per_element: u16) -> Self {
146        assert_eq!(bit_vec.len() % bits_per_element as usize, 0, "The number of bits in the bit vector must be a multiple of the number of bits per element.");
147        let num_elements = bit_vec.len() / bits_per_element as usize;
148        Self::permutation_sorting(bits_per_element, num_elements, |element, bit| {
149            bit_vec.get_unchecked(element * bits_per_element as usize + bit)
150        })
151    }
152
153    /// Create a new wavelet matrix from a sequence of `n` `k`-bit words.
154    /// The constructor runs in `O(kn * log n)` time complexity.
155    ///
156    /// # Parameters
157    /// - `sequence`: A slice of `n` u64 values, each encoding a `k`-bit word.
158    /// - `bits_per_element`: The number `k` of bits in each word. Cannot exceed 64.
159    ///
160    /// # Panics
161    /// Panics if the number of bits per element exceeds 64.
162    #[must_use]
163    pub fn from_slice(sequence: &[u64], bits_per_element: u16) -> Self {
164        assert!(
165            bits_per_element <= 64,
166            "The number of bits per element cannot exceed 64."
167        );
168        Self::permutation_sorting(bits_per_element, sequence.len(), |element, bit| {
169            (sequence[element] >> bit) & 1
170        })
171    }
172
173    /// Generic constructor that constructs the wavelet matrix by counting the prefixes of the elements.
174    /// The runtime complexity is `O(kn)`.
175    /// This constructor is only recommended for small alphabets.
176    ///
177    /// # Parameters
178    /// - `bits_per_element`: The number of bits in each word. Cannot exceed 64.
179    /// - `num_elements`: The number of elements in the sequence.
180    /// - `bit_lookup`: A closure that returns the `bit`-th bit of the `element`-th word.
181    /// - `element_lookup`: A closure that returns the `element`-th word.
182    #[inline(always)] // should get rid of closures in favor of static calls
183    fn prefix_counting<LOOKUP: Fn(usize, usize) -> u64, ELEMENT: Fn(usize) -> u64>(
184        bits_per_element: u16,
185        num_elements: usize,
186        bit_lookup: LOOKUP,
187        element_lookup: ELEMENT,
188    ) -> Self {
189        let element_len = bits_per_element as usize;
190        let mut histogram = vec![0usize; 1 << bits_per_element];
191        let mut borders = vec![0usize; 1 << bits_per_element];
192        let mut data = vec![BitVec::from_zeros(num_elements); element_len];
193
194        for i in 0..num_elements {
195            histogram[element_lookup(i) as usize] += 1;
196            data[0].set_unchecked(i, bit_lookup(i, element_len - 1));
197        }
198
199        for level in (1..element_len).rev() {
200            // combine histograms of prefixes
201            for h in 0..1 << level {
202                histogram[h] = histogram[2 * h] + histogram[2 * h + 1];
203            }
204
205            // compute borders of current level, using bit reverse patterns because of the weird
206            // node ordering in wavelet matrices
207            borders[0] = 0;
208            for h in 1usize..1 << level {
209                let h_minus_1 = (h - 1).reverse_bits() >> (64 - level);
210                borders[h.reverse_bits() >> (64 - level)] =
211                    borders[h_minus_1] + histogram[h_minus_1];
212            }
213
214            for i in 0..num_elements {
215                let bit = bit_lookup(i, element_len - level - 1);
216                data[level].set_unchecked(
217                    borders[element_lookup(i) as usize >> (element_len - level)],
218                    bit,
219                );
220                borders[element_lookup(i) as usize >> (element_len - level)] += 1;
221            }
222        }
223
224        Self {
225            data: data.into_iter().map(BitVec::into).collect(),
226        }
227    }
228
229    /// Create a new wavelet matrix from a sequence of `n` `k`-bit words using the prefix counting
230    /// algorithm [Dinklage et al.](https://doi.org/10.1145/3457197)
231    /// The constructor runs in `O(kn)` time complexity but requires `O(2^k)` space during construction,
232    /// so it is only recommended for small alphabets.
233    /// Use the [`from_bit_vec`] or [`from_slice`] constructors for larger alphabets.
234    ///
235    /// # Parameters
236    /// - `bit_vec`: A packed sequence of `n` `k`-bit words. The `i`-th word begins in the
237    ///   `bits_per_element * i`-th bit of the bit vector. Words are stored from least to most
238    ///   significant bit.
239    /// - `bits_per_element`: The number `k` of bits in each word. Cannot exceed 1 << 16.
240    ///
241    /// # Panics
242    /// Panics if the number of bits in the bit vector is not a multiple of the number of bits per element,
243    /// or if the number of bits per element exceeds 64.
244    ///
245    /// [`from_bit_vec`]: WaveletMatrix::from_bit_vec
246    /// [`from_slice`]: WaveletMatrix::from_slice
247    #[must_use]
248    pub fn from_bit_vec_pc(bit_vec: &BitVec, bits_per_element: u16) -> Self {
249        assert_eq!(bit_vec.len() % bits_per_element as usize, 0, "The number of bits in the bit vector must be a multiple of the number of bits per element.");
250        assert!(
251            bits_per_element <= 64,
252            "The number of bits per element cannot exceed 64."
253        );
254        let num_elements = bit_vec.len() / bits_per_element as usize;
255        Self::prefix_counting(
256            bits_per_element,
257            num_elements,
258            |element, bit| bit_vec.get_unchecked(element * bits_per_element as usize + bit),
259            |element| {
260                bit_vec.get_bits_unchecked(
261                    element * bits_per_element as usize,
262                    bits_per_element as usize,
263                )
264            },
265        )
266    }
267
268    /// Create a new wavelet matrix from a sequence of `n` `k`-bit words using the prefix counting
269    /// algorithm [Dinklage et al.](https://doi.org/10.1145/3457197)
270    /// The constructor runs in `O(kn)` time complexity but requires `O(2^k)` space during construction,
271    /// so it is only recommended for small alphabets.
272    /// Use the [`from_bit_vec`] or [`from_slice`] constructors for larger alphabets.
273    ///
274    /// # Parameters
275    /// - `sequence`: A slice of `n` u64 values, each encoding a `k`-bit word.
276    /// - `bits_per_element`: The number `k` of bits in each word. Cannot exceed 64.
277    ///
278    /// # Panics
279    /// Panics if the number of bits per element exceeds 64.
280    ///
281    /// [`from_bit_vec`]: WaveletMatrix::from_bit_vec
282    /// [`from_slice`]: WaveletMatrix::from_slice
283    #[must_use]
284    pub fn from_slice_pc(sequence: &[u64], bits_per_element: u16) -> Self {
285        assert!(
286            bits_per_element <= 64,
287            "The number of bits per element cannot exceed 64."
288        );
289        Self::prefix_counting(
290            bits_per_element,
291            sequence.len(),
292            |element, bit| (sequence[element] >> bit) & 1,
293            |element| sequence[element],
294        )
295    }
296
297    /// Generic function to read a value from the wavelet matrix and consume it with a closure.
298    /// The function is used by the `get_value` and `get_u64` functions, deduplicating code.
299    #[inline(always)]
300    fn reconstruct_value_unchecked<F: FnMut(u64)>(&self, mut i: usize, mut target_func: F) {
301        for level in 0..self.bits_per_element() {
302            let bit = self.data[level].get_unchecked(i);
303            target_func(bit);
304            if bit == 0 {
305                i = self.data[level].rank0(i);
306            } else {
307                i = self.data[level].rank0 + self.data[level].rank1(i);
308            }
309        }
310    }
311
312    /// Get the `i`-th element of the encoded sequence in a `k`-bit word.
313    /// The `k`-bit word is returned as a `BitVec`.
314    /// The first element of the bit vector is the least significant bit.
315    ///
316    /// Returns `None` if the index is out of bounds.
317    ///
318    /// # Example
319    /// ```
320    /// use vers_vecs::{BitVec, WaveletMatrix};
321    ///
322    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
323    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
324    ///
325    /// assert_eq!(wavelet_matrix.get_value(0), Some(BitVec::pack_sequence_u8(&[1], 3)));
326    /// assert_eq!(wavelet_matrix.get_value(1), Some(BitVec::pack_sequence_u8(&[4], 3)));
327    /// assert_eq!(wavelet_matrix.get_value(100), None);
328    /// ```
329    #[must_use]
330    pub fn get_value(&self, i: usize) -> Option<BitVec> {
331        if self.data.is_empty() || i >= self.data[0].len() {
332            None
333        } else {
334            Some(self.get_value_unchecked(i))
335        }
336    }
337
338    /// Get the `i`-th element of the encoded sequence in a `k`-bit word.
339    /// The `k`-bit word is returned as a `BitVec`.
340    /// The first element of the bit vector is the least significant bit.
341    /// This function does not perform bounds checking.
342    /// Use [`get_value`] for a checked version.
343    ///
344    /// # Panics
345    /// May panic if the index is out of bounds. May instead return an empty bit vector.
346    ///
347    /// [`get_value`]: WaveletMatrix::get_value
348    #[must_use]
349    pub fn get_value_unchecked(&self, i: usize) -> BitVec {
350        let mut value = BitVec::from_zeros(self.bits_per_element());
351        let mut level = self.bits_per_element() - 1;
352        self.reconstruct_value_unchecked(i, |bit| {
353            value.set_unchecked(level, bit);
354            level = level.saturating_sub(1);
355        });
356        value
357    }
358
359    /// Get the `i`-th element of the encoded sequence as a `u64`.
360    /// The `u64` is constructed from the `k`-bit word stored in the wavelet matrix.
361    ///
362    /// Returns `None` if the index is out of bounds, or if the number of bits per element exceeds 64.
363    ///
364    /// # Example
365    /// ```
366    /// use vers_vecs::{BitVec, WaveletMatrix};
367    ///
368    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
369    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
370    ///
371    /// assert_eq!(wavelet_matrix.get_u64(0), Some(1));
372    /// assert_eq!(wavelet_matrix.get_u64(1), Some(4));
373    /// assert_eq!(wavelet_matrix.get_u64(100), None);
374    /// ```
375    #[must_use]
376    pub fn get_u64(&self, i: usize) -> Option<u64> {
377        if self.bits_per_element() > 64 || self.data.is_empty() || i >= self.data[0].len() {
378            None
379        } else {
380            Some(self.get_u64_unchecked(i))
381        }
382    }
383
384    /// Get the `i`-th element of the encoded sequence as a `u64` numeral.
385    /// The element is encoded in the lowest `k` bits of the numeral.
386    /// If the number of bits per element exceeds 64, the value is truncated.
387    /// This function does not perform bounds checking.
388    /// Use [`get_u64`] for a checked version.
389    ///
390    /// # Panic
391    /// May panic if the value of `i` is out of bounds. May instead return 0.
392    ///
393    /// [`get_u64`]: WaveletMatrix::get_u64
394    #[must_use]
395    pub fn get_u64_unchecked(&self, i: usize) -> u64 {
396        let mut value = 0;
397        self.reconstruct_value_unchecked(i, |bit| {
398            value <<= 1;
399            value |= bit;
400        });
401        value
402    }
403
404    /// Get the number of occurrences of the given `symbol` in the encoded sequence in the `range`.
405    /// The `symbol` is a `k`-bit word encoded in a [`BitVec`],
406    /// where the least significant bit is the first element, and `k` is the number of bits per element
407    /// in the wavelet matrix.
408    ///
409    /// This method does not perform bounds checking, nor does it check if the `symbol` is a valid
410    /// `k`-bit word.
411    /// Use [`rank_range`] for a checked version.
412    ///
413    /// # Panics
414    /// May panic if the `range` is out of bounds,
415    /// or if the number of bits in `symbol` is lower than `k`.
416    /// May instead return 0.
417    ///
418    /// [`BitVec`]: BitVec
419    /// [`rank_range`]: WaveletMatrix::rank_range
420    #[must_use]
421    pub fn rank_range_unchecked(&self, mut range: Range<usize>, symbol: &BitVec) -> usize {
422        for (level, data) in self.data.iter().enumerate() {
423            if symbol.get_unchecked((self.bits_per_element() - 1) - level) == 0 {
424                range.start = data.rank0(range.start);
425                range.end = data.rank0(range.end);
426            } else {
427                range.start = data.rank0 + data.rank1(range.start);
428                range.end = data.rank0 + data.rank1(range.end);
429            }
430        }
431
432        range.end - range.start
433    }
434
435    /// Get the number of occurrences of the given `symbol` in the encoded sequence in the `range`.
436    /// The `symbol` is a `k`-bit word encoded in a [`BitVec`],
437    /// where the least significant bit is the first element, and `k` is the number of bits per element
438    /// in the wavelet matrix.
439    ///
440    /// Returns `None` if the `range` is out of bounds (greater than the length of the encoded sequence,
441    /// but since it is exclusive, it may be equal to the length),
442    /// or if the number of bits in `symbol` is not equal to `k`.
443    ///
444    /// # Example
445    /// ```
446    /// use vers_vecs::{BitVec, WaveletMatrix};
447    ///
448    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
449    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
450    ///
451    /// assert_eq!(wavelet_matrix.rank_range(0..3, &BitVec::pack_sequence_u8(&[4], 3)), Some(2));
452    /// assert_eq!(wavelet_matrix.rank_range(2..4, &BitVec::pack_sequence_u8(&[4], 3)), Some(1));
453    /// ```
454    ///
455    /// [`BitVec`]: BitVec
456    #[must_use]
457    pub fn rank_range(&self, range: Range<usize>, symbol: &BitVec) -> Option<usize> {
458        if range.start >= self.len()
459            || range.end > self.len()
460            || symbol.len() != self.bits_per_element()
461        {
462            None
463        } else {
464            Some(self.rank_range_unchecked(range, symbol))
465        }
466    }
467
468    /// Get the number of occurrences of the given `symbol` in the encoded sequence in the `range`.
469    /// The `symbol` is a `k`-bit word encoded in a u64 numeral,
470    /// where k is less than or equal to 64.
471    /// The interval is half-open, meaning `rank_range_u64(0..0, symbol)` returns 0.
472    ///
473    /// This method does not perform bounds checking, nor does it check if the elements of the
474    /// wavelet matrix can be represented in a u64 numeral.
475    /// Use [`rank_range_u64`] for a checked version.
476    ///
477    /// # Panics
478    /// May panic if the `range` is out of bounds.
479    /// May instead return 0.
480    /// If the number of bits in wavelet matrix elements exceed `64`, the behavior is
481    /// platform-dependent.
482    ///
483    /// [`rank_range_u64`]: WaveletMatrix::rank_range_u64
484    #[must_use]
485    pub fn rank_range_u64_unchecked(&self, mut range: Range<usize>, symbol: u64) -> usize {
486        for (level, data) in self.data.iter().enumerate() {
487            if (symbol >> ((self.bits_per_element() - 1) - level)) & 1 == 0 {
488                range.start = data.rank0(range.start);
489                range.end = data.rank0(range.end);
490            } else {
491                range.start = data.rank0 + data.rank1(range.start);
492                range.end = data.rank0 + data.rank1(range.end);
493            }
494        }
495
496        range.end - range.start
497    }
498
499    /// Get the number of occurrences of the given `symbol` in the encoded sequence in the `range`.
500    /// The `symbol` is a `k`-bit word encoded in a u64 numeral,
501    /// where k is less than or equal to 64.
502    /// The interval is half-open, meaning `rank_range_u64(0..0, symbol)` returns 0.
503    ///
504    /// Returns `None` if the `range` is out of bounds (greater than the length of the encoded sequence,
505    /// but since it is exclusive, it may be equal to the length),
506    /// or if the number of bits in the wavelet matrix elements exceed `64`.
507    ///
508    /// # Example
509    /// ```
510    /// use vers_vecs::{BitVec, WaveletMatrix};
511    ///
512    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
513    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
514    ///
515    /// assert_eq!(wavelet_matrix.rank_range_u64(0..3, 4), Some(2));
516    /// assert_eq!(wavelet_matrix.rank_range_u64(2..4, 4), Some(1));
517    /// ```
518    #[must_use]
519    pub fn rank_range_u64(&self, range: Range<usize>, symbol: u64) -> Option<usize> {
520        if range.start >= self.len() || range.end > self.len() || self.bits_per_element() > 64 {
521            None
522        } else {
523            Some(self.rank_range_u64_unchecked(range, symbol))
524        }
525    }
526
527    /// Get the number of occurrences of the given `symbol` in the encoded sequence between the
528    /// `offset`-th and `i`-th element (exclusive).
529    /// This is equivalent to ```rank_range_unchecked(offset..i, symbol)```.
530    /// The interval is half-open, meaning ```rank_offset_unchecked(0, 0, symbol)``` returns 0.
531    ///
532    /// The `symbol` is a `k`-bit word encoded in a [`BitVec`],
533    /// where the least significant bit is the first element, and `k` is the number of bits per element
534    /// in the wavelet matrix.
535    ///
536    /// This method does not perform bounds checking, nor does it check if the `symbol` is a valid
537    /// `k`-bit word.
538    /// Use [`rank_offset`] for a checked version.
539    ///
540    /// # Panics
541    /// May panic if `offset` is out of bounds,
542    /// or if `offset + i` is larger than the length of the encoded sequence,
543    /// or if `offset` is greater than `i`,
544    /// or if the number of bits in `symbol` is lower than `k`.
545    /// May instead return 0.
546    ///
547    /// [`BitVec`]: BitVec
548    /// [`rank_offset`]: WaveletMatrix::rank_offset
549    #[must_use]
550    pub fn rank_offset_unchecked(&self, offset: usize, i: usize, symbol: &BitVec) -> usize {
551        self.rank_range_unchecked(offset..i, symbol)
552    }
553
554    /// Get the number of occurrences of the given `symbol` in the encoded sequence between the
555    /// `offset`-th and `i`-th element (exclusive).
556    /// This is equivalent to ``rank_range(offset..i, symbol)``.
557    /// The interval is half-open, meaning ``rank_offset(0, 0, symbol)`` returns 0,
558    /// because the interval is empty.
559    ///
560    /// The `symbol` is a `k`-bit word encoded in a [`BitVec`],
561    /// where the least significant bit is the first element, and `k` is the number of bits per element
562    /// in the wavelet matrix.
563    ///
564    /// Returns `None` if `offset` is out of bounds,
565    /// or if `i` is larger than the length of the encoded sequence,
566    /// or if `offset` is greater than `i`,
567    /// or if the number of bits in `symbol` is not equal to `k`.
568    /// `i` may equal the length of the encoded sequence,
569    /// which will return the number of occurrences of the symbol up to the end of the sequence.
570    ///
571    /// # Example
572    /// ```
573    /// use vers_vecs::{BitVec, WaveletMatrix};
574    ///
575    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
576    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
577    ///
578    /// assert_eq!(wavelet_matrix.rank_offset(0, 3, &BitVec::pack_sequence_u8(&[4], 3)), Some(2));
579    /// assert_eq!(wavelet_matrix.rank_offset(2, 4, &BitVec::pack_sequence_u8(&[4], 3)), Some(1));
580    /// ```
581    ///
582    /// [`BitVec`]: BitVec
583    #[must_use]
584    pub fn rank_offset(&self, offset: usize, i: usize, symbol: &BitVec) -> Option<usize> {
585        if offset > i
586            || offset >= self.len()
587            || i > self.len()
588            || symbol.len() != self.bits_per_element()
589        {
590            None
591        } else {
592            Some(self.rank_offset_unchecked(offset, i, symbol))
593        }
594    }
595
596    /// Get the number of occurrences of the given `symbol` in the encoded sequence between the
597    /// `offset`-th and `i`-th element (exclusive).
598    /// This is equivalent to ``rank_range(offset..i, symbol)``.
599    /// The interval is half-open, meaning ``rank_offset(0, 0, symbol)`` returns 0.
600    ///
601    /// The `symbol` is a `k`-bit word encoded in a u64 numeral,
602    /// where k is less than or equal to 64.
603    ///
604    /// This method does not perform bounds checking, nor does it check if the elements of the
605    /// wavelet matrix can be represented in a u64 numeral.
606    /// Use [`rank_offset_u64`] for a checked version.
607    ///
608    /// # Panics
609    /// May panic if `offset` is out of bounds,
610    /// or if `i` is larger than the length of the encoded sequence,
611    /// or if `offset` is greater than `i`,
612    /// or if the number of bits in wavelet matrix elements exceed `64`.
613    /// May instead return 0.
614    ///
615    /// [`rank_offset_u64`]: WaveletMatrix::rank_offset_u64
616    #[must_use]
617    pub fn rank_offset_u64_unchecked(&self, offset: usize, i: usize, symbol: u64) -> usize {
618        self.rank_range_u64_unchecked(offset..i, symbol)
619    }
620
621    /// Get the number of occurrences of the given `symbol` in the encoded sequence between the
622    /// `offset`-th and `i`-th element (exclusive).
623    /// This is equivalent to ``rank_range(offset..i, symbol)``.
624    /// The interval is half-open, meaning ``rank_offset(0, 0, symbol)`` returns 0.
625    ///
626    /// The `symbol` is a `k`-bit word encoded in a u64 numeral,
627    /// where k is less than or equal to 64.
628    ///
629    /// Returns `None` if `offset` is out of bounds,
630    /// or if `i` is larger than the length of the encoded sequence,
631    /// or if `offset` is greater than `i`,
632    /// or if the number of bits in the wavelet matrix elements exceed `64`.
633    /// `i` may equal the length of the encoded sequence,
634    /// which will return the number of occurrences of the symbol up to the end of the sequence.
635    ///
636    /// # Example
637    /// ```
638    /// use vers_vecs::{BitVec, WaveletMatrix};
639    ///
640    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
641    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
642    ///
643    /// assert_eq!(wavelet_matrix.rank_offset_u64(0, 3, 4), Some(2));
644    /// assert_eq!(wavelet_matrix.rank_offset_u64(2, 4, 4), Some(1));
645    /// ```
646    #[must_use]
647    pub fn rank_offset_u64(&self, offset: usize, i: usize, symbol: u64) -> Option<usize> {
648        if offset > i || offset >= self.len() || i > self.len() || self.bits_per_element() > 64 {
649            None
650        } else {
651            Some(self.rank_offset_u64_unchecked(offset, i, symbol))
652        }
653    }
654
655    /// Get the number of occurrences of the given `symbol` in the encoded sequence up to the `i`-th
656    /// element (exclusive).
657    /// The `symbol` is a `k`-bit word encoded in a [`BitVec`],
658    /// where the least significant bit is the first element, and `k` is the number of bits per element
659    /// in the wavelet matrix.
660    ///
661    /// This method does not perform bounds checking, nor does it check if the `symbol` is a valid
662    /// `k`-bit word.
663    /// Use [`rank`] for a checked version.
664    ///
665    /// # Panics
666    /// May panic if `i` is out of bounds, or if the number of bits in `symbol` is lower than `k`.
667    /// May instead return 0.
668    /// If the number of bits in `symbol` exceeds `k`, the remaining bits are ignored.
669    ///
670    /// [`BitVec`]: BitVec
671    /// [`rank`]: WaveletMatrix::rank
672    #[must_use]
673    pub fn rank_unchecked(&self, i: usize, symbol: &BitVec) -> usize {
674        self.rank_range_unchecked(0..i, symbol)
675    }
676
677    /// Get the number of occurrences of the given `symbol` in the encoded sequence up to the `i`-th
678    /// element (exclusive).
679    /// The `symbol` is a `k`-bit word encoded in a [`BitVec`],
680    /// where the least significant bit is the first element, and `k` is the number of bits per element
681    /// in the wavelet matrix.
682    ///
683    /// Returns `None` if `i` is out of bounds (greater than the length of the encoded sequence, but
684    /// since it is exclusive, it may be equal to the length),
685    /// or if the number of bits in `symbol` is not equal to `k`.
686    ///
687    /// # Example
688    /// ```
689    /// use vers_vecs::{BitVec, WaveletMatrix};
690    ///
691    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
692    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
693    ///
694    /// assert_eq!(wavelet_matrix.rank(3, &BitVec::pack_sequence_u8(&[4], 3)), Some(2));
695    /// assert_eq!(wavelet_matrix.rank(3, &BitVec::pack_sequence_u8(&[1], 3)), Some(1));
696    /// ```
697    ///
698    /// [`BitVec`]: BitVec
699    #[must_use]
700    pub fn rank(&self, i: usize, symbol: &BitVec) -> Option<usize> {
701        if i > self.len() || symbol.len() != self.bits_per_element() {
702            None
703        } else {
704            Some(self.rank_range_unchecked(0..i, symbol))
705        }
706    }
707
708    /// Get the number of occurrences of the given `symbol` in the encoded sequence up to the `i`-th
709    /// element (exclusive).
710    /// The `symbol` is a `k`-bit word encoded in a u64 numeral,
711    /// where k is less than or equal to 64.
712    ///
713    /// This method does not perform bounds checking, nor does it check if the elements of the
714    /// wavelet matrix can be represented in a u64 numeral.
715    /// Use [`rank_u64`] for a checked version.
716    ///
717    /// # Panics
718    /// May panic if `i` is out of bounds,
719    /// or if the number of bits in wavelet matrix elements exceed `64`.
720    /// May instead return 0.
721    ///
722    /// [`rank_u64`]: WaveletMatrix::rank_u64
723    #[must_use]
724    pub fn rank_u64_unchecked(&self, i: usize, symbol: u64) -> usize {
725        self.rank_range_u64_unchecked(0..i, symbol)
726    }
727
728    /// Get the number of occurrences of the given `symbol` in the encoded sequence up to the `i`-th
729    /// element (exclusive).
730    /// The `symbol` is a `k`-bit word encoded in a u64 numeral,
731    /// where k is less than or equal to 64.
732    ///
733    /// Returns `None` if `i` is out of bounds (greater than the length of the encoded sequence, but
734    /// since it is exclusive, it may be equal to the length),
735    /// or if the number of bits in the wavelet matrix elements exceed `64`.
736    ///
737    /// # Example
738    /// ```
739    /// use vers_vecs::{BitVec, WaveletMatrix};
740    ///
741    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
742    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
743    ///
744    /// assert_eq!(wavelet_matrix.rank_u64(3, 4), Some(2));
745    /// assert_eq!(wavelet_matrix.rank_u64(3, 1), Some(1));
746    /// ```
747    #[must_use]
748    pub fn rank_u64(&self, i: usize, symbol: u64) -> Option<usize> {
749        if i > self.len() || self.bits_per_element() > 64 {
750            None
751        } else {
752            Some(self.rank_range_u64_unchecked(0..i, symbol))
753        }
754    }
755
756    /// Get the index of the `rank`-th occurrence of the given `symbol` in the encoded sequence,
757    /// starting from the `offset`-th element.
758    /// The `symbol` is a `k`-bit word encoded in a [`BitVec`],
759    /// where the least significant bit is the first element, and `k` is the number of bits per element
760    /// in the wavelet matrix.
761    ///
762    /// This method does not perform bounds checking, nor does it check if the `symbol` is a valid
763    /// `k`-bit word.
764    /// Use [`select_offset`] for a checked version.
765    ///
766    /// Returns the index of the `rank`-th occurrence of the `symbol` in the encoded sequence,
767    /// or the length of the encoded sequence if the `rank`-th occurrence does not exist.
768    ///
769    /// # Panics
770    /// May panic if the `offset` is out of bounds,
771    /// or if the number of bits in `symbol` is lower than `k`.
772    /// May instead return the length of the encoded sequence.
773    ///
774    /// [`BitVec`]: BitVec
775    /// [`select_offset`]: WaveletMatrix::select_offset
776    #[must_use]
777    pub fn select_offset_unchecked(&self, offset: usize, rank: usize, symbol: &BitVec) -> usize {
778        let mut range_start = offset;
779
780        for (level, data) in self.data.iter().enumerate() {
781            if symbol.get_unchecked((self.bits_per_element() - 1) - level) == 0 {
782                range_start = data.rank0(range_start);
783            } else {
784                range_start = data.rank0 + data.rank1(range_start);
785            }
786        }
787
788        let mut range_end = range_start + rank;
789
790        for (level, data) in self.data.iter().enumerate().rev() {
791            if symbol.get_unchecked((self.bits_per_element() - 1) - level) == 0 {
792                range_end = data.select0(range_end);
793            } else {
794                range_end = data.select1(range_end - data.rank0);
795            }
796        }
797
798        range_end
799    }
800
801    /// Get the index of the `rank`-th occurrence of the given `symbol` in the encoded sequence,
802    /// starting from the `offset`-th element.
803    /// The `symbol` is a `k`-bit word encoded in a [`BitVec`],
804    /// where the least significant bit is the first element, and `k` is the number of bits per element
805    /// in the wavelet matrix.
806    ///
807    /// Returns `None` if `offset` is out of bounds, or if the number of bits in `symbol` is not equal to `k`,
808    /// or if the `rank`-th occurrence of the `symbol` does not exist.
809    ///
810    /// # Example
811    /// ```
812    /// use vers_vecs::{BitVec, WaveletMatrix};
813    ///
814    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
815    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
816    ///
817    /// assert_eq!(wavelet_matrix.select_offset(0, 0, &BitVec::pack_sequence_u8(&[4], 3)), Some(1));
818    /// assert_eq!(wavelet_matrix.select_offset(0, 1, &BitVec::pack_sequence_u8(&[4], 3)), Some(2));
819    /// assert_eq!(wavelet_matrix.select_offset(2, 0, &BitVec::pack_sequence_u8(&[4], 3)), Some(2));
820    /// assert_eq!(wavelet_matrix.select_offset(2, 1, &BitVec::pack_sequence_u8(&[4], 3)), None);
821    /// ```
822    ///
823    /// [`BitVec`]: BitVec
824    #[must_use]
825    pub fn select_offset(&self, offset: usize, rank: usize, symbol: &BitVec) -> Option<usize> {
826        if offset >= self.len() || symbol.len() != self.bits_per_element() {
827            None
828        } else {
829            let idx = self.select_offset_unchecked(offset, rank, symbol);
830            if idx < self.len() {
831                Some(idx)
832            } else {
833                None
834            }
835        }
836    }
837
838    /// Get the index of the `rank`-th occurrence of the given `symbol` in the encoded sequence,
839    /// starting from the `offset`-th element.
840    /// The `symbol` is a `k`-bit word encoded in a u64 numeral,
841    /// where k is less than or equal to 64.
842    ///
843    /// This method does not perform bounds checking, nor does it check if the elements of the
844    /// wavelet matrix can be represented in a u64 numeral.
845    /// Use [`select_offset_u64`] for a checked version.
846    ///
847    /// Returns the index of the `rank`-th occurrence of the `symbol` in the encoded sequence,
848    /// or the length of the encoded sequence if the `rank`-th occurrence does not exist.
849    ///
850    /// # Panics
851    /// May panic if the `offset` is out of bounds,
852    /// or if the number of bits in wavelet matrix elements exceed `64`.
853    /// May instead return the length of the encoded sequence.
854    ///
855    /// [`select_offset_u64`]: WaveletMatrix::select_offset_u64
856    #[must_use]
857    pub fn select_offset_u64_unchecked(&self, offset: usize, rank: usize, symbol: u64) -> usize {
858        let mut range_start = offset;
859
860        for (level, data) in self.data.iter().enumerate() {
861            if (symbol >> ((self.bits_per_element() - 1) - level)) & 1 == 0 {
862                range_start = data.rank0(range_start);
863            } else {
864                range_start = data.rank0 + data.rank1(range_start);
865            }
866        }
867
868        let mut range_end = range_start + rank;
869
870        for (level, data) in self.data.iter().enumerate().rev() {
871            if (symbol >> ((self.bits_per_element() - 1) - level)) & 1 == 0 {
872                range_end = data.select0(range_end);
873            } else {
874                range_end = data.select1(range_end - data.rank0);
875            }
876        }
877
878        range_end
879    }
880
881    /// Get the index of the `rank`-th occurrence of the given `symbol` in the encoded sequence,
882    /// starting from the `offset`-th element.
883    /// The `symbol` is a `k`-bit word encoded in a u64 numeral,
884    /// where k is less than or equal to 64.
885    ///
886    /// Returns `None` if `offset` is out of bounds, or if the number of bits in the wavelet matrix
887    /// elements exceed `64`, or if the `rank`-th occurrence of the `symbol` does not exist.
888    ///
889    /// # Example
890    /// ```
891    /// use vers_vecs::{BitVec, WaveletMatrix};
892    ///
893    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
894    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
895    ///
896    /// assert_eq!(wavelet_matrix.select_offset_u64(0, 0, 4), Some(1));
897    /// assert_eq!(wavelet_matrix.select_offset_u64(0, 1, 4), Some(2));
898    /// assert_eq!(wavelet_matrix.select_offset_u64(2, 0, 4), Some(2));
899    /// assert_eq!(wavelet_matrix.select_offset_u64(2, 1, 4), None);
900    /// ```
901    #[must_use]
902    pub fn select_offset_u64(&self, offset: usize, rank: usize, symbol: u64) -> Option<usize> {
903        if offset >= self.len() || self.bits_per_element() > 64 {
904            None
905        } else {
906            let idx = self.select_offset_u64_unchecked(offset, rank, symbol);
907            if idx < self.len() {
908                Some(idx)
909            } else {
910                None
911            }
912        }
913    }
914
915    /// Get the index of the `rank`-th occurrence of the given `symbol` in the encoded sequence.
916    /// The `symbol` is a `k`-bit word encoded in a [`BitVec`],
917    /// where the least significant bit is the first element, and `k` is the number of bits per element
918    /// in the wavelet matrix.
919    ///
920    /// This method does not perform bounds checking, nor does it check if the `symbol` is a valid
921    /// `k`-bit word.
922    /// Use [`select`] for a checked version.
923    ///
924    /// Returns the index of the `rank`-th occurrence of the `symbol` in the encoded sequence,
925    /// or the length of the encoded sequence if the `rank`-th occurrence does not exist.
926    ///
927    /// # Panics
928    /// May panic if the number of bits in `symbol` is not equal to `k`.
929    /// May instead return the length of the encoded sequence.
930    ///
931    /// [`BitVec`]: BitVec
932    /// [`select`]: WaveletMatrix::select
933    #[must_use]
934    pub fn select_unchecked(&self, rank: usize, symbol: &BitVec) -> usize {
935        self.select_offset_unchecked(0, rank, symbol)
936    }
937
938    /// Get the index of the `rank`-th occurrence of the given `symbol` in the encoded sequence.
939    /// The `symbol` is a `k`-bit word encoded in a [`BitVec`],
940    /// where the least significant bit is the first element, and `k` is the number of bits per element
941    /// in the wavelet matrix.
942    ///
943    /// Returns `None` if the number of bits in `symbol` is not equal to `k`,
944    /// or if the `rank`-th occurrence of the `symbol` does not exist.
945    ///
946    /// # Example
947    /// ```
948    /// use vers_vecs::{BitVec, WaveletMatrix};
949    ///
950    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
951    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
952    ///
953    /// assert_eq!(wavelet_matrix.select(0, &BitVec::pack_sequence_u8(&[4], 3)), Some(1));
954    /// assert_eq!(wavelet_matrix.select(1, &BitVec::pack_sequence_u8(&[4], 3)), Some(2));
955    /// ```
956    ///
957    /// [`BitVec`]: BitVec
958    #[must_use]
959    pub fn select(&self, rank: usize, symbol: &BitVec) -> Option<usize> {
960        if symbol.len() == self.bits_per_element() {
961            let idx = self.select_unchecked(rank, symbol);
962            if idx < self.len() {
963                Some(idx)
964            } else {
965                None
966            }
967        } else {
968            None
969        }
970    }
971
972    /// Get the index of the `rank`-th occurrence of the given `symbol` in the encoded sequence.
973    /// The `symbol` is a `k`-bit word encoded in a u64 numeral,
974    /// where k is less than or equal to 64.
975    ///
976    /// This method does not perform bounds checking, nor does it check if the elements of the
977    /// wavelet matrix can be represented in a u64 numeral.
978    /// Use [`select_u64`] for a checked version.
979    ///
980    /// Returns the index of the `rank`-th occurrence of the `symbol` in the encoded sequence,
981    /// or the length of the encoded sequence if the `rank`-th occurrence does not exist.
982    ///
983    /// # Panics
984    /// May panic if the number of bits in wavelet matrix elements exceed `64`.
985    /// May instead return the length of the encoded sequence.
986    ///
987    /// [`select_u64`]: WaveletMatrix::select_u64
988    #[must_use]
989    pub fn select_u64_unchecked(&self, rank: usize, symbol: u64) -> usize {
990        self.select_offset_u64_unchecked(0, rank, symbol)
991    }
992
993    /// Get the index of the `rank`-th occurrence of the given `symbol` in the encoded sequence.
994    /// The `symbol` is a `k`-bit word encoded in a u64 numeral,
995    /// where k is less than or equal to 64.
996    ///
997    /// Returns `None` if the number of bits in the wavelet matrix elements exceed `64`,
998    /// or if the `rank`-th occurrence of the `symbol` does not exist.
999    ///
1000    /// # Example
1001    /// ```
1002    /// use vers_vecs::{BitVec, WaveletMatrix};
1003    ///
1004    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1005    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1006    ///
1007    /// assert_eq!(wavelet_matrix.select_u64(0, 4), Some(1));
1008    /// assert_eq!(wavelet_matrix.select_u64(1, 4), Some(2));
1009    /// ```
1010    #[must_use]
1011    pub fn select_u64(&self, rank: usize, symbol: u64) -> Option<usize> {
1012        if self.bits_per_element() > 64 {
1013            None
1014        } else {
1015            let idx = self.select_u64_unchecked(rank, symbol);
1016            if idx < self.len() {
1017                Some(idx)
1018            } else {
1019                None
1020            }
1021        }
1022    }
1023
1024    /// Get the `k`-th smallest element in the encoded sequence in the specified `range`,
1025    /// where `k = 0` returns the smallest element.
1026    /// The `range` is a half-open interval, meaning that the `end` index is exclusive.
1027    /// The `k`-th smallest element is returned as a `BitVec`,
1028    /// where the least significant bit is the first element.
1029    ///
1030    /// This method does not perform bounds checking.
1031    /// It returns a nonsensical result if the `k` is greater than the size of the range.
1032    /// Use [`quantile`] for a checked version.
1033    ///
1034    /// # Panics
1035    /// May panic if the `range` is out of bounds. May instead return an empty bit vector.
1036    ///
1037    /// [`quantile`]: WaveletMatrix::quantile
1038    #[must_use]
1039    pub fn quantile_unchecked(&self, range: Range<usize>, k: usize) -> BitVec {
1040        let result = BitVec::from_zeros(self.bits_per_element());
1041
1042        self.partial_quantile_search_unchecked(range, k, 0, result)
1043    }
1044
1045    /// Internal function to reuse the quantile code for the predecessor and successor search.
1046    /// This function performs the quantile search starting at the given level with the given prefix.
1047    ///
1048    /// The function does not perform any checks, so the caller must ensure that the range is valid,
1049    /// and that the prefix is a valid prefix for the given level.
1050    #[inline(always)]
1051    fn partial_quantile_search_unchecked(
1052        &self,
1053        mut range: Range<usize>,
1054        mut k: usize,
1055        start_level: usize,
1056        mut prefix: BitVec,
1057    ) -> BitVec {
1058        debug_assert!(prefix.len() == self.bits_per_element());
1059        debug_assert!(!range.is_empty());
1060        debug_assert!(range.end <= self.len());
1061
1062        for (level, data) in self.data.iter().enumerate().skip(start_level) {
1063            let zeros_start = data.rank0(range.start);
1064            let zeros_end = data.rank0(range.end);
1065            let zeros = zeros_end - zeros_start;
1066
1067            // if k < zeros, the element is among the zeros
1068            if k < zeros {
1069                range.start = zeros_start;
1070                range.end = zeros_end;
1071            } else {
1072                // the element is among the ones, so we set the bit to 1, and move the range
1073                // into the 1-partition of the next level
1074                prefix.set_unchecked((self.bits_per_element() - 1) - level, 1);
1075                k -= zeros;
1076                range.start = data.rank0 + (range.start - zeros_start); // range.start - zeros_start is the rank1 of range.start
1077                range.end = data.rank0 + (range.end - zeros_end); // same here
1078            }
1079        }
1080
1081        prefix
1082    }
1083
1084    /// Get the `k`-th smallest element in the encoded sequence in the specified `range`,
1085    /// where `k = 0` returns the smallest element.
1086    /// The `range` is a half-open interval, meaning that the `end` index is exclusive.
1087    /// The `k`-th smallest element is returned as a [`BitVec`],
1088    /// where the least significant bit is the first element.
1089    ///
1090    /// Returns `None` if the `range` is out of bounds, or if `k` is greater than the size of the range.
1091    ///
1092    /// # Example
1093    /// ```
1094    /// use vers_vecs::{BitVec, WaveletMatrix};
1095    ///
1096    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1097    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1098    ///
1099    /// assert_eq!(wavelet_matrix.quantile(0..3, 0), Some(BitVec::pack_sequence_u8(&[1], 3)));
1100    /// assert_eq!(wavelet_matrix.quantile(0..3, 1), Some(BitVec::pack_sequence_u8(&[4], 3)));
1101    /// assert_eq!(wavelet_matrix.quantile(1..4, 0), Some(BitVec::pack_sequence_u8(&[1], 3)));
1102    /// ```
1103    #[must_use]
1104    pub fn quantile(&self, range: Range<usize>, k: usize) -> Option<BitVec> {
1105        if range.start >= self.len() || range.end > self.len() || k >= range.end - range.start {
1106            None
1107        } else {
1108            Some(self.quantile_unchecked(range, k))
1109        }
1110    }
1111
1112    /// Get the `i`-th smallest element in the entire wavelet matrix.
1113    /// The `i`-th smallest element is returned as a `BitVec`,
1114    /// where the least significant bit is the first element.
1115    ///
1116    /// This method does not perform bounds checking.
1117    /// Use [`get_sorted`] for a checked version.
1118    ///
1119    /// # Panics
1120    /// May panic if the `i` is out of bounds, or returns an empty bit vector.
1121    ///
1122    /// [`get_sorted`]: Self::get_sorted
1123    #[must_use]
1124    pub fn get_sorted_unchecked(&self, i: usize) -> BitVec {
1125        self.quantile_unchecked(0..self.len(), i)
1126    }
1127
1128    /// Get the `i`-th smallest element in the wavelet matrix.
1129    /// The `i`-th smallest element is returned as a `BitVec`,
1130    /// where the least significant bit is the first element.
1131    /// This method call is equivalent to `self.quantile(0..self.len(), i)`.
1132    ///
1133    /// Returns `None` if the `i` is out of bounds.
1134    ///
1135    /// # Example
1136    /// ```
1137    /// use vers_vecs::{BitVec, WaveletMatrix};
1138    ///
1139    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1140    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1141    ///
1142    /// assert_eq!(wavelet_matrix.get_sorted(0), Some(BitVec::pack_sequence_u8(&[1], 3)));
1143    /// assert_eq!(wavelet_matrix.get_sorted(1), Some(BitVec::pack_sequence_u8(&[1], 3)));
1144    /// assert_eq!(wavelet_matrix.get_sorted(2), Some(BitVec::pack_sequence_u8(&[2], 3)));
1145    /// ```
1146    #[must_use]
1147    pub fn get_sorted(&self, i: usize) -> Option<BitVec> {
1148        if i >= self.len() {
1149            None
1150        } else {
1151            Some(self.get_sorted_unchecked(i))
1152        }
1153    }
1154
1155    /// Get the `k`-th smallest element in the encoded sequence in the specified `range`,
1156    /// where `k = 0` returns the smallest element.
1157    /// The `range` is a half-open interval, meaning that the `end` index is exclusive.
1158    /// The `k`-th smallest element is returned as a `u64` numeral.
1159    /// If the number of bits per element exceeds 64, the value is truncated.
1160    ///
1161    /// This method does not perform bounds checking.
1162    /// It returns a nonsensical result if the `k` is greater than the size of the range.
1163    /// Use [`quantile_u64`] for a checked version.
1164    ///
1165    /// # Panics
1166    /// May panic if the `range` is out of bounds.
1167    /// May instead return 0.
1168    ///
1169    /// [`quantile_u64`]: WaveletMatrix::quantile_u64
1170    #[must_use]
1171    pub fn quantile_u64_unchecked(&self, range: Range<usize>, k: usize) -> u64 {
1172        self.partial_quantile_search_u64_unchecked(range, k, 0, 0)
1173    }
1174
1175    /// Internal function to reuse the quantile code for the predecessor and successor search.
1176    /// This function performs the quantile search starting at the given level with the given prefix.
1177    ///
1178    /// The function does not perform any checks, so the caller must ensure that the range is valid,
1179    /// and that the prefix is a valid prefix for the given level (i.e. the prefix is not shifted
1180    /// to another level).
1181    #[inline(always)]
1182    fn partial_quantile_search_u64_unchecked(
1183        &self,
1184        mut range: Range<usize>,
1185        mut k: usize,
1186        start_level: usize,
1187        mut prefix: u64,
1188    ) -> u64 {
1189        debug_assert!(self.bits_per_element() <= 64);
1190        debug_assert!(!range.is_empty());
1191        debug_assert!(range.end <= self.len());
1192
1193        for data in self.data.iter().skip(start_level) {
1194            prefix <<= 1;
1195            let zeros_start = data.rank0(range.start);
1196            let zeros_end = data.rank0(range.end);
1197            let zeros = zeros_end - zeros_start;
1198
1199            if k < zeros {
1200                range.start = zeros_start;
1201                range.end = zeros_end;
1202            } else {
1203                prefix |= 1;
1204                k -= zeros;
1205                range.start = data.rank0 + (range.start - zeros_start);
1206                range.end = data.rank0 + (range.end - zeros_end);
1207            }
1208        }
1209
1210        prefix
1211    }
1212
1213    /// Get the `k`-th smallest element in the encoded sequence in the specified `range`,
1214    /// where `k = 0` returns the smallest element.
1215    /// The `range` is a half-open interval, meaning that the `end` index is exclusive.
1216    /// The `k`-th smallest element is returned as a `u64` numeral.
1217    ///
1218    /// Returns `None` if the `range` is out of bounds, or if the number of bits per element exceeds 64,
1219    /// or if `k` is greater than the size of the range.
1220    ///
1221    /// # Example
1222    /// ```
1223    /// use vers_vecs::{BitVec, WaveletMatrix};
1224    ///
1225    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1226    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1227    ///
1228    /// assert_eq!(wavelet_matrix.quantile_u64(0..3, 0), Some(1));
1229    /// assert_eq!(wavelet_matrix.quantile_u64(0..3, 1), Some(4));
1230    /// assert_eq!(wavelet_matrix.quantile_u64(1..4, 0), Some(1));
1231    /// ```
1232    #[must_use]
1233    pub fn quantile_u64(&self, range: Range<usize>, k: usize) -> Option<u64> {
1234        if range.start >= self.len()
1235            || range.end > self.len()
1236            || self.bits_per_element() > 64
1237            || k >= range.end - range.start
1238        {
1239            None
1240        } else {
1241            Some(self.quantile_u64_unchecked(range, k))
1242        }
1243    }
1244
1245    /// Get the `i`-th smallest element in the wavelet matrix.
1246    /// The `i`-th smallest element is returned as a u64 numeral.
1247    ///
1248    /// If the number of bits per element exceeds 64, the value is truncated.
1249    ///
1250    /// This method does not perform bounds checking.
1251    /// Use [`get_sorted_u64`] for a checked version.
1252    ///
1253    /// # Panics
1254    /// May panic if the `i` is out of bounds, or returns an empty bit vector.
1255    ///
1256    /// [`get_sorted_u64`]: WaveletMatrix::get_sorted_u64
1257    #[must_use]
1258    pub fn get_sorted_u64_unchecked(&self, i: usize) -> u64 {
1259        self.quantile_u64_unchecked(0..self.len(), i)
1260    }
1261
1262    /// Get the `i`-th smallest element in the entire wavelet matrix.
1263    /// The `i`-th smallest element is returned as a u64 numeral.
1264    ///
1265    /// Returns `None` if the `i` is out of bounds, or if the number of bits per element exceeds 64.
1266    ///
1267    /// # Example
1268    /// ```
1269    /// use vers_vecs::{BitVec, WaveletMatrix};
1270    ///
1271    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1272    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1273    ///
1274    /// assert_eq!(wavelet_matrix.get_sorted_u64(0), Some(1));
1275    /// assert_eq!(wavelet_matrix.get_sorted_u64(1), Some(1));
1276    /// assert_eq!(wavelet_matrix.get_sorted_u64(2), Some(2));
1277    /// ```
1278    #[must_use]
1279    pub fn get_sorted_u64(&self, i: usize) -> Option<u64> {
1280        if i >= self.len() || self.bits_per_element() > 64 {
1281            None
1282        } else {
1283            Some(self.get_sorted_u64_unchecked(i))
1284        }
1285    }
1286
1287    /// Get the smallest element in the encoded sequence in the specified `range`.
1288    /// The range is a half-open interval, meaning that the `end` index is exclusive.
1289    /// The smallest element is returned as a `BitVec`,
1290    ///
1291    /// This method does not perform bounds checking.
1292    /// Use [`range_min`] for a checked version.
1293    ///
1294    /// # Panics
1295    /// May panic if the `range` is out of bounds or if the range is empty.
1296    /// May instead return an empty bit vector.
1297    ///
1298    /// [`range_min`]: WaveletMatrix::range_min
1299    #[must_use]
1300    pub fn range_min_unchecked(&self, range: Range<usize>) -> BitVec {
1301        self.quantile_unchecked(range, 0)
1302    }
1303
1304    /// Get the smallest element in the encoded sequence in the specified `range`.
1305    /// The range is a half-open interval, meaning that the `end` index is exclusive.
1306    /// The smallest element is returned as a `BitVec`,
1307    ///
1308    /// Returns `None` if the `range` is out of bounds or if the range is empty.
1309    ///
1310    /// # Example
1311    /// ```
1312    /// use vers_vecs::{BitVec, WaveletMatrix};
1313    ///
1314    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1315    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1316    ///
1317    /// assert_eq!(wavelet_matrix.range_min(0..3), Some(BitVec::pack_sequence_u8(&[1], 3)));
1318    /// assert_eq!(wavelet_matrix.range_min(1..4), Some(BitVec::pack_sequence_u8(&[1], 3)));
1319    /// assert_eq!(wavelet_matrix.range_min(1..3), Some(BitVec::pack_sequence_u8(&[4], 3)));
1320    /// ```
1321    #[must_use]
1322    pub fn range_min(&self, range: Range<usize>) -> Option<BitVec> {
1323        self.quantile(range, 0)
1324    }
1325
1326    /// Get the smallest element in the encoded sequence in the specified `range`.
1327    /// The range is a half-open interval, meaning that the `end` index is exclusive.
1328    /// The smallest element is returned as a `u64` numeral.
1329    /// If the number of bits per element exceeds 64, the value is truncated.
1330    ///
1331    /// This method does not perform bounds checking.
1332    /// Use [`range_min_u64`] for a checked version.
1333    ///
1334    /// # Panics
1335    /// May panic if the `range` is out of bounds or if the range is empty.
1336    /// May instead return 0.
1337    ///
1338    /// [`range_min_u64`]: WaveletMatrix::range_min_u64
1339    #[must_use]
1340    pub fn range_min_u64_unchecked(&self, range: Range<usize>) -> u64 {
1341        self.quantile_u64_unchecked(range, 0)
1342    }
1343
1344    /// Get the smallest element in the encoded sequence in the specified `range`.
1345    /// The range is a half-open interval, meaning that the `end` index is exclusive.
1346    /// The smallest element is returned as a `u64` numeral.
1347    ///
1348    /// Returns `None` if the `range` is out of bounds, if the range is empty, or if the number of bits
1349    /// per element exceeds 64.
1350    ///
1351    /// # Example
1352    /// ```
1353    /// use vers_vecs::{BitVec, WaveletMatrix};
1354    ///
1355    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1356    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1357    ///
1358    /// assert_eq!(wavelet_matrix.range_min_u64(0..3), Some(1));
1359    /// assert_eq!(wavelet_matrix.range_min_u64(1..4), Some(1));
1360    /// assert_eq!(wavelet_matrix.range_min_u64(1..3), Some(4));
1361    /// ```
1362    #[must_use]
1363    pub fn range_min_u64(&self, range: Range<usize>) -> Option<u64> {
1364        self.quantile_u64(range, 0)
1365    }
1366
1367    /// Get the largest element in the encoded sequence in the specified `range`.
1368    /// The range is a half-open interval, meaning that the `end` index is exclusive.
1369    /// The largest element is returned as a `BitVec`,
1370    /// where the least significant bit is the first element.
1371    ///
1372    /// This method does not perform bounds checking.
1373    /// Use [`range_max`] for a checked version.
1374    ///
1375    /// # Panics
1376    /// May panic if the `range` is out of bounds or if the range is empty.
1377    /// May instead return an empty bit vector.
1378    ///
1379    /// [`range_max`]: WaveletMatrix::range_max
1380    #[must_use]
1381    pub fn range_max_unchecked(&self, range: Range<usize>) -> BitVec {
1382        let k = range.end - range.start - 1;
1383        self.quantile_unchecked(range, k)
1384    }
1385
1386    /// Get the largest element in the encoded sequence in the specified `range`.
1387    /// The range is a half-open interval, meaning that the `end` index is exclusive.
1388    /// The largest element is returned as a `BitVec`,
1389    /// where the least significant bit is the first element.
1390    ///
1391    /// Returns `None` if the `range` is out of bounds or if the range is empty.
1392    ///
1393    /// # Example
1394    /// ```
1395    /// use vers_vecs::{BitVec, WaveletMatrix};
1396    ///
1397    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1398    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1399    ///
1400    /// assert_eq!(wavelet_matrix.range_max(0..3), Some(BitVec::pack_sequence_u8(&[4], 3)));
1401    /// assert_eq!(wavelet_matrix.range_max(3..6), Some(BitVec::pack_sequence_u8(&[7], 3)));
1402    /// ```
1403    #[must_use]
1404    pub fn range_max(&self, range: Range<usize>) -> Option<BitVec> {
1405        if range.is_empty() {
1406            None
1407        } else {
1408            let k = range.end - range.start - 1;
1409            self.quantile(range, k)
1410        }
1411    }
1412
1413    /// Get the largest element in the encoded sequence in the specified `range`.
1414    /// The range is a half-open interval, meaning that the `end` index is exclusive.
1415    /// The largest element is returned as a `u64` numeral.
1416    /// If the number of bits per element exceeds 64, the value is truncated.
1417    ///
1418    /// This method does not perform bounds checking.
1419    /// Use [`range_max_u64`] for a checked version.
1420    ///
1421    /// # Panics
1422    /// May panic if the `range` is out of bounds or if the range is empty.
1423    /// May instead return 0.
1424    ///
1425    /// [`range_max_u64`]: WaveletMatrix::range_max_u64
1426    #[must_use]
1427    pub fn range_max_u64_unchecked(&self, range: Range<usize>) -> u64 {
1428        let k = range.end - range.start - 1;
1429        self.quantile_u64_unchecked(range, k)
1430    }
1431
1432    /// Get the largest element in the encoded sequence in the specified `range`.
1433    /// The range is a half-open interval, meaning that the `end` index is exclusive.
1434    /// The largest element is returned as a `u64` numeral.
1435    ///
1436    /// Returns `None` if the `range` is out of bounds, if the range is empty, or if the number of bits
1437    /// per element exceeds 64.
1438    ///
1439    /// # Example
1440    /// ```
1441    /// use vers_vecs::{BitVec, WaveletMatrix};
1442    ///
1443    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1444    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1445    ///
1446    /// assert_eq!(wavelet_matrix.range_max_u64(0..3), Some(4));
1447    /// assert_eq!(wavelet_matrix.range_max_u64(3..6), Some(7));
1448    /// ```
1449    #[must_use]
1450    pub fn range_max_u64(&self, range: Range<usize>) -> Option<u64> {
1451        if range.is_empty() {
1452            None
1453        } else {
1454            let k = range.end - range.start - 1;
1455            self.quantile_u64(range, k)
1456        }
1457    }
1458
1459    /// Get the median element in the encoded sequence in the specified `range`.
1460    /// The range is a half-open interval, meaning that the `end` index is exclusive.
1461    /// The median element is returned as a `BitVec`,
1462    /// where the least significant bit is the first element.
1463    ///
1464    /// If the range does not contain an odd number of elements, the position is rounded down.
1465    ///
1466    /// This method does not perform bounds checking.
1467    /// Use [`range_median`] for a checked version.
1468    ///
1469    /// # Panics
1470    /// May panic if the `range` is out of bounds or if the range is empty.
1471    /// May instead return an empty bit vector.
1472    ///
1473    /// [`range_median`]: WaveletMatrix::range_median
1474    #[must_use]
1475    pub fn range_median_unchecked(&self, range: Range<usize>) -> BitVec {
1476        let k = (range.end - 1 - range.start) / 2;
1477        self.quantile_unchecked(range, k)
1478    }
1479
1480    /// Get the median element in the encoded sequence in the specified `range`.
1481    /// The range is a half-open interval, meaning that the `end` index is exclusive.
1482    /// The median element is returned as a `BitVec`,
1483    /// where the least significant bit is the first element.
1484    ///
1485    /// If the range does not contain an odd number of elements, the position is rounded down.
1486    ///
1487    /// Returns `None` if the `range` is out of bounds or if the range is empty.
1488    ///
1489    /// # Example
1490    /// ```
1491    /// use vers_vecs::{BitVec, WaveletMatrix};
1492    ///
1493    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1494    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1495    ///
1496    /// assert_eq!(wavelet_matrix.range_median(0..3), Some(BitVec::pack_sequence_u8(&[4], 3)));
1497    /// assert_eq!(wavelet_matrix.range_median(1..4), Some(BitVec::pack_sequence_u8(&[4], 3)));
1498    /// assert_eq!(wavelet_matrix.range_median(0..6), Some(BitVec::pack_sequence_u8(&[2], 3)));
1499    /// ```
1500    #[must_use]
1501    pub fn range_median(&self, range: Range<usize>) -> Option<BitVec> {
1502        if range.is_empty() {
1503            None
1504        } else {
1505            let k = (range.end - 1 - range.start) / 2;
1506            self.quantile(range, k)
1507        }
1508    }
1509
1510    /// Get the median element in the encoded sequence in the specified `range`.
1511    /// The range is a half-open interval, meaning that the `end` index is exclusive.
1512    /// The median element is returned as a `u64` numeral.
1513    /// If the number of bits per element exceeds 64, the value is truncated.
1514    ///
1515    /// If the range does not contain an odd number of elements, the position is rounded down.
1516    ///
1517    /// This method does not perform bounds checking.
1518    /// Use [`range_median_u64`] for a checked version.
1519    ///
1520    /// # Panics
1521    /// May panic if the `range` is out of bounds or if the range is empty.
1522    /// May instead return 0.
1523    ///
1524    /// [`range_median_u64`]: WaveletMatrix::range_median_u64
1525    #[must_use]
1526    pub fn range_median_u64_unchecked(&self, range: Range<usize>) -> u64 {
1527        let k = (range.end - 1 - range.start) / 2;
1528        self.quantile_u64_unchecked(range, k)
1529    }
1530
1531    /// Get the median element in the encoded sequence in the specified `range`.
1532    /// The range is a half-open interval, meaning that the `end` index is exclusive.
1533    /// The median element is returned as a `u64` numeral.
1534    ///
1535    /// If the range does not contain an odd number of elements, the position is rounded down.
1536    ///
1537    /// Returns `None` if the `range` is out of bounds, if the range is empty, or if the number of bits
1538    /// per element exceeds 64.
1539    ///
1540    /// # Example
1541    /// ```
1542    /// use vers_vecs::{BitVec, WaveletMatrix};
1543    ///
1544    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1545    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1546    ///
1547    /// assert_eq!(wavelet_matrix.range_median_u64(0..3), Some(4));
1548    /// assert_eq!(wavelet_matrix.range_median_u64(1..4), Some(4));
1549    /// assert_eq!(wavelet_matrix.range_median_u64(0..6), Some(2));
1550    /// ```
1551    #[must_use]
1552    pub fn range_median_u64(&self, range: Range<usize>) -> Option<u64> {
1553        if range.is_empty() || self.bits_per_element() > 64 || range.end > self.len() {
1554            None
1555        } else {
1556            let k = (range.end - 1 - range.start) / 2;
1557            self.quantile_u64(range, k)
1558        }
1559    }
1560
1561    /// Get the predecessor of the given `symbol` in the given `range`.
1562    /// This is a private generic helper function to implement the public `predecessor` functions.
1563    ///
1564    /// The read and write access to the query and result values are abstracted by the `Reader` and `Writer` closures.
1565    #[inline(always)] // even though the function is pretty large, inlining probably gets rid of the closure calls in favor of static calls
1566    fn predecessor_generic_unchecked<
1567        T: Clone,
1568        Reader: Fn(usize, &T) -> u64,
1569        Writer: Fn(u64, usize, &mut T),
1570        Quantile: Fn(&Self, Range<usize>, usize, usize, T) -> T,
1571    >(
1572        &self,
1573        mut range: Range<usize>,
1574        symbol: &T,
1575        mut result_value: T,
1576        bit_reader: Reader,
1577        result_writer: Writer,
1578        quantile_search: Quantile,
1579    ) -> Option<T> {
1580        // the bit-prefix at the last node where we could go to an interval with smaller elements,
1581        // i.e. where we need to go if the current prefix has no elements smaller than the query
1582        let mut last_smaller_prefix = result_value.clone();
1583        // the level of the last node where we could go to an interval with smaller elements
1584        let mut last_one_level: Option<usize> = None;
1585        // the range of the last node where we could go to an interval with smaller elements
1586        let mut next_smaller_range: Option<Range<usize>> = None;
1587
1588        for (level, data) in self.data.iter().enumerate() {
1589            let query_bit = bit_reader(level, symbol);
1590
1591            // read the amount of elements with the current result-prefix plus one 0 bit.
1592            // if the query_bit is 1, we can calculate the amount of elements with the result-prefix
1593            // plus one 1 from there
1594            let zeros_start = data.rank0(range.start);
1595            let zeros_end = data.rank0(range.end);
1596            let elements_zero = zeros_end - zeros_start;
1597
1598            if query_bit == 0 {
1599                if elements_zero == 0 {
1600                    // if our query bit is zero in this level and suddenly our new interval is empty,
1601                    // all elements that were in the previous interval are bigger than the query element,
1602                    // because they all have a 1 in the current level.
1603                    // this means the predecessor is the largest element in the last smaller interval,
1604                    // i.e. the interval that has a 0 bit at the last level where our prefix had a 1 bit.
1605
1606                    return next_smaller_range.map(|r| {
1607                        let idx = r.end - r.start - 1;
1608                        result_writer(0, last_one_level.unwrap(), &mut last_smaller_prefix);
1609                        quantile_search(
1610                            self,
1611                            r,
1612                            idx,
1613                            last_one_level.unwrap() + 1,
1614                            last_smaller_prefix,
1615                        )
1616                    });
1617                }
1618
1619                // update the prefix
1620                result_writer(0, level, &mut result_value);
1621
1622                // update the range to the interval of the new prefix
1623                range.start = zeros_start;
1624                range.end = zeros_end;
1625            } else {
1626                if elements_zero == range.end - range.start {
1627                    // if our query element is 1 in this level and suddenly our new interval is empty,
1628                    // all elements that were in the previous interval are smaller than the query element,
1629                    // because they all have a 0 in the current level.
1630                    // this means the predecessor is the largest element in the last level's interval.
1631
1632                    let idx = range.end - range.start - 1;
1633                    return Some(quantile_search(self, range, idx, level, result_value));
1634                }
1635
1636                // if the other interval is not empty, we update the last smaller interval to the last interval where we can switch to a 0 prefix
1637                if !(zeros_start..zeros_end).is_empty() {
1638                    last_one_level = Some(level);
1639                    next_smaller_range = Some(zeros_start..zeros_end);
1640                    last_smaller_prefix = result_value.clone();
1641                }
1642
1643                // update the prefix
1644                result_writer(1, level, &mut result_value);
1645
1646                // update the range to the interval of the new prefix
1647                range.start = data.rank0 + (range.start - zeros_start);
1648                range.end = data.rank0 + (range.end - zeros_end);
1649            }
1650        }
1651
1652        Some(result_value)
1653    }
1654
1655    /// Get the predecessor of the given `symbol` in the given `range`.
1656    /// The `symbol` is a `k`-bit word encoded in a [`BitVec`],
1657    /// where the least significant bit is the first element, and `k` is the number of bits per element
1658    /// in the wavelet matrix.
1659    /// The `symbol` does not have to be in the encoded sequence.
1660    /// The predecessor is the largest element in the `range` that is smaller than or equal
1661    /// to the `symbol`.
1662    ///
1663    /// Returns `None` if the number of bits in the `symbol` is not equal to `k`,
1664    /// if the range is empty, if the wavelet matrix is empty, if the range is out of bounds,
1665    /// or if the `symbol` is smaller than all elements in the range.
1666    ///
1667    /// # Example
1668    /// ```
1669    /// use vers_vecs::{BitVec, WaveletMatrix};
1670    ///
1671    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1672    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1673    ///
1674    /// assert_eq!(wavelet_matrix.predecessor(0..3, &BitVec::pack_sequence_u8(&[7], 3)), Some(BitVec::pack_sequence_u8(&[4], 3)));
1675    /// assert_eq!(wavelet_matrix.predecessor(0..3, &BitVec::pack_sequence_u8(&[4], 3)), Some(BitVec::pack_sequence_u8(&[4], 3)));
1676    /// assert_eq!(wavelet_matrix.predecessor(0..6, &BitVec::pack_sequence_u8(&[7], 3)), Some(BitVec::pack_sequence_u8(&[7], 3)));
1677    /// ```
1678    ///
1679    /// [`BitVec`]: BitVec
1680    #[must_use]
1681    pub fn predecessor(&self, range: Range<usize>, symbol: &BitVec) -> Option<BitVec> {
1682        if symbol.len() != self.bits_per_element()
1683            || range.is_empty()
1684            || self.is_empty()
1685            || range.end > self.len()
1686        {
1687            return None;
1688        }
1689
1690        self.predecessor_generic_unchecked(
1691            range,
1692            symbol,
1693            BitVec::from_zeros(self.bits_per_element()),
1694            |level, symbol| symbol.get_unchecked((self.bits_per_element() - 1) - level),
1695            |bit, level, result| {
1696                result.set_unchecked((self.bits_per_element() - 1) - level, bit);
1697            },
1698            Self::partial_quantile_search_unchecked,
1699        )
1700    }
1701
1702    /// Get the predecessor of the given `symbol` in the given `range`.
1703    /// The `symbol` is a `k`-bit word encoded in a `u64` numeral,
1704    /// where k is less than or equal to 64.
1705    /// The `symbol` does not have to be in the encoded sequence.
1706    /// The predecessor is the largest element in the `range` that is smaller than or equal
1707    /// to the `symbol`.
1708    ///
1709    /// Returns `None` if the number of bits in the matrix is greater than 64,
1710    /// if the range is empty, if the wavelet matrix is empty, if the range is out of bounds,
1711    /// or if the `symbol` is smaller than all elements in the range.
1712    ///
1713    /// # Example
1714    /// ```
1715    /// use vers_vecs::{BitVec, WaveletMatrix};
1716    ///
1717    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1718    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1719    ///
1720    /// assert_eq!(wavelet_matrix.predecessor_u64(0..3, 7), Some(4));
1721    /// assert_eq!(wavelet_matrix.predecessor_u64(0..3, 4), Some(4));
1722    /// assert_eq!(wavelet_matrix.predecessor_u64(0..6, 7), Some(7));
1723    /// ```
1724    #[must_use]
1725    pub fn predecessor_u64(&self, range: Range<usize>, symbol: u64) -> Option<u64> {
1726        if self.bits_per_element() > 64
1727            || range.is_empty()
1728            || self.is_empty()
1729            || range.end > self.len()
1730        {
1731            return None;
1732        }
1733
1734        self.predecessor_generic_unchecked(
1735            range,
1736            &symbol,
1737            0,
1738            |level, symbol| (symbol >> ((self.bits_per_element() - 1) - level)) & 1,
1739            |bit, _level, result| {
1740                // we ignore the level here, and instead rely on the fact that the bits are set in order.
1741                // we have to do that, because the quantile_search_u64 does the same.
1742                *result <<= 1;
1743                *result |= bit;
1744            },
1745            Self::partial_quantile_search_u64_unchecked,
1746        )
1747    }
1748
1749    #[inline(always)]
1750    fn successor_generic_unchecked<
1751        T: Clone,
1752        Reader: Fn(usize, &T) -> u64,
1753        Writer: Fn(u64, usize, &mut T),
1754        Quantile: Fn(&Self, Range<usize>, usize, usize, T) -> T,
1755    >(
1756        &self,
1757        mut range: Range<usize>,
1758        symbol: &T,
1759        mut result_value: T,
1760        bit_reader: Reader,
1761        result_writer: Writer,
1762        quantile_search: Quantile,
1763    ) -> Option<T> {
1764        // the bit-prefix at the last node where we could go to an interval with larger elements,
1765        // i.e. where we need to go if the current prefix has no elements larger than the query
1766        let mut last_larger_prefix = result_value.clone();
1767        // the level of the last node where we could go to an interval with larger elements
1768        let mut last_zero_level: Option<usize> = None;
1769        // the range of the last node where we could go to an interval with larger elements
1770        let mut next_larger_range: Option<Range<usize>> = None;
1771
1772        for (level, data) in self.data.iter().enumerate() {
1773            let query_bit = bit_reader(level, symbol);
1774
1775            // read the amount of elements with the current result-prefix plus one 0 bit.
1776            // if the query_bit is 1, we can calculate the amount of elements with the result-prefix
1777            // plus one 1 from there
1778            let zeros_start = data.rank0(range.start);
1779            let zeros_end = data.rank0(range.end);
1780            let elements_zero = zeros_end - zeros_start;
1781
1782            if query_bit == 0 {
1783                if elements_zero == 0 {
1784                    // if our query element is 0 in this level and suddenly our new interval is empty,
1785                    // all elements that were in the previous interval are larger than the query element,
1786                    // because they all have a 1 in the current level.
1787                    // this means the successor is the smallest element in the last level's interval.
1788
1789                    return Some(quantile_search(self, range, 0, level, result_value));
1790                }
1791
1792                // if the other interval is not empty, we update the last interval where we can switch to a prefix with a 1
1793                if !(data.rank0 + (range.start - zeros_start)..data.rank0 + (range.end - zeros_end))
1794                    .is_empty()
1795                {
1796                    last_zero_level = Some(level);
1797                    next_larger_range = Some(
1798                        data.rank0 + (range.start - zeros_start)
1799                            ..data.rank0 + (range.end - zeros_end),
1800                    );
1801                    last_larger_prefix = result_value.clone();
1802                }
1803
1804                // update the prefix
1805                result_writer(0, level, &mut result_value);
1806
1807                // update the range to the interval of the new prefix
1808                range.start = zeros_start;
1809                range.end = zeros_end;
1810            } else {
1811                if elements_zero == range.end - range.start {
1812                    // if our query bit is 1 in this level and suddenly our new interval is empty,
1813                    // all elements that were in the previous interval are smaller than the query element,
1814                    // because they all have a 0 in the current level.
1815                    // this means the successor is the smallest element in the last interval with a larger prefix,
1816                    // i.e. the interval that has a 1 bit at the last level where our prefix had a 0 bit.
1817
1818                    return next_larger_range.map(|r| {
1819                        result_writer(1, last_zero_level.unwrap(), &mut last_larger_prefix);
1820                        quantile_search(
1821                            self,
1822                            r,
1823                            0,
1824                            last_zero_level.unwrap() + 1,
1825                            last_larger_prefix,
1826                        )
1827                    });
1828                }
1829
1830                // update the prefix
1831                result_writer(1, level, &mut result_value);
1832
1833                // update the range to the interval of the new prefix
1834                range.start = data.rank0 + (range.start - zeros_start);
1835                range.end = data.rank0 + (range.end - zeros_end);
1836            }
1837        }
1838
1839        Some(result_value)
1840    }
1841
1842    /// Get the successor of the given `symbol` in the given range.
1843    /// The `symbol` is a `k`-bit word encoded in a [`BitVec`],
1844    /// where the least significant bit is the first element, and `k` is the number of bits per element
1845    /// in the wavelet matrix.
1846    /// The `symbol` does not have to be in the encoded sequence.
1847    /// The successor is the smallest element in the range that is greater than or equal
1848    /// to the `symbol`.
1849    ///
1850    /// Returns `None` if the number of bits in the `symbol` is not equal to `k`,
1851    /// if the range is empty, if the wavelet matrix is empty, if the range is out of bounds,
1852    /// or if the `symbol` is greater than all elements in the range.
1853    ///
1854    /// # Example
1855    /// ```
1856    /// use vers_vecs::{BitVec, WaveletMatrix};
1857    ///
1858    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1859    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1860    ///
1861    /// assert_eq!(wavelet_matrix.successor(0..3, &BitVec::pack_sequence_u8(&[2], 3)), Some(BitVec::pack_sequence_u8(&[4], 3)));
1862    /// assert_eq!(wavelet_matrix.successor(0..3, &BitVec::pack_sequence_u8(&[5], 3)), None);
1863    /// assert_eq!(wavelet_matrix.successor(0..6, &BitVec::pack_sequence_u8(&[2], 3)), Some(BitVec::pack_sequence_u8(&[2], 3)));
1864    /// ```
1865    ///
1866    /// [`BitVec`]: BitVec
1867    #[must_use]
1868    pub fn successor(&self, range: Range<usize>, symbol: &BitVec) -> Option<BitVec> {
1869        if symbol.len() != self.bits_per_element()
1870            || range.is_empty()
1871            || self.is_empty()
1872            || range.end > self.len()
1873        {
1874            return None;
1875        }
1876
1877        self.successor_generic_unchecked(
1878            range,
1879            symbol,
1880            BitVec::from_zeros(self.bits_per_element()),
1881            |level, symbol| symbol.get_unchecked((self.bits_per_element() - 1) - level),
1882            |bit, level, result| {
1883                result.set_unchecked((self.bits_per_element() - 1) - level, bit);
1884            },
1885            Self::partial_quantile_search_unchecked,
1886        )
1887    }
1888
1889    /// Get the successor of the given `symbol` in the given range.
1890    /// The `symbol` is a `k`-bit word encoded in a `u64` numeral,
1891    /// where k is less than or equal to 64.
1892    /// The `symbol` does not have to be in the encoded sequence.
1893    /// The successor is the smallest element in the range that is greater than or equal
1894    /// to the `symbol`.
1895    ///
1896    /// Returns `None` if the number of bits in the matrix is greater than 64,
1897    /// if the range is empty, if the wavelet matrix is empty, if the range is out of bounds,
1898    /// or if the `symbol` is greater than all elements in the range.
1899    ///
1900    /// # Example
1901    /// ```
1902    /// use vers_vecs::{BitVec, WaveletMatrix};
1903    ///
1904    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1905    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1906    ///
1907    /// assert_eq!(wavelet_matrix.successor_u64(0..3, 2), Some(4));
1908    /// assert_eq!(wavelet_matrix.successor_u64(0..3, 5), None);
1909    /// assert_eq!(wavelet_matrix.successor_u64(0..6, 2), Some(2));
1910    /// ```
1911    #[must_use]
1912    pub fn successor_u64(&self, range: Range<usize>, symbol: u64) -> Option<u64> {
1913        if self.bits_per_element() > 64
1914            || range.is_empty()
1915            || self.is_empty()
1916            || range.end > self.len()
1917        {
1918            return None;
1919        }
1920
1921        self.successor_generic_unchecked(
1922            range,
1923            &symbol,
1924            0,
1925            |level, symbol| (symbol >> ((self.bits_per_element() - 1) - level)) & 1,
1926            |bit, _level, result| {
1927                // we ignore the level here, and instead rely on the fact that the bits are set in order.
1928                // we have to do that, because the quantile_search_u64 does the same.
1929                *result <<= 1;
1930                *result |= bit;
1931            },
1932            Self::partial_quantile_search_u64_unchecked,
1933        )
1934    }
1935
1936    /// Get an iterator over the elements of the encoded sequence.
1937    /// The iterator yields `u64` elements.
1938    /// If the number of bits per element exceeds 64, `None` is returned.
1939    ///
1940    /// # Example
1941    /// ```
1942    /// use vers_vecs::{BitVec, WaveletMatrix};
1943    ///
1944    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1945    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1946    ///
1947    /// let mut iter = wavelet_matrix.iter_u64().unwrap();
1948    /// assert_eq!(iter.collect::<Vec<_>>(), vec![1, 4, 4, 1, 2, 7]);
1949    /// ```
1950    #[must_use]
1951    pub fn iter_u64(&self) -> Option<WaveletNumRefIter<'_>> {
1952        if self.bits_per_element() > 64 {
1953            None
1954        } else {
1955            Some(WaveletNumRefIter::new(self))
1956        }
1957    }
1958
1959    /// Turn the encoded sequence into an iterator.
1960    /// The iterator yields `u64` elements.
1961    /// If the number of bits per element exceeds 64, `None` is returned.
1962    #[must_use]
1963    pub fn into_iter_u64(self) -> Option<WaveletNumIter> {
1964        if self.bits_per_element() > 64 {
1965            None
1966        } else {
1967            Some(WaveletNumIter::new(self))
1968        }
1969    }
1970
1971    /// Get an iterator over the sorted elements of the encoded sequence.
1972    /// The iterator yields `BitVec` elements.
1973    ///
1974    /// See also [`iter_sorted_u64`] for an iterator that yields `u64` elements.
1975    ///
1976    /// [`iter_sorted_u64`]: Self::iter_sorted_u64
1977    #[must_use]
1978    pub fn iter_sorted(&self) -> WaveletSortedRefIter<'_> {
1979        WaveletSortedRefIter::new(self)
1980    }
1981
1982    /// Turn the encoded sequence into an iterator over the sorted sequence.
1983    /// The iterator yields `BitVec` elements.
1984    #[must_use]
1985    pub fn into_iter_sorted(self) -> WaveletSortedIter {
1986        WaveletSortedIter::new(self)
1987    }
1988
1989    /// Get an iterator over the sorted elements of the encoded sequence.
1990    /// The iterator yields `u64` elements.
1991    /// If the number of bits per element exceeds 64, `None` is returned.
1992    ///
1993    /// # Example
1994    /// ```
1995    /// use vers_vecs::{BitVec, WaveletMatrix};
1996    ///
1997    /// let bit_vec = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
1998    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&bit_vec, 3);
1999    ///
2000    /// let mut iter = wavelet_matrix.iter_sorted_u64().unwrap();
2001    /// assert_eq!(iter.collect::<Vec<_>>(), vec![1, 1, 2, 4, 4, 7]);
2002    /// ```
2003    #[must_use]
2004    pub fn iter_sorted_u64(&self) -> Option<WaveletSortedNumRefIter<'_>> {
2005        if self.bits_per_element() > 64 {
2006            None
2007        } else {
2008            Some(WaveletSortedNumRefIter::new(self))
2009        }
2010    }
2011
2012    /// Turn the encoded sequence into an iterator over the sorted sequence.
2013    /// The iterator yields `u64` elements.
2014    /// If the number of bits per element exceeds 64, `None` is returned.
2015    #[must_use]
2016    pub fn into_iter_sorted_u64(self) -> Option<WaveletSortedNumIter> {
2017        if self.bits_per_element() > 64 {
2018            None
2019        } else {
2020            Some(WaveletSortedNumIter::new(self))
2021        }
2022    }
2023
2024    /// Get the number of bits per element in the alphabet of the encoded sequence.
2025    #[must_use]
2026    #[inline(always)]
2027    pub fn bits_per_element(&self) -> usize {
2028        self.data.len()
2029    }
2030
2031    /// Get the number of bits per element in the alphabet of the encoded sequence.
2032    #[must_use]
2033    #[deprecated(since = "1.5.1", note = "please use `bits_per_element` instead")]
2034    #[allow(clippy::cast_possible_truncation)]
2035    pub fn bit_len(&self) -> u16 {
2036        self.bits_per_element() as u16
2037    }
2038
2039    /// Get the number of elements stored in the encoded sequence.
2040    #[must_use]
2041    pub fn len(&self) -> usize {
2042        if self.data.is_empty() {
2043            0
2044        } else {
2045            self.data[0].len()
2046        }
2047    }
2048
2049    /// Check if the wavelet matrix is empty.
2050    #[must_use]
2051    pub fn is_empty(&self) -> bool {
2052        self.len() == 0
2053    }
2054
2055    /// Get the number of bytes allocated on the heap for the wavelet matrix.
2056    /// This does not include memory that is allocated but unused due to allocation policies of
2057    /// internal data structures.
2058    #[must_use]
2059    pub fn heap_size(&self) -> usize {
2060        self.data.iter().map(RsVec::heap_size).sum::<usize>()
2061    }
2062}
2063
2064impl_vector_iterator!(
2065    WaveletMatrix,
2066    WaveletIter,
2067    WaveletRefIter,
2068    get_value_unchecked,
2069    get_value,
2070    BitVec
2071);
2072
2073impl_vector_iterator!(
2074    WaveletMatrix,
2075    WaveletNumIter,
2076    WaveletNumRefIter,
2077    get_u64_unchecked,
2078    get_u64,
2079    u64,
2080    special
2081);
2082
2083impl_vector_iterator!(
2084    WaveletMatrix,
2085    WaveletSortedIter,
2086    WaveletSortedRefIter,
2087    get_sorted_unchecked,
2088    get_sorted,
2089    BitVec,
2090    special
2091);
2092
2093impl_vector_iterator!(
2094    WaveletMatrix,
2095    WaveletSortedNumIter,
2096    WaveletSortedNumRefIter,
2097    get_sorted_u64_unchecked,
2098    get_sorted_u64,
2099    u64,
2100    special
2101);
2102
2103#[cfg(test)]
2104mod tests;