Skip to main content

sux/traits/
rank_sel.rs

1/*
2 * SPDX-FileCopyrightText: 2023 Inria
3 * SPDX-FileCopyrightText: 2023 Sebastiano Vigna
4 *
5 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6 */
7
8//! Basic traits for succinct operations on bit vectors, including [`Rank`] and
9//! [`Select`].
10//!
11//! All traits in this module are automatically implemented for references,
12//! mutable references, and boxes. Moreover, usually they are all forwarded to
13//! underlying implementations.
14
15use crate::ambassador_impl_AsRef;
16use crate::ambassador_impl_Index;
17use ambassador::{Delegate, delegatable_trait};
18use impl_tools::autoimpl;
19use mem_dbg::{MemDbg, MemSize};
20use std::ops::Deref;
21use std::ops::Index;
22
23/// A trait expressing a length in bits.
24///
25/// This trait is typically used in conjunction with `AsRef<[usize]>` to provide
26/// word-based access to a bit vector.
27#[autoimpl(for<T: trait + ?Sized> &T, &mut T, Box<T>)]
28#[delegatable_trait]
29pub trait BitLength {
30    /// Returns a length in bits.
31    fn len(&self) -> usize;
32}
33
34/// Potentially expensive bit-counting methods.
35///
36/// The methods in this trait compute the number of ones or zeros
37/// in a bit vector (possibly underlying a succinct data structure).
38/// The computation can be expensive: if you need a constant-time
39/// version, use [`NumBits`]. If you need to cache the result
40/// of these methods, you can use [`AddNumBits`].
41#[autoimpl(for<T: trait + ?Sized> &T, &mut T, Box<T>)]
42#[delegatable_trait]
43pub trait BitCount: BitLength {
44    /// Returns the number of ones in the underlying bit vector,
45    /// with a possibly expensive computation; see [`NumBits::num_ones`]
46    /// for constant-time version.
47    fn count_ones(&self) -> usize;
48    /// Returns the number of zeros in the underlying bit vector,
49    /// with a possibly expensive computation; see [`NumBits::num_zeros`]
50    /// for constant-time version.
51    #[inline(always)]
52    fn count_zeros(&self) -> usize {
53        self.len() - self.count_ones()
54    }
55}
56
57/// Constant-time bit-counting methods.
58///
59/// The methods in this trait compute the number of ones or zeros
60/// in a bit vector (possibly underlying a succinct data structure)
61/// in constant time. If you can be contented with a potentially
62/// expensive computation, use [`BitCount`].
63///
64/// If you need to implement this trait on a structure that already
65/// implements [`BitCount`], you can use [`AddNumBits`].
66#[autoimpl(for<T: trait + ?Sized> &T, &mut T, Box<T>)]
67#[delegatable_trait]
68pub trait NumBits: BitLength {
69    /// Returns the number of ones in the underlying bit vector
70    /// in constant time. If you can be contented with a potentially
71    /// expensive computation, use [`BitCount::count_ones`].
72    fn num_ones(&self) -> usize;
73    /// Returns the number of zeros in the underlying bit vector
74    /// in constant time. If you can be contented with a potentially
75    /// expensive computation, use [`BitCount::count_zeros`].
76    #[inline(always)]
77    fn num_zeros(&self) -> usize {
78        self.len() - self.num_ones()
79    }
80}
81
82/// Ranking over a bit vector.
83#[autoimpl(for<T: trait + ?Sized> &T, &mut T, Box<T>)]
84#[delegatable_trait]
85pub trait Rank: BitLength + NumBits + RankUnchecked {
86    /// Returns the number of ones preceding the specified position.
87    ///
88    /// The bit vector is virtually zero-extended. If `pos` is greater than or equal to the
89    /// [length of the underlying bit vector](`BitLength::len`), the number of
90    /// ones in the underlying bit vector is returned.
91    #[inline(always)]
92    fn rank(&self, pos: usize) -> usize {
93        if pos >= self.len() {
94            self.num_ones()
95        } else {
96            unsafe { self.rank_unchecked(pos) }
97        }
98    }
99}
100
101#[autoimpl(for<T: trait + ?Sized> &T, &mut T, Box<T>)]
102#[delegatable_trait]
103pub trait RankUnchecked {
104    /// Returns the number of ones preceding the specified position.
105    ///
106    /// # Safety
107    /// `pos` must be between 0 (included) and the [length of the underlying bit
108    /// vector](`BitLength::len`) (excluded).
109    ///
110    /// Some implementation might accept the length as a valid argument. If
111    /// you need to be sure that the length is a valid argument, just
112    /// add a padding zero bit at the end of your vector (at which
113    /// point the original length will fall within the valid range).
114    unsafe fn rank_unchecked(&self, pos: usize) -> usize;
115
116    /// Prefetches the cache lines needed to compute
117    /// [`rank_unchecked(pos)](#tymethod.rank_unchecked).
118    ///
119    /// This can speed up computing the rank of many positions in parallel.
120    ///
121    /// # Examples
122    ///
123    /// For example, take the following for loop:
124    /// ```
125    /// use sux::prelude::RankUnchecked;
126    /// fn query_all(rank: &impl RankUnchecked, positions: &[usize]) {
127    ///    for i in 0..positions.len() {
128    ///        let r = unsafe { rank.rank_unchecked(positions[i]) };
129    ///        // ...
130    ///    }
131    /// }
132    /// ```
133    /// By prefetching cache lines some iterations ahead, we can make sure that
134    /// they are already loaded in memory by the time we get to that loop iteration:
135    /// ```
136    /// use sux::prelude::RankUnchecked;
137    /// fn query_all(rank: &impl RankUnchecked, positions: &[usize]) {
138    ///    for i in 0..positions.len() {
139    ///        rank.prefetch(positions[(i + 32).min(positions.len() - 1)]);
140    ///        let r = unsafe { rank.rank_unchecked(positions[i]) };
141    ///        // ...
142    ///    }
143    /// }
144    /// ```
145    ///
146    /// For [`Rank9`](crate::rank_sel::Rank9) and
147    /// [`RankSmall`](crate::rank_sel::RankSmall), this gives around 10% to 30%
148    /// speedup when there are 16 billion keys.
149    ///
150    /// Prefetching out-of-bounds is never unsafe, and neither is this method.
151    fn prefetch(&self, _pos: usize) {
152        // Default implementation is no-op
153    }
154}
155
156/// Ranking zeros over a bit vector.
157///
158/// Note that this is just an extension trait for [`Rank`].
159#[autoimpl(for<T: trait + ?Sized> &T, &mut T, Box<T>)]
160#[delegatable_trait]
161pub trait RankZero: Rank {
162    /// Returns the number of zeros preceding the specified position.
163    ///
164    /// The bit vector is virtually zero-extended. If `pos` is greater than or
165    /// equal to the [length of the underlying bit vector](`BitLength::len`),
166    /// the `pos` minus the number of ones in the underlying bit vector is
167    /// returned.
168    fn rank_zero(&self, pos: usize) -> usize {
169        pos - self.rank(pos)
170    }
171
172    /// Returns the number of zeros preceding the specified position.
173    ///
174    /// # Safety
175    /// `pos` must be between 0 and the [length of the underlying bit
176    /// vector](`BitLength::len`) (excluded).
177    ///
178    /// Some implementation might consider the length as a valid argument.
179    unsafe fn rank_zero_unchecked(&self, pos: usize) -> usize {
180        pos - unsafe { self.rank_unchecked(pos) }
181    }
182}
183
184/// Ranking over a bit vector, with a hint.
185///
186/// This trait is used to implement fast ranking by adding to bit vectors
187/// counters of different kind.
188#[autoimpl(for<T: trait + ?Sized> &T, &mut T, Box<T>)]
189#[delegatable_trait]
190pub trait RankHinted<const HINT_BIT_SIZE: usize> {
191    /// Returns the number of ones preceding the specified position,
192    /// provided a preceding position and its associated rank.
193    ///
194    /// The hinted position, `hint_pos`, is expressed as a multiple of
195    /// `HINT_BIT_SIZE`. This parameter is necessary as some rank implementation
196    /// can accept only hints at specific positions (usually, multiples of the
197    /// word size, to which `HINT_BIT_SIZE` should be set, in that case).
198    ///
199    /// # Safety
200    ///
201    /// `pos` must be between 0 (included) and
202    /// the [length of the underlying bit vector](`BitLength::len`) (excluded).
203    /// `hint_pos` * `HINT_BIT_SIZE` must be between 0 (included) and
204    /// `pos` (included).
205    /// `hint_rank` must be the number of ones in the underlying bit vector
206    /// before `hint_pos` * `HINT_BIT_SIZE`.
207    ///
208    /// Some implementation might consider the length as a valid argument.
209    unsafe fn rank_hinted(&self, pos: usize, hint_pos: usize, hint_rank: usize) -> usize;
210}
211
212/// Selection over a bit vector without bound checks.
213#[autoimpl(for<T: trait + ?Sized> &T, &mut T, Box<T>)]
214#[delegatable_trait]
215pub trait SelectUnchecked {
216    /// Returns the position of the one of given rank.
217    ///
218    /// # Safety
219    /// `rank` must be between zero (included) and the number of ones in the
220    /// underlying bit vector (excluded).
221    unsafe fn select_unchecked(&self, rank: usize) -> usize;
222}
223
224/// Selection over a bit vector.
225#[autoimpl(for<T: trait + ?Sized> &T, &mut T, Box<T>)]
226#[delegatable_trait]
227pub trait Select: SelectUnchecked + NumBits {
228    /// Returns the position of the one of given rank, or `None` if no such
229    /// bit exists.
230    fn select(&self, rank: usize) -> Option<usize> {
231        if rank >= self.num_ones() {
232            None
233        } else {
234            Some(unsafe { self.select_unchecked(rank) })
235        }
236    }
237}
238
239/// Selection of zeros over a bit vector without bound checks.
240#[autoimpl(for<T: trait + ?Sized> &T, &mut T, Box<T>)]
241#[delegatable_trait]
242pub trait SelectZeroUnchecked {
243    /// Returns the position of the zero of given rank.
244    ///
245    /// # Safety
246    /// `rank` must be between zero (included) and the number of zeros in the
247    /// underlying bit vector (excluded).
248    unsafe fn select_zero_unchecked(&self, rank: usize) -> usize;
249}
250
251/// Selection of zeros over a bit vector.
252#[autoimpl(for<T: trait + ?Sized> &T, &mut T, Box<T>)]
253#[delegatable_trait]
254pub trait SelectZero: SelectZeroUnchecked + NumBits {
255    /// Returns the position of the zero of given rank, or `None` if no such
256    /// bit exists.
257    fn select_zero(&self, rank: usize) -> Option<usize> {
258        if rank >= self.num_zeros() {
259            None
260        } else {
261            Some(unsafe { self.select_zero_unchecked(rank) })
262        }
263    }
264}
265
266/// Selection over a bit vector, with a hint.
267///
268/// This trait is used to implement fast selection by adding to bit vectors
269/// indices of different kind. See, for example,
270/// [`SelectAdapt`](crate::rank_sel::SelectAdapt).
271#[autoimpl(for<T: trait + ?Sized> &T, &mut T, Box<T>)]
272#[delegatable_trait]
273pub trait SelectHinted {
274    /// Selects the one of given rank, provided the position of a preceding one
275    /// and its rank.
276    ///
277    /// # Safety
278    ///
279    /// `rank` must be between zero (included) and the number of ones
280    /// in the underlying bit vector (excluded). `hint_pos` must be between 0
281    /// (included) and the [length of the underlying bit
282    /// vector](`BitLength::len`) (included), and must be the position of a one
283    /// in the underlying bit vector. `hint_rank` must be the number of ones in
284    /// the underlying bit vector before `hint_pos`, and must be less than or
285    /// equal to `rank`.
286    unsafe fn select_hinted(&self, rank: usize, hint_pos: usize, hint_rank: usize) -> usize;
287}
288
289/// Selection of zeros over a bit vector, with a hint.
290///
291/// This trait is used to implement fast selection over zeros by adding to bit
292/// vectors indices of different kind.
293#[autoimpl(for<T: trait + ?Sized> &T, &mut T, Box<T>)]
294#[delegatable_trait]
295pub trait SelectZeroHinted {
296    /// Selects the zero of given rank, provided the position of a preceding zero
297    /// and its rank.
298    ///
299    /// # Safety
300    /// `rank` must be between zero (included) and the number of zeros in the
301    /// underlying bit vector (excluded). `hint_pos` must be between 0 (included) and
302    /// the [length of the underlying bit vector](`BitLength::len`) (included),
303    /// and must be the position of a zero in the underlying bit vector.
304    /// `hint_rank` must be the number of zeros in the underlying bit vector
305    /// before `hint_pos`, and must be less than or equal to `rank`.
306    unsafe fn select_zero_hinted(&self, rank: usize, hint_pos: usize, hint_rank: usize) -> usize;
307}
308
309/// A thin wrapper implementing [`NumBits`] by caching the result of
310/// [`BitCount::count_ones`].
311///
312/// This structure forwards to the wrapped structure all traits defined in [this
313/// module](crate::traits::rank_sel) except for [`NumBits`] and [`BitCount`]. It
314/// is typically used to provide [`NumBits`] to [`Select`]/[`SelectZero`]
315/// implementations; see,
316/// for example, [`SelectAdapt`](crate::rank_sel::SelectAdapt).
317#[derive(Debug, Clone, MemDbg, MemSize, Delegate)]
318#[cfg_attr(feature = "epserde", derive(epserde::Epserde))]
319#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
320#[delegate(AsRef<[usize]>, target = "bits")]
321#[delegate(Index<usize>, target = "bits")]
322#[delegate(crate::traits::rank_sel::BitLength, target = "bits")]
323#[delegate(crate::traits::rank_sel::Rank, target = "bits")]
324#[delegate(crate::traits::rank_sel::RankHinted<64>, target = "bits")]
325#[delegate(crate::traits::rank_sel::RankUnchecked, target = "bits")]
326#[delegate(crate::traits::rank_sel::RankZero, target = "bits")]
327#[delegate(crate::traits::rank_sel::Select, target = "bits")]
328#[delegate(crate::traits::rank_sel::SelectHinted, target = "bits")]
329#[delegate(crate::traits::rank_sel::SelectUnchecked, target = "bits")]
330#[delegate(crate::traits::rank_sel::SelectZero, target = "bits")]
331#[delegate(crate::traits::rank_sel::SelectZeroHinted, target = "bits")]
332#[delegate(crate::traits::rank_sel::SelectZeroUnchecked, target = "bits")]
333pub struct AddNumBits<B> {
334    bits: B,
335    number_of_ones: usize,
336}
337
338impl<B> AddNumBits<B> {
339    /// Returns the underlying bit structure.
340    pub fn into_inner(self) -> B {
341        self.bits
342    }
343
344    /// Creates a new `AddNumBits` from raw parts.
345    ///
346    /// # Safety
347    ///
348    /// `number_of_ones` must be the actual number of ones in `bits`. No
349    /// validation is performed to verify this invariant.
350    #[inline(always)]
351    pub unsafe fn from_raw_parts(bits: B, number_of_ones: usize) -> Self {
352        Self {
353            bits,
354            number_of_ones,
355        }
356    }
357
358    /// Decomposes this `AddNumBits` into its raw parts.
359    ///
360    /// Returns a tuple containing the underlying bit structure and the cached
361    /// number of ones.
362    #[inline(always)]
363    pub fn into_raw_parts(self) -> (B, usize) {
364        (self.bits, self.number_of_ones)
365    }
366}
367
368impl<B: BitLength> AddNumBits<B> {
369    /// Returns the number of bits in the underlying bit vector.
370    ///
371    /// This method is equivalent to [`BitLength::len`], but it is provided to
372    /// reduce ambiguity in method resolution.
373    #[inline(always)]
374    pub fn len(&self) -> usize {
375        BitLength::len(self)
376    }
377}
378
379impl<B: BitLength> NumBits for AddNumBits<B> {
380    #[inline(always)]
381    fn num_ones(&self) -> usize {
382        self.number_of_ones
383    }
384}
385
386impl<B: BitLength> BitCount for AddNumBits<B> {
387    #[inline(always)]
388    fn count_ones(&self) -> usize {
389        self.number_of_ones
390    }
391}
392
393impl<B> Deref for AddNumBits<B> {
394    type Target = B;
395    fn deref(&self) -> &Self::Target {
396        &self.bits
397    }
398}
399
400impl<B: BitCount> From<B> for AddNumBits<B> {
401    fn from(bits: B) -> Self {
402        let number_of_ones = bits.count_ones();
403        AddNumBits {
404            bits,
405            number_of_ones,
406        }
407    }
408}