sux/traits/rank_sel.rs
1/*
2 * SPDX-FileCopyrightText: 2023 Inria
3 * SPDX-FileCopyrightText: 2023 Sebastiano Vigna
4 *
5 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6 */
7
8//! Basic traits for succinct operations on bit vectors, including [`Rank`] and
9//! [`Select`].
10//!
11//! All traits in this module are automatically implemented for references,
12//! mutable references, and boxes. Moreover, usually they are all forwarded to
13//! underlying implementations.
14
15use crate::ambassador_impl_AsRef;
16use crate::ambassador_impl_Index;
17use ambassador::{Delegate, delegatable_trait};
18use impl_tools::autoimpl;
19use mem_dbg::{MemDbg, MemSize};
20use std::ops::Deref;
21use std::ops::Index;
22
23/// A trait expressing a length in bits.
24///
25/// This trait is typically used in conjunction with `AsRef<[usize]>` to provide
26/// word-based access to a bit vector.
27#[autoimpl(for<T: trait + ?Sized> &T, &mut T, Box<T>)]
28#[delegatable_trait]
29pub trait BitLength {
30 /// Returns a length in bits.
31 fn len(&self) -> usize;
32}
33
34/// Potentially expensive bit-counting methods.
35///
36/// The methods in this trait compute the number of ones or zeros
37/// in a bit vector (possibly underlying a succinct data structure).
38/// The computation can be expensive: if you need a constant-time
39/// version, use [`NumBits`]. If you need to cache the result
40/// of these methods, you can use [`AddNumBits`].
41#[autoimpl(for<T: trait + ?Sized> &T, &mut T, Box<T>)]
42#[delegatable_trait]
43pub trait BitCount: BitLength {
44 /// Returns the number of ones in the underlying bit vector,
45 /// with a possibly expensive computation; see [`NumBits::num_ones`]
46 /// for constant-time version.
47 fn count_ones(&self) -> usize;
48 /// Returns the number of zeros in the underlying bit vector,
49 /// with a possibly expensive computation; see [`NumBits::num_zeros`]
50 /// for constant-time version.
51 #[inline(always)]
52 fn count_zeros(&self) -> usize {
53 self.len() - self.count_ones()
54 }
55}
56
57/// Constant-time bit-counting methods.
58///
59/// The methods in this trait compute the number of ones or zeros
60/// in a bit vector (possibly underlying a succinct data structure)
61/// in constant time. If you can be contented with a potentially
62/// expensive computation, use [`BitCount`].
63///
64/// If you need to implement this trait on a structure that already
65/// implements [`BitCount`], you can use [`AddNumBits`].
66#[autoimpl(for<T: trait + ?Sized> &T, &mut T, Box<T>)]
67#[delegatable_trait]
68pub trait NumBits: BitLength {
69 /// Returns the number of ones in the underlying bit vector
70 /// in constant time. If you can be contented with a potentially
71 /// expensive computation, use [`BitCount::count_ones`].
72 fn num_ones(&self) -> usize;
73 /// Returns the number of zeros in the underlying bit vector
74 /// in constant time. If you can be contented with a potentially
75 /// expensive computation, use [`BitCount::count_zeros`].
76 #[inline(always)]
77 fn num_zeros(&self) -> usize {
78 self.len() - self.num_ones()
79 }
80}
81
82/// Ranking over a bit vector.
83#[autoimpl(for<T: trait + ?Sized> &T, &mut T, Box<T>)]
84#[delegatable_trait]
85pub trait Rank: BitLength + NumBits + RankUnchecked {
86 /// Returns the number of ones preceding the specified position.
87 ///
88 /// The bit vector is virtually zero-extended. If `pos` is greater than or equal to the
89 /// [length of the underlying bit vector](`BitLength::len`), the number of
90 /// ones in the underlying bit vector is returned.
91 #[inline(always)]
92 fn rank(&self, pos: usize) -> usize {
93 if pos >= self.len() {
94 self.num_ones()
95 } else {
96 unsafe { self.rank_unchecked(pos) }
97 }
98 }
99}
100
101#[autoimpl(for<T: trait + ?Sized> &T, &mut T, Box<T>)]
102#[delegatable_trait]
103pub trait RankUnchecked {
104 /// Returns the number of ones preceding the specified position.
105 ///
106 /// # Safety
107 /// `pos` must be between 0 (included) and the [length of the underlying bit
108 /// vector](`BitLength::len`) (excluded).
109 ///
110 /// Some implementation might accept the length as a valid argument. If
111 /// you need to be sure that the length is a valid argument, just
112 /// add a padding zero bit at the end of your vector (at which
113 /// point the original length will fall within the valid range).
114 unsafe fn rank_unchecked(&self, pos: usize) -> usize;
115
116 /// Prefetches the cache lines needed to compute
117 /// [`rank_unchecked(pos)](#tymethod.rank_unchecked).
118 ///
119 /// This can speed up computing the rank of many positions in parallel.
120 ///
121 /// # Examples
122 ///
123 /// For example, take the following for loop:
124 /// ```
125 /// use sux::prelude::RankUnchecked;
126 /// fn query_all(rank: &impl RankUnchecked, positions: &[usize]) {
127 /// for i in 0..positions.len() {
128 /// let r = unsafe { rank.rank_unchecked(positions[i]) };
129 /// // ...
130 /// }
131 /// }
132 /// ```
133 /// By prefetching cache lines some iterations ahead, we can make sure that
134 /// they are already loaded in memory by the time we get to that loop iteration:
135 /// ```
136 /// use sux::prelude::RankUnchecked;
137 /// fn query_all(rank: &impl RankUnchecked, positions: &[usize]) {
138 /// for i in 0..positions.len() {
139 /// rank.prefetch(positions[(i + 32).min(positions.len() - 1)]);
140 /// let r = unsafe { rank.rank_unchecked(positions[i]) };
141 /// // ...
142 /// }
143 /// }
144 /// ```
145 ///
146 /// For [`Rank9`](crate::rank_sel::Rank9) and
147 /// [`RankSmall`](crate::rank_sel::RankSmall), this gives around 10% to 30%
148 /// speedup when there are 16 billion keys.
149 ///
150 /// Prefetching out-of-bounds is never unsafe, and neither is this method.
151 fn prefetch(&self, _pos: usize) {
152 // Default implementation is no-op
153 }
154}
155
156/// Ranking zeros over a bit vector.
157///
158/// Note that this is just an extension trait for [`Rank`].
159#[autoimpl(for<T: trait + ?Sized> &T, &mut T, Box<T>)]
160#[delegatable_trait]
161pub trait RankZero: Rank {
162 /// Returns the number of zeros preceding the specified position.
163 ///
164 /// The bit vector is virtually zero-extended. If `pos` is greater than or
165 /// equal to the [length of the underlying bit vector](`BitLength::len`),
166 /// the `pos` minus the number of ones in the underlying bit vector is
167 /// returned.
168 fn rank_zero(&self, pos: usize) -> usize {
169 pos - self.rank(pos)
170 }
171
172 /// Returns the number of zeros preceding the specified position.
173 ///
174 /// # Safety
175 /// `pos` must be between 0 and the [length of the underlying bit
176 /// vector](`BitLength::len`) (excluded).
177 ///
178 /// Some implementation might consider the length as a valid argument.
179 unsafe fn rank_zero_unchecked(&self, pos: usize) -> usize {
180 pos - unsafe { self.rank_unchecked(pos) }
181 }
182}
183
184/// Ranking over a bit vector, with a hint.
185///
186/// This trait is used to implement fast ranking by adding to bit vectors
187/// counters of different kind.
188#[autoimpl(for<T: trait + ?Sized> &T, &mut T, Box<T>)]
189#[delegatable_trait]
190pub trait RankHinted<const HINT_BIT_SIZE: usize> {
191 /// Returns the number of ones preceding the specified position,
192 /// provided a preceding position and its associated rank.
193 ///
194 /// The hinted position, `hint_pos`, is expressed as a multiple of
195 /// `HINT_BIT_SIZE`. This parameter is necessary as some rank implementation
196 /// can accept only hints at specific positions (usually, multiples of the
197 /// word size, to which `HINT_BIT_SIZE` should be set, in that case).
198 ///
199 /// # Safety
200 ///
201 /// `pos` must be between 0 (included) and
202 /// the [length of the underlying bit vector](`BitLength::len`) (excluded).
203 /// `hint_pos` * `HINT_BIT_SIZE` must be between 0 (included) and
204 /// `pos` (included).
205 /// `hint_rank` must be the number of ones in the underlying bit vector
206 /// before `hint_pos` * `HINT_BIT_SIZE`.
207 ///
208 /// Some implementation might consider the length as a valid argument.
209 unsafe fn rank_hinted(&self, pos: usize, hint_pos: usize, hint_rank: usize) -> usize;
210}
211
212/// Selection over a bit vector without bound checks.
213#[autoimpl(for<T: trait + ?Sized> &T, &mut T, Box<T>)]
214#[delegatable_trait]
215pub trait SelectUnchecked {
216 /// Returns the position of the one of given rank.
217 ///
218 /// # Safety
219 /// `rank` must be between zero (included) and the number of ones in the
220 /// underlying bit vector (excluded).
221 unsafe fn select_unchecked(&self, rank: usize) -> usize;
222}
223
224/// Selection over a bit vector.
225#[autoimpl(for<T: trait + ?Sized> &T, &mut T, Box<T>)]
226#[delegatable_trait]
227pub trait Select: SelectUnchecked + NumBits {
228 /// Returns the position of the one of given rank, or `None` if no such
229 /// bit exists.
230 fn select(&self, rank: usize) -> Option<usize> {
231 if rank >= self.num_ones() {
232 None
233 } else {
234 Some(unsafe { self.select_unchecked(rank) })
235 }
236 }
237}
238
239/// Selection of zeros over a bit vector without bound checks.
240#[autoimpl(for<T: trait + ?Sized> &T, &mut T, Box<T>)]
241#[delegatable_trait]
242pub trait SelectZeroUnchecked {
243 /// Returns the position of the zero of given rank.
244 ///
245 /// # Safety
246 /// `rank` must be between zero (included) and the number of zeros in the
247 /// underlying bit vector (excluded).
248 unsafe fn select_zero_unchecked(&self, rank: usize) -> usize;
249}
250
251/// Selection of zeros over a bit vector.
252#[autoimpl(for<T: trait + ?Sized> &T, &mut T, Box<T>)]
253#[delegatable_trait]
254pub trait SelectZero: SelectZeroUnchecked + NumBits {
255 /// Returns the position of the zero of given rank, or `None` if no such
256 /// bit exists.
257 fn select_zero(&self, rank: usize) -> Option<usize> {
258 if rank >= self.num_zeros() {
259 None
260 } else {
261 Some(unsafe { self.select_zero_unchecked(rank) })
262 }
263 }
264}
265
266/// Selection over a bit vector, with a hint.
267///
268/// This trait is used to implement fast selection by adding to bit vectors
269/// indices of different kind. See, for example,
270/// [`SelectAdapt`](crate::rank_sel::SelectAdapt).
271#[autoimpl(for<T: trait + ?Sized> &T, &mut T, Box<T>)]
272#[delegatable_trait]
273pub trait SelectHinted {
274 /// Selects the one of given rank, provided the position of a preceding one
275 /// and its rank.
276 ///
277 /// # Safety
278 ///
279 /// `rank` must be between zero (included) and the number of ones
280 /// in the underlying bit vector (excluded). `hint_pos` must be between 0
281 /// (included) and the [length of the underlying bit
282 /// vector](`BitLength::len`) (included), and must be the position of a one
283 /// in the underlying bit vector. `hint_rank` must be the number of ones in
284 /// the underlying bit vector before `hint_pos`, and must be less than or
285 /// equal to `rank`.
286 unsafe fn select_hinted(&self, rank: usize, hint_pos: usize, hint_rank: usize) -> usize;
287}
288
289/// Selection of zeros over a bit vector, with a hint.
290///
291/// This trait is used to implement fast selection over zeros by adding to bit
292/// vectors indices of different kind.
293#[autoimpl(for<T: trait + ?Sized> &T, &mut T, Box<T>)]
294#[delegatable_trait]
295pub trait SelectZeroHinted {
296 /// Selects the zero of given rank, provided the position of a preceding zero
297 /// and its rank.
298 ///
299 /// # Safety
300 /// `rank` must be between zero (included) and the number of zeros in the
301 /// underlying bit vector (excluded). `hint_pos` must be between 0 (included) and
302 /// the [length of the underlying bit vector](`BitLength::len`) (included),
303 /// and must be the position of a zero in the underlying bit vector.
304 /// `hint_rank` must be the number of zeros in the underlying bit vector
305 /// before `hint_pos`, and must be less than or equal to `rank`.
306 unsafe fn select_zero_hinted(&self, rank: usize, hint_pos: usize, hint_rank: usize) -> usize;
307}
308
309/// A thin wrapper implementing [`NumBits`] by caching the result of
310/// [`BitCount::count_ones`].
311///
312/// This structure forwards to the wrapped structure all traits defined in [this
313/// module](crate::traits::rank_sel) except for [`NumBits`] and [`BitCount`]. It
314/// is typically used to provide [`NumBits`] to [`Select`]/[`SelectZero`]
315/// implementations; see,
316/// for example, [`SelectAdapt`](crate::rank_sel::SelectAdapt).
317#[derive(Debug, Clone, MemDbg, MemSize, Delegate)]
318#[cfg_attr(feature = "epserde", derive(epserde::Epserde))]
319#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
320#[delegate(AsRef<[usize]>, target = "bits")]
321#[delegate(Index<usize>, target = "bits")]
322#[delegate(crate::traits::rank_sel::BitLength, target = "bits")]
323#[delegate(crate::traits::rank_sel::Rank, target = "bits")]
324#[delegate(crate::traits::rank_sel::RankHinted<64>, target = "bits")]
325#[delegate(crate::traits::rank_sel::RankUnchecked, target = "bits")]
326#[delegate(crate::traits::rank_sel::RankZero, target = "bits")]
327#[delegate(crate::traits::rank_sel::Select, target = "bits")]
328#[delegate(crate::traits::rank_sel::SelectHinted, target = "bits")]
329#[delegate(crate::traits::rank_sel::SelectUnchecked, target = "bits")]
330#[delegate(crate::traits::rank_sel::SelectZero, target = "bits")]
331#[delegate(crate::traits::rank_sel::SelectZeroHinted, target = "bits")]
332#[delegate(crate::traits::rank_sel::SelectZeroUnchecked, target = "bits")]
333pub struct AddNumBits<B> {
334 bits: B,
335 number_of_ones: usize,
336}
337
338impl<B> AddNumBits<B> {
339 /// Returns the underlying bit structure.
340 pub fn into_inner(self) -> B {
341 self.bits
342 }
343
344 /// Creates a new `AddNumBits` from raw parts.
345 ///
346 /// # Safety
347 ///
348 /// `number_of_ones` must be the actual number of ones in `bits`. No
349 /// validation is performed to verify this invariant.
350 #[inline(always)]
351 pub unsafe fn from_raw_parts(bits: B, number_of_ones: usize) -> Self {
352 Self {
353 bits,
354 number_of_ones,
355 }
356 }
357
358 /// Decomposes this `AddNumBits` into its raw parts.
359 ///
360 /// Returns a tuple containing the underlying bit structure and the cached
361 /// number of ones.
362 #[inline(always)]
363 pub fn into_raw_parts(self) -> (B, usize) {
364 (self.bits, self.number_of_ones)
365 }
366}
367
368impl<B: BitLength> AddNumBits<B> {
369 /// Returns the number of bits in the underlying bit vector.
370 ///
371 /// This method is equivalent to [`BitLength::len`], but it is provided to
372 /// reduce ambiguity in method resolution.
373 #[inline(always)]
374 pub fn len(&self) -> usize {
375 BitLength::len(self)
376 }
377}
378
379impl<B: BitLength> NumBits for AddNumBits<B> {
380 #[inline(always)]
381 fn num_ones(&self) -> usize {
382 self.number_of_ones
383 }
384}
385
386impl<B: BitLength> BitCount for AddNumBits<B> {
387 #[inline(always)]
388 fn count_ones(&self) -> usize {
389 self.number_of_ones
390 }
391}
392
393impl<B> Deref for AddNumBits<B> {
394 type Target = B;
395 fn deref(&self) -> &Self::Target {
396 &self.bits
397 }
398}
399
400impl<B: BitCount> From<B> for AddNumBits<B> {
401 fn from(bits: B) -> Self {
402 let number_of_ones = bits.count_ones();
403 AddNumBits {
404 bits,
405 number_of_ones,
406 }
407 }
408}