Skip to main content

vortex_buffer/bit/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Packed bitmaps that can be used to store boolean values.
5//!
6//! This module provides a wrapper on top of the `Buffer` type to store mutable and immutable
7//! bitsets. The bitsets are stored in little-endian order, meaning that the least significant bit
8//! of the first byte is the first bit in the bitset.
9#[cfg(feature = "arrow")]
10mod arrow;
11mod buf;
12mod buf_mut;
13mod count_ones;
14mod macros;
15mod ops;
16mod select;
17
18pub use arrow_buffer::bit_chunk_iterator::BitChunkIterator;
19pub use arrow_buffer::bit_chunk_iterator::BitChunks;
20pub use arrow_buffer::bit_chunk_iterator::UnalignedBitChunk;
21pub use arrow_buffer::bit_chunk_iterator::UnalignedBitChunkIterator;
22pub use arrow_buffer::bit_iterator::BitIndexIterator;
23pub use arrow_buffer::bit_iterator::BitIterator;
24pub use arrow_buffer::bit_iterator::BitSliceIterator;
25pub use buf::*;
26pub use buf_mut::*;
27
28/// Packs up to 64 boolean values into a little-endian `u64` word.
29#[inline]
30pub fn collect_bool_word<F>(len: usize, mut f: F) -> u64
31where
32    F: FnMut(usize) -> bool,
33{
34    assert!(len <= 64, "cannot pack {len} bits into a u64 word");
35
36    let mut packed = 0;
37    for bit_idx in 0..len {
38        packed |= (f(bit_idx) as u64) << bit_idx;
39    }
40    packed
41}
42
43/// Pack `len` boolean values returned by `f` into the prefix of `words`, LSB-first,
44/// 64 bits per `u64`. `words` must have capacity for at least `len.div_ceil(64)` entries.
45///
46/// Writes via `=` (not `|=`), so the destination need not be zero-initialised.
47#[inline]
48pub fn collect_bool_words<F>(words: &mut [u64], len: usize, mut f: F)
49where
50    F: FnMut(usize) -> bool,
51{
52    let num_words = len.div_ceil(64);
53    assert!(
54        words.len() >= num_words,
55        "words slice has {} entries, need at least {num_words}",
56        words.len(),
57    );
58
59    let full = len / 64;
60    let remainder = len % 64;
61
62    for word_idx in 0..full {
63        let offset = word_idx * 64;
64        words[word_idx] = collect_bool_word(64, |bit_idx| f(offset + bit_idx));
65    }
66
67    if remainder != 0 {
68        let offset = full * 64;
69        words[full] = collect_bool_word(remainder, |bit_idx| f(offset + bit_idx));
70    }
71}
72
73/// Splice a packed word `w` (whose bits above the highest valid bit are zero) into
74/// `words` at the given bit position.
75///
76/// The destination word at `bit_offset / 64` is OR'd, preserving any bits below
77/// `bit_offset % 64`. When `w` has high bits that spill into the next word, those
78/// bits are *assigned* (not OR'd) — so callers must ensure that next slot is zero
79/// (e.g. via `BufferMut::zeroed`).
80///
81/// `words.len()` need only cover the slots `w` actually writes to: skipping the
82/// spillover when its bits are all zero means a tail that fits entirely in the
83/// leading word never touches `words[dest_word + 1]`.
84#[inline]
85pub fn splice_word_at_bit(words: &mut [u64], bit_offset: usize, word: u64) {
86    let dest_word = bit_offset / 64;
87    let bit_in_word = bit_offset % 64;
88    words[dest_word] |= word << bit_in_word;
89    if bit_in_word != 0 {
90        let high = word >> (64 - bit_in_word);
91        if high != 0 {
92            words[dest_word + 1] = high;
93        }
94    }
95}
96
97/// Pack `len` boolean values returned by `f` into `words` starting at bit position
98/// `bit_offset`, LSB-first.
99///
100/// Composes [`collect_bool_word`] (pack up to 64 bools into a u64) with
101/// [`splice_word_at_bit`] (merge the packed word into the destination via shift-OR).
102///
103/// `words` must have at least `(bit_offset + len).div_ceil(64)` entries; see
104/// [`splice_word_at_bit`] for zero-init requirements on words above the cursor.
105#[inline]
106pub fn pack_bools_into_words<F>(words: &mut [u64], bit_offset: usize, len: usize, mut f: F)
107where
108    F: FnMut(usize) -> bool,
109{
110    if len == 0 {
111        return;
112    }
113    let num_words = (bit_offset + len).div_ceil(64);
114    assert!(
115        words.len() >= num_words,
116        "words slice has {} entries, need at least {num_words}",
117        words.len(),
118    );
119
120    let mut done = 0;
121    while len - done >= 64 {
122        let word = collect_bool_word(64, |bit| f(done + bit));
123        splice_word_at_bit(words, bit_offset + done, word);
124        done += 64;
125    }
126    let tail = len - done;
127    if tail > 0 {
128        let word = collect_bool_word(tail, |bit| f(done + bit));
129        splice_word_at_bit(words, bit_offset + done, word);
130    }
131}
132
133/// Get the bit value at `index` out of `buf`.
134///
135/// # Panics
136///
137/// Panics if `index` is not between 0 and length of `buf * 8`.
138#[inline(always)]
139pub fn get_bit(buf: &[u8], index: usize) -> bool {
140    buf[index / 8] & (1 << (index % 8)) != 0
141}
142
143/// Get the bit value at `index` out of `buf` without bounds checking.
144///
145/// # Safety
146///
147/// `index` must be between 0 and length of `buf * 8`.
148#[inline(always)]
149pub unsafe fn get_bit_unchecked(buf: *const u8, index: usize) -> bool {
150    (unsafe { *buf.add(index / 8) } & (1 << (index % 8))) != 0
151}
152
153/// Set the bit value at `index` in `buf` without bounds checking.
154///
155/// # Safety
156///
157/// `index` must be between 0 and length of `buf * 8`.
158#[inline(always)]
159pub unsafe fn set_bit_unchecked(buf: *mut u8, index: usize) {
160    unsafe { *buf.add(index / 8) |= 1 << (index % 8) };
161}
162
163/// Unset the bit value at `index` in `buf` without bounds checking.
164///
165/// # Safety
166///
167/// `index` must be between 0 and length of `buf * 8`.
168#[inline(always)]
169pub unsafe fn unset_bit_unchecked(buf: *mut u8, index: usize) {
170    unsafe { *buf.add(index / 8) &= !(1 << (index % 8)) };
171}
172
173#[cfg(test)]
174mod tests {
175    use super::collect_bool_word;
176    use super::pack_bools_into_words;
177
178    #[test]
179    fn collect_bool_word_packs_lsb_first() {
180        let word = collect_bool_word(5, |idx| idx.is_multiple_of(2));
181        assert_eq!(word, 0b10101);
182    }
183
184    #[test]
185    fn collect_bool_word_empty() {
186        assert_eq!(collect_bool_word(0, |_| true), 0);
187    }
188
189    #[test]
190    #[should_panic(expected = "cannot pack 65 bits into a u64 word")]
191    fn collect_bool_word_rejects_too_many_bits() {
192        let _ = collect_bool_word(65, |_| true);
193    }
194
195    fn pack(bit_offset: usize, len: usize, f: impl Fn(usize) -> bool) -> Vec<bool> {
196        let num_words = (bit_offset + len).div_ceil(64);
197        let mut words = vec![0u64; num_words];
198        pack_bools_into_words(&mut words, bit_offset, len, &f);
199        (0..bit_offset + len)
200            .map(|i| (words[i / 64] >> (i % 64)) & 1 == 1)
201            .collect()
202    }
203
204    #[test]
205    fn pack_bools_aligned_multi_word_with_tail() {
206        let bits = pack(0, 130, |i| i.is_multiple_of(3));
207        for i in 0..130 {
208            assert_eq!(bits[i], i.is_multiple_of(3), "bit {i}");
209        }
210    }
211
212    #[test]
213    fn pack_bools_unaligned_crossing_words() {
214        let bits = pack(40, 200, |i| i.is_multiple_of(7));
215        assert!(bits[..40].iter().all(|&b| !b));
216        for i in 0..200 {
217            assert_eq!(bits[40 + i], i.is_multiple_of(7), "bit {}", 40 + i);
218        }
219    }
220
221    #[test]
222    fn pack_bools_preserves_low_bits_of_leading_word() {
223        let mut words = vec![0u64; 2];
224        words[0] = 0b11111;
225        pack_bools_into_words(&mut words, 5, 70, |_| true);
226        for i in 0..5 {
227            assert_eq!((words[0] >> i) & 1, 1, "preserved bit {i}");
228        }
229        for i in 5..75 {
230            assert_eq!((words[i / 64] >> (i % 64)) & 1, 1, "extended bit {i}");
231        }
232    }
233}