Skip to main content

vortex_buffer/bit/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Packed bitmaps that can be used to store boolean values.
5//!
6//! This module provides a wrapper on top of the `Buffer` type to store mutable and immutable
7//! bitsets. The bitsets are stored in little-endian order, meaning that the least significant bit
8//! of the first byte is the first bit in the bitset.
9#[cfg(feature = "arrow")]
10mod arrow;
11mod buf;
12mod buf_mut;
13mod count_ones;
14mod macros;
15mod meta;
16mod ops;
17mod select;
18mod view;
19
20pub use arrow_buffer::bit_chunk_iterator::BitChunkIterator;
21pub use arrow_buffer::bit_chunk_iterator::BitChunks;
22pub use arrow_buffer::bit_chunk_iterator::UnalignedBitChunk;
23pub use arrow_buffer::bit_chunk_iterator::UnalignedBitChunkIterator;
24pub use arrow_buffer::bit_iterator::BitIndexIterator;
25pub use arrow_buffer::bit_iterator::BitIterator;
26pub use arrow_buffer::bit_iterator::BitSliceIterator;
27pub use buf::*;
28pub use buf_mut::*;
29pub use meta::*;
30pub use view::*;
31
32/// Packs up to 64 boolean values into a little-endian `u64` word.
33#[inline]
34pub fn collect_bool_word<F>(len: usize, mut f: F) -> u64
35where
36    F: FnMut(usize) -> bool,
37{
38    assert!(len <= 64, "cannot pack {len} bits into a u64 word");
39
40    let mut packed = 0;
41    for bit_idx in 0..len {
42        packed |= (f(bit_idx) as u64) << bit_idx;
43    }
44    packed
45}
46
47/// Pack `len` boolean values returned by `f` into the prefix of `words`, LSB-first,
48/// 64 bits per `u64`. `words` must have capacity for at least `len.div_ceil(64)` entries.
49///
50/// Writes via `=` (not `|=`), so the destination need not be zero-initialised.
51#[inline]
52pub fn collect_bool_words<F>(words: &mut [u64], len: usize, mut f: F)
53where
54    F: FnMut(usize) -> bool,
55{
56    let num_words = len.div_ceil(64);
57    assert!(
58        words.len() >= num_words,
59        "words slice has {} entries, need at least {num_words}",
60        words.len(),
61    );
62
63    let full = len / 64;
64    let remainder = len % 64;
65
66    for word_idx in 0..full {
67        let offset = word_idx * 64;
68        words[word_idx] = collect_bool_word(64, |bit_idx| f(offset + bit_idx));
69    }
70
71    if remainder != 0 {
72        let offset = full * 64;
73        words[full] = collect_bool_word(remainder, |bit_idx| f(offset + bit_idx));
74    }
75}
76
77/// Splice a packed word `w` (whose bits above the highest valid bit are zero) into
78/// `words` at the given bit position.
79///
80/// The destination word at `bit_offset / 64` is OR'd, preserving any bits below
81/// `bit_offset % 64`. When `w` has high bits that spill into the next word, those
82/// bits are *assigned* (not OR'd) — so callers must ensure that next slot is zero
83/// (e.g. via `BufferMut::zeroed`).
84///
85/// `words.len()` need only cover the slots `w` actually writes to: skipping the
86/// spillover when its bits are all zero means a tail that fits entirely in the
87/// leading word never touches `words[dest_word + 1]`.
88#[inline]
89pub fn splice_word_at_bit(words: &mut [u64], bit_offset: usize, word: u64) {
90    let dest_word = bit_offset / 64;
91    let bit_in_word = bit_offset % 64;
92    words[dest_word] |= word << bit_in_word;
93    if bit_in_word != 0 {
94        let high = word >> (64 - bit_in_word);
95        if high != 0 {
96            words[dest_word + 1] = high;
97        }
98    }
99}
100
101/// Pack `len` boolean values returned by `f` into `words` starting at bit position
102/// `bit_offset`, LSB-first.
103///
104/// Composes [`collect_bool_word`] (pack up to 64 bools into a u64) with
105/// [`splice_word_at_bit`] (merge the packed word into the destination via shift-OR).
106///
107/// `words` must have at least `(bit_offset + len).div_ceil(64)` entries; see
108/// [`splice_word_at_bit`] for zero-init requirements on words above the cursor.
109#[inline]
110pub fn pack_bools_into_words<F>(words: &mut [u64], bit_offset: usize, len: usize, mut f: F)
111where
112    F: FnMut(usize) -> bool,
113{
114    if len == 0 {
115        return;
116    }
117    let num_words = (bit_offset + len).div_ceil(64);
118    assert!(
119        words.len() >= num_words,
120        "words slice has {} entries, need at least {num_words}",
121        words.len(),
122    );
123
124    let mut done = 0;
125    while len - done >= 64 {
126        let word = collect_bool_word(64, |bit| f(done + bit));
127        splice_word_at_bit(words, bit_offset + done, word);
128        done += 64;
129    }
130    let tail = len - done;
131    if tail > 0 {
132        let word = collect_bool_word(tail, |bit| f(done + bit));
133        splice_word_at_bit(words, bit_offset + done, word);
134    }
135}
136
137/// Get the bit value at `index` out of `buf`.
138///
139/// # Panics
140///
141/// Panics if `index` is not between 0 and length of `buf * 8`.
142#[inline(always)]
143pub fn get_bit(buf: &[u8], index: usize) -> bool {
144    buf[index / 8] & (1 << (index % 8)) != 0
145}
146
147/// Get the bit value at `index` out of `buf` without bounds checking.
148///
149/// # Safety
150///
151/// `index` must be between 0 and length of `buf * 8`.
152#[inline(always)]
153pub unsafe fn get_bit_unchecked(buf: *const u8, index: usize) -> bool {
154    (unsafe { *buf.add(index / 8) } & (1 << (index % 8))) != 0
155}
156
157/// Set the bit value at `index` in `buf` without bounds checking.
158///
159/// # Safety
160///
161/// `index` must be between 0 and length of `buf * 8`.
162#[inline(always)]
163pub unsafe fn set_bit_unchecked(buf: *mut u8, index: usize) {
164    unsafe { *buf.add(index / 8) |= 1 << (index % 8) };
165}
166
167/// Unset the bit value at `index` in `buf` without bounds checking.
168///
169/// # Safety
170///
171/// `index` must be between 0 and length of `buf * 8`.
172#[inline(always)]
173pub unsafe fn unset_bit_unchecked(buf: *mut u8, index: usize) {
174    unsafe { *buf.add(index / 8) &= !(1 << (index % 8)) };
175}
176
177#[cfg(test)]
178mod tests {
179    use super::collect_bool_word;
180    use super::pack_bools_into_words;
181
182    #[test]
183    fn collect_bool_word_packs_lsb_first() {
184        let word = collect_bool_word(5, |idx| idx.is_multiple_of(2));
185        assert_eq!(word, 0b10101);
186    }
187
188    #[test]
189    fn collect_bool_word_empty() {
190        assert_eq!(collect_bool_word(0, |_| true), 0);
191    }
192
193    #[test]
194    #[should_panic(expected = "cannot pack 65 bits into a u64 word")]
195    fn collect_bool_word_rejects_too_many_bits() {
196        let _ = collect_bool_word(65, |_| true);
197    }
198
199    fn pack(bit_offset: usize, len: usize, f: impl Fn(usize) -> bool) -> Vec<bool> {
200        let num_words = (bit_offset + len).div_ceil(64);
201        let mut words = vec![0u64; num_words];
202        pack_bools_into_words(&mut words, bit_offset, len, &f);
203        (0..bit_offset + len)
204            .map(|i| (words[i / 64] >> (i % 64)) & 1 == 1)
205            .collect()
206    }
207
208    #[test]
209    fn pack_bools_aligned_multi_word_with_tail() {
210        let bits = pack(0, 130, |i| i.is_multiple_of(3));
211        for i in 0..130 {
212            assert_eq!(bits[i], i.is_multiple_of(3), "bit {i}");
213        }
214    }
215
216    #[test]
217    fn pack_bools_unaligned_crossing_words() {
218        let bits = pack(40, 200, |i| i.is_multiple_of(7));
219        assert!(bits[..40].iter().all(|&b| !b));
220        for i in 0..200 {
221            assert_eq!(bits[40 + i], i.is_multiple_of(7), "bit {}", 40 + i);
222        }
223    }
224
225    #[test]
226    fn pack_bools_preserves_low_bits_of_leading_word() {
227        let mut words = vec![0u64; 2];
228        words[0] = 0b11111;
229        pack_bools_into_words(&mut words, 5, 70, |_| true);
230        for i in 0..5 {
231            assert_eq!((words[0] >> i) & 1, 1, "preserved bit {i}");
232        }
233        for i in 5..75 {
234            assert_eq!((words[i / 64] >> (i % 64)) & 1, 1, "extended bit {i}");
235        }
236    }
237}