arrow2/bitmap/utils/
mod.rs

1//! General utilities for bitmaps representing items where LSB is the first item.
2mod chunk_iterator;
3mod chunks_exact_mut;
4mod fmt;
5mod iterator;
6mod slice_iterator;
7mod zip_validity;
8
9use std::convert::TryInto;
10
11pub(crate) use chunk_iterator::merge_reversed;
12pub use chunk_iterator::{BitChunk, BitChunkIterExact, BitChunks, BitChunksExact};
13pub use chunks_exact_mut::BitChunksExactMut;
14pub use fmt::fmt;
15pub use iterator::BitmapIter;
16pub use slice_iterator::SlicesIterator;
17pub use zip_validity::{ZipValidity, ZipValidityIter};
18
19const BIT_MASK: [u8; 8] = [1, 2, 4, 8, 16, 32, 64, 128];
20const UNSET_BIT_MASK: [u8; 8] = [
21    255 - 1,
22    255 - 2,
23    255 - 4,
24    255 - 8,
25    255 - 16,
26    255 - 32,
27    255 - 64,
28    255 - 128,
29];
30
31/// Returns whether bit at position `i` in `byte` is set or not
32#[inline]
33pub fn is_set(byte: u8, i: usize) -> bool {
34    (byte & BIT_MASK[i]) != 0
35}
36
37/// Sets bit at position `i` in `byte`
38#[inline]
39pub fn set(byte: u8, i: usize, value: bool) -> u8 {
40    if value {
41        byte | BIT_MASK[i]
42    } else {
43        byte & UNSET_BIT_MASK[i]
44    }
45}
46
47/// Sets bit at position `i` in `data`
48/// # Panics
49/// panics if `i >= data.len() / 8`
50#[inline]
51pub fn set_bit(data: &mut [u8], i: usize, value: bool) {
52    data[i / 8] = set(data[i / 8], i % 8, value);
53}
54
55/// Sets bit at position `i` in `data` without doing bound checks
56/// # Safety
57/// caller must ensure that `i < data.len() / 8`
58#[inline]
59pub unsafe fn set_bit_unchecked(data: &mut [u8], i: usize, value: bool) {
60    let byte = data.get_unchecked_mut(i / 8);
61    *byte = set(*byte, i % 8, value);
62}
63
64/// Returns whether bit at position `i` in `data` is set
65/// # Panic
66/// This function panics iff `i / 8 >= bytes.len()`
67#[inline]
68pub fn get_bit(bytes: &[u8], i: usize) -> bool {
69    is_set(bytes[i / 8], i % 8)
70}
71
72/// Returns whether bit at position `i` in `data` is set or not.
73///
74/// # Safety
75/// `i >= data.len() * 8` results in undefined behavior
76#[inline]
77pub unsafe fn get_bit_unchecked(data: &[u8], i: usize) -> bool {
78    (*data.as_ptr().add(i >> 3) & BIT_MASK[i & 7]) != 0
79}
80
81/// Returns the number of bytes required to hold `bits` bits.
82#[inline]
83pub fn bytes_for(bits: usize) -> usize {
84    bits.saturating_add(7) / 8
85}
86
87/// Returns the number of zero bits in the slice offsetted by `offset` and a length of `length`.
88/// # Panics
89/// This function panics iff `(offset + len).saturating_add(7) / 8 >= slice.len()`
90/// because it corresponds to the situation where `len` is beyond bounds.
91pub fn count_zeros(slice: &[u8], offset: usize, len: usize) -> usize {
92    if len == 0 {
93        return 0;
94    };
95
96    let mut slice = &slice[offset / 8..(offset + len).saturating_add(7) / 8];
97    let offset = offset % 8;
98
99    if (offset + len) / 8 == 0 {
100        // all within a single byte
101        let byte = (slice[0] >> offset) << (8 - len);
102        return len - byte.count_ones() as usize;
103    }
104
105    // slice: [a1,a2,a3,a4], [a5,a6,a7,a8]
106    // offset: 3
107    // len: 4
108    // [__,__,__,a4], [a5,a6,a7,__]
109    let mut set_count = 0;
110    if offset != 0 {
111        // count all ignoring the first `offset` bits
112        // i.e. [__,__,__,a4]
113        set_count += (slice[0] >> offset).count_ones() as usize;
114        slice = &slice[1..];
115    }
116    if (offset + len) % 8 != 0 {
117        let end_offset = (offset + len) % 8; // i.e. 3 + 4 = 7
118        let last_index = slice.len() - 1;
119        // count all ignoring the last `offset` bits
120        // i.e. [a5,a6,a7,__]
121        set_count += (slice[last_index] << (8 - end_offset)).count_ones() as usize;
122        slice = &slice[..last_index];
123    }
124
125    // finally, count any and all bytes in the middle in groups of 8
126    let mut chunks = slice.chunks_exact(8);
127    set_count += chunks
128        .by_ref()
129        .map(|chunk| {
130            let a = u64::from_ne_bytes(chunk.try_into().unwrap());
131            a.count_ones() as usize
132        })
133        .sum::<usize>();
134
135    // and any bytes that do not fit in the group
136    set_count += chunks
137        .remainder()
138        .iter()
139        .map(|byte| byte.count_ones() as usize)
140        .sum::<usize>();
141
142    len - set_count
143}