marrow/
bits.rs

1//! Helpers to work with bit vectors
2//!
3//! Bit vectors encode eight booleans into the bits of a byte. The Arrow format uses them to encode
4//! validity information of arrays, but also for boolean arrays.
5//!
6//! To construct bit vectors as arrays or vectors see [`marrow::bit_array!`][crate::bit_array] and
7//! [`marrow::bit_vec!`][crate::bit_vec].
8
9/// Build a fixed-size bit array from a sequence of booleans
10///
11/// ```rust
12/// assert_eq!(marrow::bit_array![true],  [0b_1]);
13/// assert_eq!(marrow::bit_array![true, true],  [0b_11]);
14/// assert_eq!(marrow::bit_array![true, true, false],  [0b_011]);
15/// assert_eq!(marrow::bit_array![true, true, false, true],  [0b_1011]);
16///
17/// assert_eq!(
18///     marrow::bit_array![
19///         // first byte
20///         true, true, false, false, true, false, true, false,
21///         // second byte
22///         true, true, true, false, true,
23///     ],
24///     [0b_01010011, 0b_10111],
25/// );
26/// ```
27///
28/// If all items are const expressions, `bit_array` can be used in const contexts, e.g.,
29///
30/// ```rust
31/// const fn func() -> bool {
32///     3 % 2 == 0
33/// }
34///
35/// const { marrow::bit_array![true, false, func()] }
36/// # ;
37/// ```
38///
39/// Rules starting with `@num_bits` and `@num_bytes` are internal and are not subject to
40/// compatibility guarantees.
41///
42/// See also [`marrow::bits`][crate::bits].
43#[macro_export]
44macro_rules! bit_array {
45    ($($items:expr),* $(,)?) => {
46        {
47            const N: usize = $crate::bit_array![@num_bits, $($items),*];
48            const M: usize = $crate::bit_array![@num_bytes, $($items),*];
49
50            let items: [bool; N] = [$($items),*];
51            let mut res: [u8; M] = [0; M];
52
53            // use while loop to be const-compatible
54            let mut idx = 0;
55            while idx < items.len() {
56                if items[idx] {
57                    res[idx / 8] |= 1 << (idx % 8);
58                }
59                idx += 1;
60            }
61            res
62        }
63    };
64    (@num_bits $(,)?) => { const { 0_usize } };
65    (@num_bits, $head:expr $(, $tail:expr)* $(,)?) => {
66        const { 1_usize + $crate::bit_array!(@num_bits, $($tail),*) }
67    };
68    (@num_bytes $(, $items:expr)* $(,)?) => {
69        const {
70            const N: usize = $crate::bit_array!(@num_bits $(, $items)*);
71            const EXTRA_BYTE: usize = if (N % 8) != 0 { 1 } else { 0 };
72            N / 8 + EXTRA_BYTE
73        }
74    }
75}
76
77const _: () = const {
78    assert!(crate::bit_array![@num_bits, ] == 0);
79    assert!(crate::bit_array![@num_bits] == 0);
80    assert!(crate::bit_array![@num_bits, 1, 2, 3] == 3);
81    assert!(crate::bit_array![@num_bits, 1, 2, 3, ] == 3);
82
83    assert!(crate::bit_array![@num_bytes, ] == 0);
84    assert!(crate::bit_array![@num_bytes] == 0);
85    assert!(crate::bit_array![@num_bytes, 1, 2, 3] == 1);
86    assert!(crate::bit_array![@num_bytes, 1, 2, 3, ] == 1);
87
88    assert!(crate::bit_array![@num_bytes, 1, 2, 3, 4, 5, 6, 7, 8, 9] == 2);
89    assert!(crate::bit_array![@num_bytes, 1, 2, 3, 4, 5, 6, 7, 8, 9, ] == 2);
90};
91
92/// Construct a bit vector (`Vec<u8>`) from a sequence of booleans
93///
94/// ```rust
95/// assert_eq!(marrow::bit_vec![true, true, false, true],  vec![0b_1011]);
96/// ```
97///
98/// Equivalent to `marrow::bit_array![..].to_vec()`. See [`marrow::bit_array`][crate::bit_array] for
99/// more details. See also [`marrow::bits`][crate::bits].
100///
101#[macro_export]
102macro_rules! bit_vec {
103    ($($items:expr),* $(,)?) => { $crate::bit_array![$($items),*].to_vec() }
104}
105
106#[test]
107fn test_bit_array() {
108    // force non-const'ness
109    fn bool(val: bool) -> bool {
110        val
111    }
112
113    assert_eq!(
114        crate::bit_array![bool(true), bool(false), bool(true), bool(true)],
115        [0b_1101]
116    );
117
118    const ARRAY: [u8; 1] = const { crate::bit_array![true, false, true, true] };
119    assert_eq!(ARRAY, [0b_1101]);
120}
121
122/// Get a bit of a bit vector
123///
124/// ```rust
125/// # use marrow::bits;
126/// #
127/// let bit_vec = &[0b_01010011, 0b_10111];
128///
129/// assert_eq!(bits::get(bit_vec, 0), true);
130/// assert_eq!(bits::get(bit_vec, 1), true);
131/// assert_eq!(bits::get(bit_vec, 2), false);
132/// assert_eq!(bits::get(bit_vec, 3), false);
133/// assert_eq!(bits::get(bit_vec, 4), true);
134/// assert_eq!(bits::get(bit_vec, 5), false);
135/// assert_eq!(bits::get(bit_vec, 6), true);
136/// assert_eq!(bits::get(bit_vec, 7), false);
137///
138/// assert_eq!(bits::get(bit_vec, 8), true);
139/// assert_eq!(bits::get(bit_vec, 9), true);
140/// assert_eq!(bits::get(bit_vec, 10), true);
141/// assert_eq!(bits::get(bit_vec, 11), false);
142/// assert_eq!(bits::get(bit_vec, 12), true);
143/// ```
144pub const fn get(bit_vec: &[u8], idx: usize) -> bool {
145    let mask = 1 << (idx % 8);
146    bit_vec[idx / 8] & mask == mask
147}
148
149/// Set a bit of a bit vector
150///
151/// ```rust
152/// # use marrow::bits;
153/// #
154/// let mut bit_vec = [0; 2];
155///
156/// // update bits in random order
157/// bits::set(&mut bit_vec, 9, true);
158/// bits::set(&mut bit_vec, 4, true);
159/// bits::set(&mut bit_vec, 11, false);
160/// bits::set(&mut bit_vec, 2, false);
161/// bits::set(&mut bit_vec, 7, false);
162/// bits::set(&mut bit_vec, 1, true);
163/// bits::set(&mut bit_vec, 12, true);
164/// bits::set(&mut bit_vec, 0, true);
165/// bits::set(&mut bit_vec, 5, false);
166/// bits::set(&mut bit_vec, 8, true);
167/// bits::set(&mut bit_vec, 3, false);
168/// bits::set(&mut bit_vec, 10, true);
169/// bits::set(&mut bit_vec, 6, true);
170///
171/// assert_eq!(&bit_vec, &[0b_01010011, 0b_10111]);
172///
173/// ```
174pub const fn set(bit_vec: &mut [u8], idx: usize, value: bool) {
175    let mask = 1 << (idx % 8);
176    if value {
177        bit_vec[idx / 8] |= mask;
178    } else {
179        bit_vec[idx / 8] &= !mask;
180    }
181}
182
183/// Push a new bit into a bit vector with `len` items
184///
185///
186/// Usage:
187///
188/// ```rust
189/// # use marrow::bits;
190/// #
191/// let mut vec = Vec::new();
192/// let mut len = 0;
193///
194/// bits::push(&mut vec, &mut len, true);
195/// assert_eq!(&vec, &[0b_1]);
196/// assert_eq!(len, 1);
197///
198/// bits::push(&mut vec, &mut len, true);
199/// assert_eq!(&vec, &[0b_11]);
200/// assert_eq!(len, 2);
201///
202/// bits::push(&mut vec, &mut len, false);
203/// assert_eq!(&vec, &[0b_011]);
204/// assert_eq!(len, 3);
205///
206/// bits::push(&mut vec, &mut len, true);
207/// assert_eq!(&vec, &[0b_1011]);
208/// assert_eq!(len, 4);
209///
210/// bits::push(&mut vec, &mut len, false);
211/// assert_eq!(&vec, &[0b_01011]);
212/// assert_eq!(len, 5);
213///
214/// bits::push(&mut vec, &mut len, false);
215/// assert_eq!(&vec, &[0b_001011]);
216/// assert_eq!(len, 6);
217///
218/// bits::push(&mut vec, &mut len, true);
219/// assert_eq!(&vec, &[0b_1001011]);
220/// assert_eq!(len, 7);
221///
222/// bits::push(&mut vec, &mut len, true);
223/// assert_eq!(&vec, &[0b_11001011]);
224/// assert_eq!(len, 8);
225///
226/// bits::push(&mut vec, &mut len, true);
227/// assert_eq!(&vec, &[0b_11001011, 0b_1]);
228/// assert_eq!(len, 9);
229/// ```
230///
231pub fn push(bit_vec: &mut Vec<u8>, len: &mut usize, value: bool) {
232    // custom impl to keep MSRV
233    fn div_ceil(a: usize, b: usize) -> usize {
234        (a / b) + if (a % b) != 0 { 1 } else { 0 }
235    }
236
237    assert_eq!(
238        div_ceil(*len, 8),
239        bit_vec.len(),
240        "len and bit_vec incompatible"
241    );
242
243    if *len == 8 * bit_vec.len() {
244        bit_vec.push(0);
245    }
246
247    set(bit_vec, *len, value);
248    // NOTE: needs to be last
249    *len += 1;
250}