marrow/bits.rs
1//! Helpers to work with bit vectors
2//!
3//! Bit vectors encode eight booleans into the bits of a byte. The Arrow format uses them to encode
4//! validity information of arrays, but also for boolean arrays.
5//!
6//! To construct bit vectors as arrays or vectors see [`marrow::bit_array!`][crate::bit_array] and
7//! [`marrow::bit_vec!`][crate::bit_vec].
8
9/// Build a fixed-size bit array from a sequence of booleans
10///
11/// ```rust
12/// assert_eq!(marrow::bit_array![true], [0b_1]);
13/// assert_eq!(marrow::bit_array![true, true], [0b_11]);
14/// assert_eq!(marrow::bit_array![true, true, false], [0b_011]);
15/// assert_eq!(marrow::bit_array![true, true, false, true], [0b_1011]);
16///
17/// assert_eq!(
18/// marrow::bit_array![
19/// // first byte
20/// true, true, false, false, true, false, true, false,
21/// // second byte
22/// true, true, true, false, true,
23/// ],
24/// [0b_01010011, 0b_10111],
25/// );
26/// ```
27///
28/// If all items are const expressions, `bit_array` can be used in const contexts, e.g.,
29///
30/// ```rust
31/// const fn func() -> bool {
32/// 3 % 2 == 0
33/// }
34///
35/// const { marrow::bit_array![true, false, func()] }
36/// # ;
37/// ```
38///
39/// Rules starting with `@num_bits` and `@num_bytes` are internal and are not subject to
40/// compatibility guarantees.
41///
42/// See also [`marrow::bits`][crate::bits].
43#[macro_export]
44macro_rules! bit_array {
45 ($($items:expr),* $(,)?) => {
46 {
47 const N: usize = $crate::bit_array![@num_bits, $($items),*];
48 const M: usize = $crate::bit_array![@num_bytes, $($items),*];
49
50 let items: [bool; N] = [$($items),*];
51 let mut res: [u8; M] = [0; M];
52
53 // use while loop to be const-compatible
54 let mut idx = 0;
55 while idx < items.len() {
56 if items[idx] {
57 res[idx / 8] |= 1 << (idx % 8);
58 }
59 idx += 1;
60 }
61 res
62 }
63 };
64 (@num_bits $(,)?) => { const { 0_usize } };
65 (@num_bits, $head:expr $(, $tail:expr)* $(,)?) => {
66 const { 1_usize + $crate::bit_array!(@num_bits, $($tail),*) }
67 };
68 (@num_bytes $(, $items:expr)* $(,)?) => {
69 const {
70 const N: usize = $crate::bit_array!(@num_bits $(, $items)*);
71 const EXTRA_BYTE: usize = if (N % 8) != 0 { 1 } else { 0 };
72 N / 8 + EXTRA_BYTE
73 }
74 }
75}
76
77const _: () = const {
78 assert!(crate::bit_array![@num_bits, ] == 0);
79 assert!(crate::bit_array![@num_bits] == 0);
80 assert!(crate::bit_array![@num_bits, 1, 2, 3] == 3);
81 assert!(crate::bit_array![@num_bits, 1, 2, 3, ] == 3);
82
83 assert!(crate::bit_array![@num_bytes, ] == 0);
84 assert!(crate::bit_array![@num_bytes] == 0);
85 assert!(crate::bit_array![@num_bytes, 1, 2, 3] == 1);
86 assert!(crate::bit_array![@num_bytes, 1, 2, 3, ] == 1);
87
88 assert!(crate::bit_array![@num_bytes, 1, 2, 3, 4, 5, 6, 7, 8, 9] == 2);
89 assert!(crate::bit_array![@num_bytes, 1, 2, 3, 4, 5, 6, 7, 8, 9, ] == 2);
90};
91
92/// Construct a bit vector (`Vec<u8>`) from a sequence of booleans
93///
94/// ```rust
95/// assert_eq!(marrow::bit_vec![true, true, false, true], vec![0b_1011]);
96/// ```
97///
98/// Equivalent to `marrow::bit_array![..].to_vec()`. See [`marrow::bit_array`][crate::bit_array] for
99/// more details. See also [`marrow::bits`][crate::bits].
100///
101#[macro_export]
102macro_rules! bit_vec {
103 ($($items:expr),* $(,)?) => { $crate::bit_array![$($items),*].to_vec() }
104}
105
106#[test]
107fn test_bit_array() {
108 // force non-const'ness
109 fn bool(val: bool) -> bool {
110 val
111 }
112
113 assert_eq!(
114 crate::bit_array![bool(true), bool(false), bool(true), bool(true)],
115 [0b_1101]
116 );
117
118 const ARRAY: [u8; 1] = const { crate::bit_array![true, false, true, true] };
119 assert_eq!(ARRAY, [0b_1101]);
120}
121
122/// Get a bit of a bit vector
123///
124/// ```rust
125/// # use marrow::bits;
126/// #
127/// let bit_vec = &[0b_01010011, 0b_10111];
128///
129/// assert_eq!(bits::get(bit_vec, 0), true);
130/// assert_eq!(bits::get(bit_vec, 1), true);
131/// assert_eq!(bits::get(bit_vec, 2), false);
132/// assert_eq!(bits::get(bit_vec, 3), false);
133/// assert_eq!(bits::get(bit_vec, 4), true);
134/// assert_eq!(bits::get(bit_vec, 5), false);
135/// assert_eq!(bits::get(bit_vec, 6), true);
136/// assert_eq!(bits::get(bit_vec, 7), false);
137///
138/// assert_eq!(bits::get(bit_vec, 8), true);
139/// assert_eq!(bits::get(bit_vec, 9), true);
140/// assert_eq!(bits::get(bit_vec, 10), true);
141/// assert_eq!(bits::get(bit_vec, 11), false);
142/// assert_eq!(bits::get(bit_vec, 12), true);
143/// ```
144pub const fn get(bit_vec: &[u8], idx: usize) -> bool {
145 let mask = 1 << (idx % 8);
146 bit_vec[idx / 8] & mask == mask
147}
148
149/// Set a bit of a bit vector
150///
151/// ```rust
152/// # use marrow::bits;
153/// #
154/// let mut bit_vec = [0; 2];
155///
156/// // update bits in random order
157/// bits::set(&mut bit_vec, 9, true);
158/// bits::set(&mut bit_vec, 4, true);
159/// bits::set(&mut bit_vec, 11, false);
160/// bits::set(&mut bit_vec, 2, false);
161/// bits::set(&mut bit_vec, 7, false);
162/// bits::set(&mut bit_vec, 1, true);
163/// bits::set(&mut bit_vec, 12, true);
164/// bits::set(&mut bit_vec, 0, true);
165/// bits::set(&mut bit_vec, 5, false);
166/// bits::set(&mut bit_vec, 8, true);
167/// bits::set(&mut bit_vec, 3, false);
168/// bits::set(&mut bit_vec, 10, true);
169/// bits::set(&mut bit_vec, 6, true);
170///
171/// assert_eq!(&bit_vec, &[0b_01010011, 0b_10111]);
172///
173/// ```
174pub const fn set(bit_vec: &mut [u8], idx: usize, value: bool) {
175 let mask = 1 << (idx % 8);
176 if value {
177 bit_vec[idx / 8] |= mask;
178 } else {
179 bit_vec[idx / 8] &= !mask;
180 }
181}
182
183/// Push a new bit into a bit vector with `len` items
184///
185///
186/// Usage:
187///
188/// ```rust
189/// # use marrow::bits;
190/// #
191/// let mut vec = Vec::new();
192/// let mut len = 0;
193///
194/// bits::push(&mut vec, &mut len, true);
195/// assert_eq!(&vec, &[0b_1]);
196/// assert_eq!(len, 1);
197///
198/// bits::push(&mut vec, &mut len, true);
199/// assert_eq!(&vec, &[0b_11]);
200/// assert_eq!(len, 2);
201///
202/// bits::push(&mut vec, &mut len, false);
203/// assert_eq!(&vec, &[0b_011]);
204/// assert_eq!(len, 3);
205///
206/// bits::push(&mut vec, &mut len, true);
207/// assert_eq!(&vec, &[0b_1011]);
208/// assert_eq!(len, 4);
209///
210/// bits::push(&mut vec, &mut len, false);
211/// assert_eq!(&vec, &[0b_01011]);
212/// assert_eq!(len, 5);
213///
214/// bits::push(&mut vec, &mut len, false);
215/// assert_eq!(&vec, &[0b_001011]);
216/// assert_eq!(len, 6);
217///
218/// bits::push(&mut vec, &mut len, true);
219/// assert_eq!(&vec, &[0b_1001011]);
220/// assert_eq!(len, 7);
221///
222/// bits::push(&mut vec, &mut len, true);
223/// assert_eq!(&vec, &[0b_11001011]);
224/// assert_eq!(len, 8);
225///
226/// bits::push(&mut vec, &mut len, true);
227/// assert_eq!(&vec, &[0b_11001011, 0b_1]);
228/// assert_eq!(len, 9);
229/// ```
230///
231pub fn push(bit_vec: &mut Vec<u8>, len: &mut usize, value: bool) {
232 // custom impl to keep MSRV
233 fn div_ceil(a: usize, b: usize) -> usize {
234 (a / b) + if (a % b) != 0 { 1 } else { 0 }
235 }
236
237 assert_eq!(
238 div_ceil(*len, 8),
239 bit_vec.len(),
240 "len and bit_vec incompatible"
241 );
242
243 if *len == 8 * bit_vec.len() {
244 bit_vec.push(0);
245 }
246
247 set(bit_vec, *len, value);
248 // NOTE: needs to be last
249 *len += 1;
250}