Skip to main content

arrow_buffer/util/
bit_util.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Utils for working with bits
19
20use crate::bit_chunk_iterator::BitChunks;
21
22/// Returns the nearest number that is `>=` than `num` and is a multiple of 64
23#[inline]
24pub fn round_upto_multiple_of_64(num: usize) -> usize {
25    num.checked_next_multiple_of(64)
26        .expect("failed to round upto multiple of 64")
27}
28
29/// Returns the nearest multiple of `factor` that is `>=` than `num`. Here `factor` must
30/// be a power of 2.
31pub fn round_upto_power_of_2(num: usize, factor: usize) -> usize {
32    debug_assert!(factor > 0 && factor.is_power_of_two());
33    num.checked_add(factor - 1)
34        .expect("failed to round to next highest power of 2")
35        & !(factor - 1)
36}
37
38/// Returns whether bit at position `i` in `data` is set or not
39#[inline]
40pub fn get_bit(data: &[u8], i: usize) -> bool {
41    data[i / 8] & (1 << (i % 8)) != 0
42}
43
44/// Returns whether bit at position `i` in `data` is set or not.
45///
46/// # Safety
47///
48/// Note this doesn't do any bound checking, for performance reason. The caller is
49/// responsible to guarantee that `i` is within bounds.
50#[inline]
51pub unsafe fn get_bit_raw(data: *const u8, i: usize) -> bool {
52    unsafe { (*data.add(i / 8) & (1 << (i % 8))) != 0 }
53}
54
55/// Sets bit at position `i` for `data` to 1
56#[inline]
57pub fn set_bit(data: &mut [u8], i: usize) {
58    data[i / 8] |= 1 << (i % 8);
59}
60
61/// Sets bit at position `i` for `data`
62///
63/// # Safety
64///
65/// Note this doesn't do any bound checking, for performance reason. The caller is
66/// responsible to guarantee that `i` is within bounds.
67#[inline]
68pub unsafe fn set_bit_raw(data: *mut u8, i: usize) {
69    unsafe {
70        *data.add(i / 8) |= 1 << (i % 8);
71    }
72}
73
74/// Sets bit at position `i` for `data` to 0
75#[inline]
76pub fn unset_bit(data: &mut [u8], i: usize) {
77    data[i / 8] &= !(1 << (i % 8));
78}
79
80/// Sets bit at position `i` for `data` to 0
81///
82/// # Safety
83///
84/// Note this doesn't do any bound checking, for performance reason. The caller is
85/// responsible to guarantee that `i` is within bounds.
86#[inline]
87pub unsafe fn unset_bit_raw(data: *mut u8, i: usize) {
88    unsafe {
89        *data.add(i / 8) &= !(1 << (i % 8));
90    }
91}
92
93/// Returns the ceil of `value`/`divisor`
94#[inline]
95pub fn ceil(value: usize, divisor: usize) -> usize {
96    value.div_ceil(divisor)
97}
98
99/// Read a u64 from a byte slice, padding with zeros if necessary
100#[inline]
101pub(crate) fn read_u64(input: &[u8]) -> u64 {
102    let len = input.len().min(8);
103    let mut buf = [0_u8; 8];
104    buf[..len].copy_from_slice(input);
105    u64::from_le_bytes(buf)
106}
107
108/// Read up to 8 bits from a byte slice starting at a given bit offset.
109///
110/// # Arguments
111///
112/// * `slice` - The byte slice to read from
113/// * `number_of_bits_to_read` - Number of bits to read (must be < 8)
114/// * `bit_offset` - Starting bit offset within the first byte (must be < 8)
115///
116/// # Returns
117///
118/// A `u8` containing the requested bits in the least significant positions
119///
120/// # Panics
121/// - Panics if `number_of_bits_to_read` is 0 or >= 8
122/// - Panics if `bit_offset` is >= 8
123/// - Panics if `slice` is empty or too small to read the requested bits
124///
125#[inline]
126pub(crate) fn read_up_to_byte_from_offset(
127    slice: &[u8],
128    number_of_bits_to_read: usize,
129    bit_offset: usize,
130) -> u8 {
131    assert!(number_of_bits_to_read < 8, "can read up to 8 bits only");
132    assert!(bit_offset < 8, "bit offset must be less than 8");
133    assert_ne!(
134        number_of_bits_to_read, 0,
135        "number of bits to read must be greater than 0"
136    );
137    assert_ne!(slice.len(), 0, "slice must not be empty");
138
139    let number_of_bytes_to_read = ceil(number_of_bits_to_read + bit_offset, 8);
140
141    // number of bytes to read
142    assert!(slice.len() >= number_of_bytes_to_read, "slice is too small");
143
144    let mut bits = slice[0] >> bit_offset;
145    for (i, &byte) in slice
146        .iter()
147        .take(number_of_bytes_to_read)
148        .enumerate()
149        .skip(1)
150    {
151        bits |= byte << (i * 8 - bit_offset);
152    }
153
154    bits & ((1 << number_of_bits_to_read) - 1)
155}
156
157/// Applies a bitwise operation relative to another bit-packed byte slice
158/// (right) in place
159///
160/// Note: applies the operation 64-bits (u64) at a time.
161///
162/// # Arguments
163///
164/// * `left` - The mutable buffer to be modified in-place
165/// * `offset_in_bits` - Starting bit offset in Self buffer
166/// * `right` - slice of bit-packed bytes in LSB order
167/// * `right_offset_in_bits` - Starting bit offset in the right buffer
168/// * `len_in_bits` - Number of bits to process
169/// * `op` - Binary operation to apply (e.g., `|a, b| a & b`). Applied a word at a time
170///
171/// # Example: Modify entire buffer
172/// ```
173/// # use arrow_buffer::MutableBuffer;
174/// # use arrow_buffer::bit_util::apply_bitwise_binary_op;
175/// let mut left = MutableBuffer::new(2);
176/// left.extend_from_slice(&[0b11110000u8, 0b00110011u8]);
177/// let right = &[0b10101010u8, 0b10101010u8];
178/// // apply bitwise AND between left and right buffers, updating left in place
179/// apply_bitwise_binary_op(left.as_slice_mut(), 0, right, 0, 16, |a, b| a & b);
180/// assert_eq!(left.as_slice(), &[0b10100000u8, 0b00100010u8]);
181/// ```
182///
183/// # Example: Modify buffer with offsets
184/// ```
185/// # use arrow_buffer::MutableBuffer;
186/// # use arrow_buffer::bit_util::apply_bitwise_binary_op;
187/// let mut left = MutableBuffer::new(2);
188/// left.extend_from_slice(&[0b00000000u8, 0b00000000u8]);
189/// let right = &[0b10110011u8, 0b11111110u8];
190/// // apply bitwise OR between left and right buffers,
191/// // Apply only 8 bits starting from bit offset 3 in left and bit offset 2 in right
192/// apply_bitwise_binary_op(left.as_slice_mut(), 3, right, 2, 8, |a, b| a | b);
193/// assert_eq!(left.as_slice(), &[0b01100000, 0b00000101u8]);
194/// ```
195///
196/// # Panics
197///
198/// If the offset or lengths exceed the buffer or slice size.
199pub fn apply_bitwise_binary_op<F>(
200    left: &mut [u8],
201    left_offset_in_bits: usize,
202    right: impl AsRef<[u8]>,
203    right_offset_in_bits: usize,
204    len_in_bits: usize,
205    mut op: F,
206) where
207    F: FnMut(u64, u64) -> u64,
208{
209    if len_in_bits == 0 {
210        return;
211    }
212
213    // offset inside a byte
214    let bit_offset = left_offset_in_bits % 8;
215
216    let is_mutable_buffer_byte_aligned = bit_offset == 0;
217
218    if is_mutable_buffer_byte_aligned {
219        byte_aligned_bitwise_bin_op_helper(
220            left,
221            left_offset_in_bits,
222            right,
223            right_offset_in_bits,
224            len_in_bits,
225            op,
226        );
227    } else {
228        // If we are not byte aligned, run `op` on the first few bits to reach byte alignment
229        let bits_to_next_byte = (8 - bit_offset)
230            // Minimum with the amount of bits we need to process
231            // to avoid reading out of bounds
232            .min(len_in_bits);
233
234        {
235            let right_byte_offset = right_offset_in_bits / 8;
236
237            // Read the same amount of bits from the right buffer
238            let right_first_byte: u8 = crate::util::bit_util::read_up_to_byte_from_offset(
239                &right.as_ref()[right_byte_offset..],
240                bits_to_next_byte,
241                // Right bit offset
242                right_offset_in_bits % 8,
243            );
244
245            align_to_byte(
246                left,
247                // Hope it gets inlined
248                &mut |left| op(left, right_first_byte as u64),
249                left_offset_in_bits,
250            );
251        }
252
253        let offset_in_bits = left_offset_in_bits + bits_to_next_byte;
254        let right_offset_in_bits = right_offset_in_bits + bits_to_next_byte;
255        let len_in_bits = len_in_bits.saturating_sub(bits_to_next_byte);
256
257        if len_in_bits == 0 {
258            return;
259        }
260
261        // We are now byte aligned
262        byte_aligned_bitwise_bin_op_helper(
263            left,
264            offset_in_bits,
265            right,
266            right_offset_in_bits,
267            len_in_bits,
268            op,
269        );
270    }
271}
272
273/// Apply a bitwise operation to a mutable buffer, updating it in place.
274///
275/// Note: applies the operation 64-bits (u64) at a time.
276///
277/// # Arguments
278///
279/// * `offset_in_bits` - Starting bit offset for the current buffer
280/// * `len_in_bits` - Number of bits to process
281/// * `op` - Unary operation to apply (e.g., `|a| !a`). Applied a word at a time
282///
283/// # Example: Modify entire buffer
284/// ```
285/// # use arrow_buffer::MutableBuffer;
286/// # use arrow_buffer::bit_util::apply_bitwise_unary_op;
287/// let mut buffer = MutableBuffer::new(2);
288/// buffer.extend_from_slice(&[0b11110000u8, 0b00110011u8]);
289/// // apply bitwise NOT to the buffer in place
290/// apply_bitwise_unary_op(buffer.as_slice_mut(), 0, 16, |a| !a);
291/// assert_eq!(buffer.as_slice(), &[0b00001111u8, 0b11001100u8]);
292/// ```
293///
294/// # Example: Modify buffer with offsets
295/// ```
296/// # use arrow_buffer::MutableBuffer;
297/// # use arrow_buffer::bit_util::apply_bitwise_unary_op;
298/// let mut buffer = MutableBuffer::new(2);
299/// buffer.extend_from_slice(&[0b00000000u8, 0b00000000u8]);
300/// // apply bitwise NOT to 8 bits starting from bit offset 3
301/// apply_bitwise_unary_op(buffer.as_slice_mut(), 3, 8, |a| !a);
302/// assert_eq!(buffer.as_slice(), &[0b11111000u8, 0b00000111u8]);
303/// ```
304///
305/// # Panics
306///
307/// If the offset and length exceed the buffer size.
308pub fn apply_bitwise_unary_op<F>(
309    buffer: &mut [u8],
310    offset_in_bits: usize,
311    len_in_bits: usize,
312    mut op: F,
313) where
314    F: FnMut(u64) -> u64,
315{
316    if len_in_bits == 0 {
317        return;
318    }
319
320    // offset inside a byte
321    let left_bit_offset = offset_in_bits % 8;
322
323    let is_mutable_buffer_byte_aligned = left_bit_offset == 0;
324
325    if is_mutable_buffer_byte_aligned {
326        byte_aligned_bitwise_unary_op_helper(buffer, offset_in_bits, len_in_bits, op);
327    } else {
328        align_to_byte(buffer, &mut op, offset_in_bits);
329
330        // If we are not byte aligned we will read the first few bits
331        let bits_to_next_byte = 8 - left_bit_offset;
332
333        let offset_in_bits = offset_in_bits + bits_to_next_byte;
334        let len_in_bits = len_in_bits.saturating_sub(bits_to_next_byte);
335
336        if len_in_bits == 0 {
337            return;
338        }
339
340        // We are now byte aligned
341        byte_aligned_bitwise_unary_op_helper(buffer, offset_in_bits, len_in_bits, op);
342    }
343}
344
345/// Perform bitwise binary operation on byte-aligned buffers (i.e. not offsetting into a middle of a byte).
346///
347/// This is the optimized path for byte-aligned operations. It processes data in
348/// u64 chunks for maximum efficiency, then handles any remainder bits.
349///
350/// # Arguments
351///
352/// * `left` - The left mutable buffer (must be byte-aligned)
353/// * `left_offset_in_bits` - Starting bit offset in the left buffer (must be multiple of 8)
354/// * `right` - The right buffer as byte slice
355/// * `right_offset_in_bits` - Starting bit offset in the right buffer
356/// * `len_in_bits` - Number of bits to process
357/// * `op` - Binary operation to apply
358#[inline]
359fn byte_aligned_bitwise_bin_op_helper<F>(
360    left: &mut [u8],
361    left_offset_in_bits: usize,
362    right: impl AsRef<[u8]>,
363    right_offset_in_bits: usize,
364    len_in_bits: usize,
365    mut op: F,
366) where
367    F: FnMut(u64, u64) -> u64,
368{
369    // Must not reach here if we not byte aligned
370    assert_eq!(
371        left_offset_in_bits % 8,
372        0,
373        "offset_in_bits must be byte aligned"
374    );
375
376    // 1. Prepare the buffers
377    let (complete_u64_chunks, remainder_bytes) =
378        U64UnalignedSlice::split(left, left_offset_in_bits, len_in_bits);
379
380    let right_chunks = BitChunks::new(right.as_ref(), right_offset_in_bits, len_in_bits);
381    assert_eq!(
382        self::ceil(right_chunks.remainder_len(), 8),
383        remainder_bytes.len()
384    );
385
386    let right_chunks_iter = right_chunks.iter();
387    assert_eq!(right_chunks_iter.len(), complete_u64_chunks.len());
388
389    // 2. Process complete u64 chunks
390    complete_u64_chunks.zip_modify(right_chunks_iter, &mut op);
391
392    // Handle remainder bits if any
393    if right_chunks.remainder_len() > 0 {
394        handle_mutable_buffer_remainder(
395            &mut op,
396            remainder_bytes,
397            right_chunks.remainder_bits(),
398            right_chunks.remainder_len(),
399        )
400    }
401}
402
403/// Perform bitwise unary operation on byte-aligned buffer.
404///
405/// This is the optimized path for byte-aligned unary operations. It processes data in
406/// u64 chunks for maximum efficiency, then handles any remainder bits.
407///
408/// # Arguments
409///
410/// * `buffer` - The mutable buffer (must be byte-aligned)
411/// * `offset_in_bits` - Starting bit offset (must be multiple of 8)
412/// * `len_in_bits` - Number of bits to process
413/// * `op` - Unary operation to apply (e.g., `|a| !a`)
414#[inline]
415fn byte_aligned_bitwise_unary_op_helper<F>(
416    buffer: &mut [u8],
417    offset_in_bits: usize,
418    len_in_bits: usize,
419    mut op: F,
420) where
421    F: FnMut(u64) -> u64,
422{
423    // Must not reach here if we not byte aligned
424    assert_eq!(offset_in_bits % 8, 0, "offset_in_bits must be byte aligned");
425
426    let remainder_len = len_in_bits % 64;
427
428    let (complete_u64_chunks, remainder_bytes) =
429        U64UnalignedSlice::split(buffer, offset_in_bits, len_in_bits);
430
431    assert_eq!(self::ceil(remainder_len, 8), remainder_bytes.len());
432
433    // 2. Process complete u64 chunks
434    complete_u64_chunks.apply_unary_op(&mut op);
435
436    // Handle remainder bits if any
437    if remainder_len > 0 {
438        handle_mutable_buffer_remainder_unary(&mut op, remainder_bytes, remainder_len)
439    }
440}
441
442/// Align to byte boundary by applying operation to bits before the next byte boundary.
443///
444/// This function handles non-byte-aligned operations by processing bits from the current
445/// position up to the next byte boundary, while preserving all other bits in the byte.
446///
447/// # Arguments
448///
449/// * `op` - Unary operation to apply
450/// * `buffer` - The mutable buffer to modify
451/// * `offset_in_bits` - Starting bit offset (not byte-aligned)
452fn align_to_byte<F>(buffer: &mut [u8], op: &mut F, offset_in_bits: usize)
453where
454    F: FnMut(u64) -> u64,
455{
456    let byte_offset = offset_in_bits / 8;
457    let bit_offset = offset_in_bits % 8;
458
459    // 1. read the first byte from the buffer
460    let first_byte: u8 = buffer[byte_offset];
461
462    // 2. Shift byte by the bit offset, keeping only the relevant bits
463    let relevant_first_byte = first_byte >> bit_offset;
464
465    // 3. run the op on the first byte only
466    let result_first_byte = op(relevant_first_byte as u64) as u8;
467
468    // 4. Shift back the result to the original position
469    let result_first_byte = result_first_byte << bit_offset;
470
471    // 5. Mask the bits that are outside the relevant bits in the byte
472    //    so the bits until bit_offset are 1 and the rest are 0
473    let mask_for_first_bit_offset = (1 << bit_offset) - 1;
474
475    let result_first_byte =
476        (first_byte & mask_for_first_bit_offset) | (result_first_byte & !mask_for_first_bit_offset);
477
478    // 6. write back the result to the buffer
479    buffer[byte_offset] = result_first_byte;
480}
481
482/// Centralized structure to handle a mutable u8 slice as a mutable u64 pointer.
483///
484/// Handle the following:
485/// 1. the lifetime is correct
486/// 2. we read/write within the bounds
487/// 3. We read and write using unaligned
488///
489/// This does not deallocate the underlying pointer when dropped
490///
491/// This is the only place that uses unsafe code to read and write unaligned
492///
493struct U64UnalignedSlice<'a> {
494    /// Pointer to the start of the u64 data
495    ///
496    /// We are using raw pointer as the data came from a u8 slice so we need to read and write unaligned
497    ptr: *mut u64,
498
499    /// Number of u64 elements
500    len: usize,
501
502    /// Marker to tie the lifetime of the pointer to the lifetime of the u8 slice
503    _marker: std::marker::PhantomData<&'a u8>,
504}
505
506impl<'a> U64UnalignedSlice<'a> {
507    /// Create a new [`U64UnalignedSlice`] from a `&mut [u8]` buffer
508    ///
509    /// return the [`U64UnalignedSlice`] and slice of bytes that are not part of the u64 chunks (guaranteed to be less than 8 bytes)
510    ///
511    fn split(
512        buffer: &'a mut [u8],
513        offset_in_bits: usize,
514        len_in_bits: usize,
515    ) -> (Self, &'a mut [u8]) {
516        // 1. Prepare the buffers
517        let left_buffer_mut: &mut [u8] = {
518            let last_offset = self::ceil(offset_in_bits + len_in_bits, 8);
519            assert!(last_offset <= buffer.len());
520
521            let byte_offset = offset_in_bits / 8;
522
523            &mut buffer[byte_offset..last_offset]
524        };
525
526        let number_of_u64_we_can_fit = len_in_bits / (u64::BITS as usize);
527
528        // 2. Split
529        let u64_len_in_bytes = number_of_u64_we_can_fit * size_of::<u64>();
530
531        assert!(u64_len_in_bytes <= left_buffer_mut.len());
532        let (bytes_for_u64, remainder) = left_buffer_mut.split_at_mut(u64_len_in_bytes);
533
534        let ptr = bytes_for_u64.as_mut_ptr() as *mut u64;
535
536        let this = Self {
537            ptr,
538            len: number_of_u64_we_can_fit,
539            _marker: std::marker::PhantomData,
540        };
541
542        (this, remainder)
543    }
544
545    fn len(&self) -> usize {
546        self.len
547    }
548
549    /// Modify the underlying u64 data in place using a binary operation
550    /// with another iterator.
551    fn zip_modify(
552        mut self,
553        mut zip_iter: impl ExactSizeIterator<Item = u64>,
554        mut map: impl FnMut(u64, u64) -> u64,
555    ) {
556        assert_eq!(self.len, zip_iter.len());
557
558        // In order to avoid advancing the pointer at the end of the loop which will
559        // make the last pointer invalid, we handle the first element outside the loop
560        // and then advance the pointer at the start of the loop
561        // making sure that the iterator is not empty
562        if let Some(right) = zip_iter.next() {
563            // SAFETY: We asserted that the iterator length and the current length are the same
564            // and the iterator is not empty, so the pointer is valid
565            unsafe {
566                self.apply_bin_op(right, &mut map);
567            }
568
569            // Because this consumes self we don't update the length
570        }
571
572        for right in zip_iter {
573            // Advance the pointer
574            //
575            // SAFETY: We asserted that the iterator length and the current length are the same
576            self.ptr = unsafe { self.ptr.add(1) };
577
578            // SAFETY: the pointer is valid as we are within the length
579            unsafe {
580                self.apply_bin_op(right, &mut map);
581            }
582
583            // Because this consumes self we don't update the length
584        }
585    }
586
587    /// Centralized function to correctly read the current u64 value and write back the result
588    ///
589    /// # SAFETY
590    /// the caller must ensure that the pointer is valid for reads and writes
591    ///
592    #[inline]
593    unsafe fn apply_bin_op(&mut self, right: u64, mut map: impl FnMut(u64, u64) -> u64) {
594        // SAFETY: The constructor ensures the pointer is valid,
595        // and as to all modifications in U64UnalignedSlice
596        let current_input = unsafe {
597            self.ptr
598                // Reading unaligned as we came from u8 slice
599                .read_unaligned()
600                // bit-packed buffers are stored starting with the least-significant byte first
601                // so when reading as u64 on a big-endian machine, the bytes need to be swapped
602                .to_le()
603        };
604
605        let combined = map(current_input, right);
606
607        // Write the result back
608        //
609        // The pointer came from mutable u8 slice so the pointer is valid for writes,
610        // and we need to write unaligned
611        unsafe { self.ptr.write_unaligned(combined) }
612    }
613
614    /// Modify the underlying u64 data in place using a unary operation.
615    fn apply_unary_op(mut self, mut map: impl FnMut(u64) -> u64) {
616        if self.len == 0 {
617            return;
618        }
619
620        // In order to avoid advancing the pointer at the end of the loop which will
621        // make the last pointer invalid, we handle the first element outside the loop
622        // and then advance the pointer at the start of the loop
623        // making sure that the iterator is not empty
624        unsafe {
625            // I hope the function get inlined and the compiler remove the dead right parameter
626            self.apply_bin_op(0, &mut |left, _| map(left));
627
628            // Because this consumes self we don't update the length
629        }
630
631        for _ in 1..self.len {
632            // Advance the pointer
633            //
634            // SAFETY: we only advance the pointer within the length and not beyond
635            self.ptr = unsafe { self.ptr.add(1) };
636
637            // SAFETY: the pointer is valid as we are within the length
638            unsafe {
639                // I hope the function get inlined and the compiler remove the dead right parameter
640                self.apply_bin_op(0, &mut |left, _| map(left));
641            }
642
643            // Because this consumes self we don't update the length
644        }
645    }
646}
647
648/// Handle remainder bits (< 64 bits) for binary operations.
649///
650/// This function processes the bits that don't form a complete u64 chunk,
651/// ensuring that bits outside the operation range are preserved.
652///
653/// # Arguments
654///
655/// * `op` - Binary operation to apply
656/// * `start_remainder_mut_slice` - slice to the start of remainder bytes
657///   the length must be equal to `ceil(remainder_len, 8)`
658/// * `right_remainder_bits` - Right operand bits
659/// * `remainder_len` - Number of remainder bits
660#[inline]
661fn handle_mutable_buffer_remainder<F>(
662    op: &mut F,
663    start_remainder_mut_slice: &mut [u8],
664    right_remainder_bits: u64,
665    remainder_len: usize,
666) where
667    F: FnMut(u64, u64) -> u64,
668{
669    // Only read from slice the number of remainder bits
670    let left_remainder_bits = get_remainder_bits(start_remainder_mut_slice, remainder_len);
671
672    // Apply the operation
673    let rem = op(left_remainder_bits, right_remainder_bits);
674
675    // Write only the relevant bits back the result to the mutable slice
676    set_remainder_bits(start_remainder_mut_slice, rem, remainder_len);
677}
678
679/// Write remainder bits back to buffer while preserving bits outside the range.
680///
681/// This function carefully updates only the specified bits, leaving all other
682/// bits in the affected bytes unchanged.
683///
684/// # Arguments
685///
686/// * `start_remainder_mut_slice` - the slice of bytes to write the remainder bits to,
687///   the length must be equal to `ceil(remainder_len, 8)`
688/// * `rem` - The result bits to write
689/// * `remainder_len` - Number of bits to write
690#[inline]
691fn set_remainder_bits(start_remainder_mut_slice: &mut [u8], rem: u64, remainder_len: usize) {
692    assert_ne!(
693        start_remainder_mut_slice.len(),
694        0,
695        "start_remainder_mut_slice must not be empty"
696    );
697    assert!(remainder_len < 64, "remainder_len must be less than 64");
698
699    // This assertion is to make sure that the last byte in the slice is the boundary byte
700    // (i.e., the byte that contains both remainder bits and bits outside the remainder)
701    assert_eq!(
702        start_remainder_mut_slice.len(),
703        self::ceil(remainder_len, 8),
704        "start_remainder_mut_slice length must be equal to ceil(remainder_len, 8)"
705    );
706
707    // Need to update the remainder bytes in the mutable buffer
708    // but not override the bits outside the remainder
709
710    // Update `rem` end with the current bytes in the mutable buffer
711    // to preserve the bits outside the remainder
712    let rem = {
713        // 1. Read the byte that we will override
714        //    we only read the last byte as we verified that start_remainder_mut_slice length is
715        //    equal to ceil(remainder_len, 8), which means the last byte is the boundary byte
716        //    containing both remainder bits and bits outside the remainder
717        let current = start_remainder_mut_slice
718            .last()
719            // Unwrap as we already validated the slice is not empty
720            .unwrap();
721
722        let current = *current as u64;
723
724        // Mask where the bits that are inside the remainder are 1
725        // and the bits outside the remainder are 0
726        let inside_remainder_mask = (1 << remainder_len) - 1;
727        // Mask where the bits that are outside the remainder are 1
728        // and the bits inside the remainder are 0
729        let outside_remainder_mask = !inside_remainder_mask;
730
731        // 2. Only keep the bits that are outside the remainder for the value from the mutable buffer
732        let current = current & outside_remainder_mask;
733
734        // 3. Only keep the bits that are inside the remainder for the value from the operation
735        let rem = rem & inside_remainder_mask;
736
737        // 4. Combine the two values
738        current | rem
739    };
740
741    // Write back the result to the mutable slice
742    {
743        let remainder_bytes = self::ceil(remainder_len, 8);
744
745        // we are counting starting from the least significant bit, so to_le_bytes should be correct
746        let rem = &rem.to_le_bytes()[0..remainder_bytes];
747
748        // this assumes that `[ToByteSlice]` can be copied directly
749        // without calling `to_byte_slice` for each element,
750        // which is correct for all ArrowNativeType implementations including u64.
751        let src = rem.as_ptr();
752        unsafe {
753            std::ptr::copy_nonoverlapping(
754                src,
755                start_remainder_mut_slice.as_mut_ptr(),
756                remainder_bytes,
757            )
758        };
759    }
760}
761
762/// Read remainder bits from a slice.
763///
764/// Reads the specified number of bits from slice and returns them as a u64.
765///
766/// # Arguments
767///
768/// * `remainder` - slice to the start of the bits
769/// * `remainder_len` - Number of bits to read (must be < 64)
770///
771/// # Returns
772///
773/// A u64 containing the bits in the least significant positions
774#[inline]
775fn get_remainder_bits(remainder: &[u8], remainder_len: usize) -> u64 {
776    assert!(remainder.len() < 64, "remainder_len must be less than 64");
777    assert_eq!(
778        remainder.len(),
779        self::ceil(remainder_len, 8),
780        "remainder and remainder len ceil must be the same"
781    );
782
783    let bits = remainder
784        .iter()
785        .enumerate()
786        .fold(0_u64, |acc, (index, &byte)| {
787            acc | (byte as u64) << (index * 8)
788        });
789
790    bits & ((1 << remainder_len) - 1)
791}
792
793/// Handle remainder bits (< 64 bits) for unary operations.
794///
795/// This function processes the bits that don't form a complete u64 chunk,
796/// ensuring that bits outside the operation range are preserved.
797///
798/// # Arguments
799///
800/// * `op` - Unary operation to apply
801/// * `start_remainder_mut` - Slice of bytes to write the remainder bits to
802/// * `remainder_len` - Number of remainder bits
803#[inline]
804fn handle_mutable_buffer_remainder_unary<F>(
805    op: &mut F,
806    start_remainder_mut: &mut [u8],
807    remainder_len: usize,
808) where
809    F: FnMut(u64) -> u64,
810{
811    // Only read from the slice the number of remainder bits
812    let left_remainder_bits = get_remainder_bits(start_remainder_mut, remainder_len);
813
814    // Apply the operation
815    let rem = op(left_remainder_bits);
816
817    // Write only the relevant bits back the result to the slice
818    set_remainder_bits(start_remainder_mut, rem, remainder_len);
819}
820
821#[cfg(test)]
822mod tests {
823    use std::collections::HashSet;
824
825    use super::*;
826    use crate::bit_iterator::BitIterator;
827    use crate::{BooleanBuffer, BooleanBufferBuilder, MutableBuffer};
828    use rand::rngs::StdRng;
829    use rand::{Rng, SeedableRng};
830
831    #[test]
832    fn test_round_upto_multiple_of_64() {
833        assert_eq!(0, round_upto_multiple_of_64(0));
834        assert_eq!(64, round_upto_multiple_of_64(1));
835        assert_eq!(64, round_upto_multiple_of_64(63));
836        assert_eq!(64, round_upto_multiple_of_64(64));
837        assert_eq!(128, round_upto_multiple_of_64(65));
838        assert_eq!(192, round_upto_multiple_of_64(129));
839    }
840
841    #[test]
842    #[should_panic(expected = "failed to round upto multiple of 64")]
843    fn test_round_upto_multiple_of_64_panic() {
844        let _ = round_upto_multiple_of_64(usize::MAX);
845    }
846
847    #[test]
848    #[should_panic(expected = "failed to round to next highest power of 2")]
849    fn test_round_upto_panic() {
850        let _ = round_upto_power_of_2(usize::MAX, 2);
851    }
852
853    #[test]
854    fn test_get_bit() {
855        // 00001101
856        assert!(get_bit(&[0b00001101], 0));
857        assert!(!get_bit(&[0b00001101], 1));
858        assert!(get_bit(&[0b00001101], 2));
859        assert!(get_bit(&[0b00001101], 3));
860
861        // 01001001 01010010
862        assert!(get_bit(&[0b01001001, 0b01010010], 0));
863        assert!(!get_bit(&[0b01001001, 0b01010010], 1));
864        assert!(!get_bit(&[0b01001001, 0b01010010], 2));
865        assert!(get_bit(&[0b01001001, 0b01010010], 3));
866        assert!(!get_bit(&[0b01001001, 0b01010010], 4));
867        assert!(!get_bit(&[0b01001001, 0b01010010], 5));
868        assert!(get_bit(&[0b01001001, 0b01010010], 6));
869        assert!(!get_bit(&[0b01001001, 0b01010010], 7));
870        assert!(!get_bit(&[0b01001001, 0b01010010], 8));
871        assert!(get_bit(&[0b01001001, 0b01010010], 9));
872        assert!(!get_bit(&[0b01001001, 0b01010010], 10));
873        assert!(!get_bit(&[0b01001001, 0b01010010], 11));
874        assert!(get_bit(&[0b01001001, 0b01010010], 12));
875        assert!(!get_bit(&[0b01001001, 0b01010010], 13));
876        assert!(get_bit(&[0b01001001, 0b01010010], 14));
877        assert!(!get_bit(&[0b01001001, 0b01010010], 15));
878    }
879
880    pub fn seedable_rng() -> StdRng {
881        StdRng::seed_from_u64(42)
882    }
883
884    #[test]
885    fn test_get_bit_raw() {
886        const NUM_BYTE: usize = 10;
887        let mut buf = [0; NUM_BYTE];
888        let mut expected = vec![];
889        let mut rng = seedable_rng();
890        for i in 0..8 * NUM_BYTE {
891            let b = rng.random_bool(0.5);
892            expected.push(b);
893            if b {
894                set_bit(&mut buf[..], i)
895            }
896        }
897
898        let raw_ptr = buf.as_ptr();
899        for (i, b) in expected.iter().enumerate() {
900            unsafe {
901                assert_eq!(*b, get_bit_raw(raw_ptr, i));
902            }
903        }
904    }
905
906    #[test]
907    fn test_set_bit() {
908        let mut b = [0b00000010];
909        set_bit(&mut b, 0);
910        assert_eq!([0b00000011], b);
911        set_bit(&mut b, 1);
912        assert_eq!([0b00000011], b);
913        set_bit(&mut b, 7);
914        assert_eq!([0b10000011], b);
915    }
916
917    #[test]
918    fn test_unset_bit() {
919        let mut b = [0b11111101];
920        unset_bit(&mut b, 0);
921        assert_eq!([0b11111100], b);
922        unset_bit(&mut b, 1);
923        assert_eq!([0b11111100], b);
924        unset_bit(&mut b, 7);
925        assert_eq!([0b01111100], b);
926    }
927
928    #[test]
929    fn test_set_bit_raw() {
930        const NUM_BYTE: usize = 10;
931        let mut buf = vec![0; NUM_BYTE];
932        let mut expected = vec![];
933        let mut rng = seedable_rng();
934        for i in 0..8 * NUM_BYTE {
935            let b = rng.random_bool(0.5);
936            expected.push(b);
937            if b {
938                unsafe {
939                    set_bit_raw(buf.as_mut_ptr(), i);
940                }
941            }
942        }
943
944        let raw_ptr = buf.as_ptr();
945        for (i, b) in expected.iter().enumerate() {
946            unsafe {
947                assert_eq!(*b, get_bit_raw(raw_ptr, i));
948            }
949        }
950    }
951
952    #[test]
953    fn test_unset_bit_raw() {
954        const NUM_BYTE: usize = 10;
955        let mut buf = vec![255; NUM_BYTE];
956        let mut expected = vec![];
957        let mut rng = seedable_rng();
958        for i in 0..8 * NUM_BYTE {
959            let b = rng.random_bool(0.5);
960            expected.push(b);
961            if !b {
962                unsafe {
963                    unset_bit_raw(buf.as_mut_ptr(), i);
964                }
965            }
966        }
967
968        let raw_ptr = buf.as_ptr();
969        for (i, b) in expected.iter().enumerate() {
970            unsafe {
971                assert_eq!(*b, get_bit_raw(raw_ptr, i));
972            }
973        }
974    }
975
976    #[test]
977    fn test_get_set_bit_roundtrip() {
978        const NUM_BYTES: usize = 10;
979        const NUM_SETS: usize = 10;
980
981        let mut buffer: [u8; NUM_BYTES * 8] = [0; NUM_BYTES * 8];
982        let mut v = HashSet::new();
983        let mut rng = seedable_rng();
984        for _ in 0..NUM_SETS {
985            let offset = rng.random_range(0..8 * NUM_BYTES);
986            v.insert(offset);
987            set_bit(&mut buffer[..], offset);
988        }
989        for i in 0..NUM_BYTES * 8 {
990            assert_eq!(v.contains(&i), get_bit(&buffer[..], i));
991        }
992    }
993
994    #[test]
995    fn test_ceil() {
996        assert_eq!(ceil(0, 1), 0);
997        assert_eq!(ceil(1, 1), 1);
998        assert_eq!(ceil(1, 2), 1);
999        assert_eq!(ceil(1, 8), 1);
1000        assert_eq!(ceil(7, 8), 1);
1001        assert_eq!(ceil(8, 8), 1);
1002        assert_eq!(ceil(9, 8), 2);
1003        assert_eq!(ceil(9, 9), 1);
1004        assert_eq!(ceil(10000000000, 10), 1000000000);
1005        assert_eq!(ceil(10, 10000000000), 1);
1006        assert_eq!(ceil(10000000000, 1000000000), 10);
1007    }
1008
1009    #[test]
1010    fn test_read_up_to() {
1011        let all_ones = &[0b10111001, 0b10001100];
1012
1013        for (bit_offset, expected) in [
1014            (0, 0b00000001),
1015            (1, 0b00000000),
1016            (2, 0b00000000),
1017            (3, 0b00000001),
1018            (4, 0b00000001),
1019            (5, 0b00000001),
1020            (6, 0b00000000),
1021            (7, 0b00000001),
1022        ] {
1023            let result = read_up_to_byte_from_offset(all_ones, 1, bit_offset);
1024            assert_eq!(
1025                result, expected,
1026                "failed at bit_offset {bit_offset}. result, expected:\n{result:08b}\n{expected:08b}"
1027            );
1028        }
1029
1030        for (bit_offset, expected) in [
1031            (0, 0b00000001),
1032            (1, 0b00000000),
1033            (2, 0b00000010),
1034            (3, 0b00000011),
1035            (4, 0b00000011),
1036            (5, 0b00000001),
1037            (6, 0b00000010),
1038            (7, 0b00000001),
1039        ] {
1040            let result = read_up_to_byte_from_offset(all_ones, 2, bit_offset);
1041            assert_eq!(
1042                result, expected,
1043                "failed at bit_offset {bit_offset}. result, expected:\n{result:08b}\n{expected:08b}"
1044            );
1045        }
1046
1047        for (bit_offset, expected) in [
1048            (0, 0b00111001),
1049            (1, 0b00011100),
1050            (2, 0b00101110),
1051            (3, 0b00010111),
1052            (4, 0b00001011),
1053            (5, 0b00100101),
1054            (6, 0b00110010),
1055            (7, 0b00011001),
1056        ] {
1057            let result = read_up_to_byte_from_offset(all_ones, 6, bit_offset);
1058            assert_eq!(
1059                result, expected,
1060                "failed at bit_offset {bit_offset}. result, expected:\n{result:08b}\n{expected:08b}"
1061            );
1062        }
1063
1064        for (bit_offset, expected) in [
1065            (0, 0b00111001),
1066            (1, 0b01011100),
1067            (2, 0b00101110),
1068            (3, 0b00010111),
1069            (4, 0b01001011),
1070            (5, 0b01100101),
1071            (6, 0b00110010),
1072            (7, 0b00011001),
1073        ] {
1074            let result = read_up_to_byte_from_offset(all_ones, 7, bit_offset);
1075            assert_eq!(
1076                result, expected,
1077                "failed at bit_offset {bit_offset}. result, expected:\n{result:08b}\n{expected:08b}"
1078            );
1079        }
1080    }
1081
1082    /// Verifies that a unary operation applied to a buffer using u64 chunks
1083    /// is the same as applying the operation bit by bit.
1084    fn test_mutable_buffer_bin_op_helper<F, G>(
1085        left_data: &[bool],
1086        right_data: &[bool],
1087        left_offset_in_bits: usize,
1088        right_offset_in_bits: usize,
1089        len_in_bits: usize,
1090        op: F,
1091        mut expected_op: G,
1092    ) where
1093        F: FnMut(u64, u64) -> u64,
1094        G: FnMut(bool, bool) -> bool,
1095    {
1096        let mut left_buffer = BooleanBufferBuilder::new(len_in_bits);
1097        left_buffer.append_slice(left_data);
1098        let right_buffer = BooleanBuffer::from(right_data);
1099
1100        let expected: Vec<bool> = left_data
1101            .iter()
1102            .skip(left_offset_in_bits)
1103            .zip(right_data.iter().skip(right_offset_in_bits))
1104            .take(len_in_bits)
1105            .map(|(l, r)| expected_op(*l, *r))
1106            .collect();
1107
1108        apply_bitwise_binary_op(
1109            left_buffer.as_slice_mut(),
1110            left_offset_in_bits,
1111            right_buffer.inner(),
1112            right_offset_in_bits,
1113            len_in_bits,
1114            op,
1115        );
1116
1117        let result: Vec<bool> =
1118            BitIterator::new(left_buffer.as_slice(), left_offset_in_bits, len_in_bits).collect();
1119
1120        assert_eq!(
1121            result, expected,
1122            "Failed with left_offset={}, right_offset={}, len={}",
1123            left_offset_in_bits, right_offset_in_bits, len_in_bits
1124        );
1125    }
1126
1127    /// Verifies that a unary operation applied to a buffer using u64 chunks
1128    /// is the same as applying the operation bit by bit.
1129    fn test_mutable_buffer_unary_op_helper<F, G>(
1130        data: &[bool],
1131        offset_in_bits: usize,
1132        len_in_bits: usize,
1133        op: F,
1134        mut expected_op: G,
1135    ) where
1136        F: FnMut(u64) -> u64,
1137        G: FnMut(bool) -> bool,
1138    {
1139        let mut buffer = BooleanBufferBuilder::new(len_in_bits);
1140        buffer.append_slice(data);
1141
1142        let expected: Vec<bool> = data
1143            .iter()
1144            .skip(offset_in_bits)
1145            .take(len_in_bits)
1146            .map(|b| expected_op(*b))
1147            .collect();
1148
1149        apply_bitwise_unary_op(buffer.as_slice_mut(), offset_in_bits, len_in_bits, op);
1150
1151        let result: Vec<bool> =
1152            BitIterator::new(buffer.as_slice(), offset_in_bits, len_in_bits).collect();
1153
1154        assert_eq!(
1155            result, expected,
1156            "Failed with offset={}, len={}",
1157            offset_in_bits, len_in_bits
1158        );
1159    }
1160
1161    // Helper to create test data of specific length
1162    fn create_test_data(len: usize) -> (Vec<bool>, Vec<bool>) {
1163        let mut rng = rand::rng();
1164        let left: Vec<bool> = (0..len).map(|_| rng.random_bool(0.5)).collect();
1165        let right: Vec<bool> = (0..len).map(|_| rng.random_bool(0.5)).collect();
1166        (left, right)
1167    }
1168
1169    /// Test all binary operations (AND, OR, XOR) with the given parameters
1170    fn test_all_binary_ops(
1171        left_data: &[bool],
1172        right_data: &[bool],
1173        left_offset_in_bits: usize,
1174        right_offset_in_bits: usize,
1175        len_in_bits: usize,
1176    ) {
1177        // Test AND
1178        test_mutable_buffer_bin_op_helper(
1179            left_data,
1180            right_data,
1181            left_offset_in_bits,
1182            right_offset_in_bits,
1183            len_in_bits,
1184            |a, b| a & b,
1185            |a, b| a & b,
1186        );
1187
1188        // Test OR
1189        test_mutable_buffer_bin_op_helper(
1190            left_data,
1191            right_data,
1192            left_offset_in_bits,
1193            right_offset_in_bits,
1194            len_in_bits,
1195            |a, b| a | b,
1196            |a, b| a | b,
1197        );
1198
1199        // Test XOR
1200        test_mutable_buffer_bin_op_helper(
1201            left_data,
1202            right_data,
1203            left_offset_in_bits,
1204            right_offset_in_bits,
1205            len_in_bits,
1206            |a, b| a ^ b,
1207            |a, b| a ^ b,
1208        );
1209    }
1210
1211    // ===== Combined Binary Operation Tests =====
1212
1213    #[test]
1214    fn test_binary_ops_less_than_byte() {
1215        let (left, right) = create_test_data(4);
1216        test_all_binary_ops(&left, &right, 0, 0, 4);
1217    }
1218
1219    #[test]
1220    fn test_binary_ops_less_than_byte_across_boundary() {
1221        let (left, right) = create_test_data(16);
1222        test_all_binary_ops(&left, &right, 6, 6, 4);
1223    }
1224
1225    #[test]
1226    fn test_binary_ops_exactly_byte() {
1227        let (left, right) = create_test_data(16);
1228        test_all_binary_ops(&left, &right, 0, 0, 8);
1229    }
1230
1231    #[test]
1232    fn test_binary_ops_more_than_byte_less_than_u64() {
1233        let (left, right) = create_test_data(64);
1234        test_all_binary_ops(&left, &right, 0, 0, 32);
1235    }
1236
1237    #[test]
1238    fn test_binary_ops_exactly_u64() {
1239        let (left, right) = create_test_data(180);
1240        test_all_binary_ops(&left, &right, 0, 0, 64);
1241        test_all_binary_ops(&left, &right, 64, 9, 64);
1242        test_all_binary_ops(&left, &right, 8, 100, 64);
1243        test_all_binary_ops(&left, &right, 1, 15, 64);
1244        test_all_binary_ops(&left, &right, 12, 10, 64);
1245        test_all_binary_ops(&left, &right, 180 - 64, 2, 64);
1246    }
1247
1248    #[test]
1249    fn test_binary_ops_more_than_u64_not_multiple() {
1250        let (left, right) = create_test_data(200);
1251        test_all_binary_ops(&left, &right, 0, 0, 100);
1252    }
1253
1254    #[test]
1255    fn test_binary_ops_exactly_multiple_u64() {
1256        let (left, right) = create_test_data(256);
1257        test_all_binary_ops(&left, &right, 0, 0, 128);
1258    }
1259
1260    #[test]
1261    fn test_binary_ops_more_than_multiple_u64() {
1262        let (left, right) = create_test_data(300);
1263        test_all_binary_ops(&left, &right, 0, 0, 200);
1264    }
1265
1266    #[test]
1267    fn test_binary_ops_byte_aligned_no_remainder() {
1268        let (left, right) = create_test_data(200);
1269        test_all_binary_ops(&left, &right, 0, 0, 128);
1270    }
1271
1272    #[test]
1273    fn test_binary_ops_byte_aligned_with_remainder() {
1274        let (left, right) = create_test_data(200);
1275        test_all_binary_ops(&left, &right, 0, 0, 100);
1276    }
1277
1278    #[test]
1279    fn test_binary_ops_not_byte_aligned_no_remainder() {
1280        let (left, right) = create_test_data(200);
1281        test_all_binary_ops(&left, &right, 3, 3, 128);
1282    }
1283
1284    #[test]
1285    fn test_binary_ops_not_byte_aligned_with_remainder() {
1286        let (left, right) = create_test_data(200);
1287        test_all_binary_ops(&left, &right, 5, 5, 100);
1288    }
1289
1290    #[test]
1291    fn test_binary_ops_different_offsets() {
1292        let (left, right) = create_test_data(200);
1293        test_all_binary_ops(&left, &right, 3, 7, 50);
1294    }
1295
1296    #[test]
1297    fn test_binary_ops_offsets_greater_than_8_less_than_64() {
1298        let (left, right) = create_test_data(200);
1299        test_all_binary_ops(&left, &right, 13, 27, 100);
1300    }
1301
1302    // ===== NOT (Unary) Operation Tests =====
1303
1304    #[test]
1305    fn test_not_less_than_byte() {
1306        let data = vec![true, false, true, false];
1307        test_mutable_buffer_unary_op_helper(&data, 0, 4, |a| !a, |a| !a);
1308    }
1309
1310    #[test]
1311    fn test_not_less_than_byte_across_boundary() {
1312        let data: Vec<bool> = (0..16).map(|i| i % 2 == 0).collect();
1313        test_mutable_buffer_unary_op_helper(&data, 6, 4, |a| !a, |a| !a);
1314    }
1315
1316    #[test]
1317    fn test_not_exactly_byte() {
1318        let data: Vec<bool> = (0..16).map(|i| i % 2 == 0).collect();
1319        test_mutable_buffer_unary_op_helper(&data, 0, 8, |a| !a, |a| !a);
1320    }
1321
1322    #[test]
1323    fn test_not_more_than_byte_less_than_u64() {
1324        let data: Vec<bool> = (0..64).map(|i| i % 2 == 0).collect();
1325        test_mutable_buffer_unary_op_helper(&data, 0, 32, |a| !a, |a| !a);
1326    }
1327
1328    #[test]
1329    fn test_not_exactly_u64() {
1330        let data: Vec<bool> = (0..128).map(|i| i % 2 == 0).collect();
1331        test_mutable_buffer_unary_op_helper(&data, 0, 64, |a| !a, |a| !a);
1332    }
1333
1334    #[test]
1335    fn test_not_more_than_u64_not_multiple() {
1336        let data: Vec<bool> = (0..200).map(|i| i % 2 == 0).collect();
1337        test_mutable_buffer_unary_op_helper(&data, 0, 100, |a| !a, |a| !a);
1338    }
1339
1340    #[test]
1341    fn test_not_exactly_multiple_u64() {
1342        let data: Vec<bool> = (0..256).map(|i| i % 2 == 0).collect();
1343        test_mutable_buffer_unary_op_helper(&data, 0, 128, |a| !a, |a| !a);
1344    }
1345
1346    #[test]
1347    fn test_not_more_than_multiple_u64() {
1348        let data: Vec<bool> = (0..300).map(|i| i % 2 == 0).collect();
1349        test_mutable_buffer_unary_op_helper(&data, 0, 200, |a| !a, |a| !a);
1350    }
1351
1352    #[test]
1353    fn test_not_byte_aligned_no_remainder() {
1354        let data: Vec<bool> = (0..200).map(|i| i % 2 == 0).collect();
1355        test_mutable_buffer_unary_op_helper(&data, 0, 128, |a| !a, |a| !a);
1356    }
1357
1358    #[test]
1359    fn test_not_byte_aligned_with_remainder() {
1360        let data: Vec<bool> = (0..200).map(|i| i % 2 == 0).collect();
1361        test_mutable_buffer_unary_op_helper(&data, 0, 100, |a| !a, |a| !a);
1362    }
1363
1364    #[test]
1365    fn test_not_not_byte_aligned_no_remainder() {
1366        let data: Vec<bool> = (0..200).map(|i| i % 2 == 0).collect();
1367        test_mutable_buffer_unary_op_helper(&data, 3, 128, |a| !a, |a| !a);
1368    }
1369
1370    #[test]
1371    fn test_not_not_byte_aligned_with_remainder() {
1372        let data: Vec<bool> = (0..200).map(|i| i % 2 == 0).collect();
1373        test_mutable_buffer_unary_op_helper(&data, 5, 100, |a| !a, |a| !a);
1374    }
1375
1376    // ===== Edge Cases =====
1377
1378    #[test]
1379    fn test_empty_length() {
1380        let (left, right) = create_test_data(16);
1381        test_all_binary_ops(&left, &right, 0, 0, 0);
1382    }
1383
1384    #[test]
1385    fn test_single_bit() {
1386        let (left, right) = create_test_data(16);
1387        test_all_binary_ops(&left, &right, 0, 0, 1);
1388    }
1389
1390    #[test]
1391    fn test_single_bit_at_offset() {
1392        let (left, right) = create_test_data(16);
1393        test_all_binary_ops(&left, &right, 7, 7, 1);
1394    }
1395
1396    #[test]
1397    fn test_not_single_bit() {
1398        let data = vec![true, false, true, false];
1399        test_mutable_buffer_unary_op_helper(&data, 0, 1, |a| !a, |a| !a);
1400    }
1401
1402    #[test]
1403    fn test_not_empty_length() {
1404        let data = vec![true, false, true, false];
1405        test_mutable_buffer_unary_op_helper(&data, 0, 0, |a| !a, |a| !a);
1406    }
1407
1408    #[test]
1409    fn test_less_than_byte_unaligned_and_not_enough_bits() {
1410        let left_offset_in_bits = 2;
1411        let right_offset_in_bits = 4;
1412        let len_in_bits = 1;
1413
1414        // Single byte
1415        let right = (0..8).map(|i| (i / 2) % 2 == 0).collect::<Vec<_>>();
1416        // less than a byte
1417        let left = (0..3).map(|i| i % 2 == 0).collect::<Vec<_>>();
1418        test_all_binary_ops(
1419            &left,
1420            &right,
1421            left_offset_in_bits,
1422            right_offset_in_bits,
1423            len_in_bits,
1424        );
1425    }
1426
1427    #[test]
1428    fn test_bitwise_binary_op_offset_out_of_bounds() {
1429        let input = vec![0b10101010u8, 0b01010101u8];
1430        let mut buffer = MutableBuffer::new(2); // space for 16 bits
1431        buffer.extend_from_slice(&input); // only 2 bytes
1432        apply_bitwise_binary_op(
1433            buffer.as_slice_mut(),
1434            100, // exceeds buffer length, becomes a noop
1435            [0b11110000u8, 0b00001111u8],
1436            0,
1437            0,
1438            |a, b| a & b,
1439        );
1440        assert_eq!(buffer.as_slice(), &input);
1441    }
1442
1443    #[test]
1444    #[should_panic(expected = "assertion failed: last_offset <= buffer.len()")]
1445    fn test_bitwise_binary_op_length_out_of_bounds() {
1446        let mut buffer = MutableBuffer::new(2); // space for 16 bits
1447        buffer.extend_from_slice(&[0b10101010u8, 0b01010101u8]); // only 2 bytes
1448        apply_bitwise_binary_op(
1449            buffer.as_slice_mut(),
1450            0, // exceeds buffer length
1451            [0b11110000u8, 0b00001111u8],
1452            0,
1453            100,
1454            |a, b| a & b,
1455        );
1456        assert_eq!(buffer.as_slice(), &[0b10101010u8, 0b01010101u8]);
1457    }
1458
1459    #[test]
1460    #[should_panic(expected = "offset + len out of bounds")]
1461    fn test_bitwise_binary_op_right_len_out_of_bounds() {
1462        let mut buffer = MutableBuffer::new(2); // space for 16 bits
1463        buffer.extend_from_slice(&[0b10101010u8, 0b01010101u8]); // only 2 bytes
1464        apply_bitwise_binary_op(
1465            buffer.as_slice_mut(),
1466            0, // exceeds buffer length
1467            [0b11110000u8, 0b00001111u8],
1468            1000,
1469            16,
1470            |a, b| a & b,
1471        );
1472        assert_eq!(buffer.as_slice(), &[0b10101010u8, 0b01010101u8]);
1473    }
1474
1475    #[test]
1476    #[should_panic(expected = "the len is 2 but the index is 12")]
1477    fn test_bitwise_unary_op_offset_out_of_bounds() {
1478        let input = vec![0b10101010u8, 0b01010101u8];
1479        let mut buffer = MutableBuffer::new(2); // space for 16 bits
1480        buffer.extend_from_slice(&input); // only 2 bytes
1481        apply_bitwise_unary_op(
1482            buffer.as_slice_mut(),
1483            100, // exceeds buffer length, becomes a noop
1484            8,
1485            |a| !a,
1486        );
1487        assert_eq!(buffer.as_slice(), &input);
1488    }
1489
1490    #[test]
1491    #[should_panic(expected = "assertion failed: last_offset <= buffer.len()")]
1492    fn test_bitwise_unary_op_length_out_of_bounds2() {
1493        let input = vec![0b10101010u8, 0b01010101u8];
1494        let mut buffer = MutableBuffer::new(2); // space for 16 bits
1495        buffer.extend_from_slice(&input); // only 2 bytes
1496        apply_bitwise_unary_op(
1497            buffer.as_slice_mut(),
1498            3,   // start at bit 3, to exercise different path
1499            100, // exceeds buffer length
1500            |a| !a,
1501        );
1502        assert_eq!(buffer.as_slice(), &input);
1503    }
1504}