simd_kernels/kernels/bitmask/
mod.rs

1// Copyright Peter Bower 2025. All Rights Reserved.
2// Licensed under Mozilla Public License (MPL) 2.0.
3
4//! # **Bitmask Kernels Module** - *High-Performance Null-Aware Bitmask Operations*
5//!
6//! SIMD-optimised bitmask operations for Arrow-compatible nullable array processing with efficient null handling.
7//!
8//! ## Overview
9//! 
10//! This module provides the foundational bitmask operations that enable null-aware and bit-packed boolean computing 
11//! throughout the minarrow ecosystem, but can be applied to any bitmasking contenxt. 
12//! These kernels handle bitwise logical operations, set membership tests, equality comparisons,
13//! and population counts on Arrow-format bitmasks with optimal performance characteristics.
14//!
15//! ## Architecture
16//! 
17//! The bitmask module follows a three-tier architecture:
18//! - **Dispatch layer**: Smart runtime selection between SIMD and scalar implementations
19//! - **SIMD kernels**: Vectorised implementations using `std::simd` with portable lane counts
20//! - **Scalar kernels**: High-performance but non-SIMD fallback implementations for compatibility
21//!
22//! ## Modules
23//! - **`dispatch`**: Runtime dispatch layer selecting SIMD vs scalar implementations based on feature flags
24//! - **`simd`**: SIMD-accelerated implementations using vectorised bitwise operations with configurable lane counts
25//! - **`std`**: Scalar fallback implementations for word-level operations on 64-bit boundaries
26//!
27//! ## Core Operations
28//! 
29//! ### **Logical Operations**
30//! - **`and_masks`**: Bitwise AND across two bitmasks for intersection operations
31//! - **`or_masks`**: Bitwise OR across two bitmasks for union operations  
32//! - **`xor_masks`**: Bitwise XOR across two bitmasks for symmetric difference
33//! - **`not_mask`**: Bitwise NOT for complement operations
34//!
35//! ### **Set Membership**
36//! - **`in_mask`**: Set inclusion tests - output bits indicate membership of LHS values in RHS set
37//! - **`not_in_mask`**: Set exclusion tests - complement of inclusion operations
38//!
39//! ### **Equality Testing**
40//! - **`eq_mask`**: Element-wise equality comparison producing result bitmask
41//! - **`ne_mask`**: Element-wise inequality comparison producing result bitmask
42//! - **`all_eq`**: Bulk equality test across entire bitmask windows
43//! - **`all_ne`**: Bulk inequality test across entire bitmask windows
44//!
45//! ### **Population Analysis**
46//! - **`popcount_mask`**: Fast population count (number of set bits) using SIMD reduction
47//! - **`all_true_mask`**: Test if all bits in bitmask are set to 1
48//! - **`all_false_mask`**: Test if all bits in bitmask are set to 0
49//!
50//! ## Arrow Compatibility
51//! 
52//! All operations maintain full compatibility with Apache Arrow's bitmask format:
53//! - **LSB bit ordering**: Bit 0 is the least significant bit in each byte
54//! - **Byte-packed storage**: 8 bits per byte with proper alignment handling
55//! - **Trailing bit management**: Automatic masking of unused bits in final bytes
56//! - **64-bit word alignment**: Optimised for modern CPU architectures
57
58pub mod dispatch;
59#[cfg(feature = "simd")]
60pub mod simd;
61#[cfg(not(feature = "simd"))]
62pub mod std;
63
64use core::mem;
65use minarrow::{Bitmask, BitmaskVT};
66
67/// Fundamental word type for bitmask operations on 64-bit architectures.
68///
69/// Defines the basic unit of bitmask storage and manipulation. All bitmask
70/// operations are optimised around 64-bit word boundaries for maximum
71/// performance on modern CPU architectures.
72///
73/// # Architecture Alignment
74/// This type is chosen to align with:
75/// - 64-bit CPU registers: Native register width on x86-64 and AArch64
76/// - Cache line efficiency: Optimal memory access patterns
77/// - SIMD compatibility: Natural alignment for vectorised operations
78pub type Word = u64;
79
80/// Number of bits in a `Word` for bit-level bitmask calculations.
81///
82/// This constant determines the fundamental granularity of bitmask operations,
83/// enabling efficient bit manipulation algorithms that operate on word
84/// boundaries.
85pub const WORD_BITS: usize = mem::size_of::<Word>() * 8;
86
87/// Helper to compute number of u64 words required for bitmask of `len` bits.
88#[inline(always)]
89pub fn words_for(len: usize) -> usize {
90    (len + WORD_BITS - 1) / WORD_BITS
91}
92
93/// Cast &[u8] to &[u64] for word-wise access.
94#[inline(always)]
95pub unsafe fn mask_bits_as_words(bits: &[u8]) -> *const Word {
96    bits.as_ptr() as *const Word
97}
98
99/// Cast &mut [u8] to &mut [u64] for word-wise mutation.
100#[inline(always)]
101pub unsafe fn mask_bits_as_words_mut(bits: &mut [u8]) -> *mut Word {
102    bits.as_mut_ptr() as *mut Word
103}
104
105/// Create zeroed bitmask of length `len` bits.
106#[inline(always)]
107pub fn new_mask(len: usize) -> Bitmask {
108    Bitmask::new_set_all(len, false)
109}
110
111/// Return window into bitmask's bits slice covering offset..offset+len bits.
112#[inline(always)]
113pub fn bitmask_window_bytes(mask: &Bitmask, offset: usize, len: usize) -> &[u8] {
114    let start = offset / 8;
115    let end = (offset + len + 7) / 8;
116    &mask.bits[start..end]
117}
118
119/// Return mutable window into bitmask's bits slice covering offset..offset+len bits.
120/// Enables efficient in-place modification of bitmask regions.
121#[inline(always)]
122pub fn bitmask_window_bytes_mut(mask: &mut Bitmask, offset: usize, len: usize) -> &mut [u8] {
123    let start = offset / 8;
124    let end = (offset + len + 7) / 8;
125    &mut mask.bits[start..end]
126}
127
128/// Zero all slack bits ≥ `bm.len`.
129#[inline(always)]
130pub fn clear_trailing_bits(bm: &mut Bitmask) {
131    if bm.len == 0 {
132        return;
133    }
134    let used = bm.len & 7;
135    if used != 0 {
136        let last = bm.bits.last_mut().unwrap();
137        *last &= (1u8 << used) - 1;
138    }
139}
140
141/// Quick population count of true bits
142#[inline(always)]
143pub fn popcount_bits(m: BitmaskVT<'_>) -> usize {
144    #[cfg(feature = "simd")]
145    {
146        // Use a default SIMD width if not otherwise specified
147        use crate::kernels::bitmask::simd::popcount_mask_simd;
148        popcount_mask_simd::<8>(m)
149    }
150    #[cfg(not(feature = "simd"))]
151    {
152        use crate::kernels::bitmask::std::popcount_mask;
153        popcount_mask(m)
154    }
155}
156
157#[cfg(test)]
158mod tests {
159    use super::*;
160    use minarrow::Bitmask;
161
162    #[test]
163    fn test_words_for() {
164        assert_eq!(words_for(0), 0);
165        assert_eq!(words_for(1), 1);
166        assert_eq!(words_for(63), 1);
167        assert_eq!(words_for(64), 1);
168        assert_eq!(words_for(65), 2);
169        assert_eq!(words_for(128), 2);
170        assert_eq!(words_for(129), 3);
171    }
172
173    #[test]
174    fn test_mask_bits_as_words_and_mut() {
175        // Test reading words from bits
176        let mut mask = Bitmask::new_set_all(128, false);
177        // Write known pattern into mask
178        mask.set(0, true);
179        mask.set(63, true);
180        mask.set(64, true);
181        mask.set(127, true);
182        let bits = &mask.bits;
183        unsafe {
184            let words = mask_bits_as_words(bits);
185            assert_eq!(*words, 1u64 | (1u64 << 63));
186            assert_eq!(*words.add(1), 1u64 | (1u64 << 63));
187        }
188
189        // Test writing via mask_bits_as_words_mut
190        let mut mask = Bitmask::new_set_all(128, false);
191        let bits = &mut mask.bits;
192        unsafe {
193            let words_mut = mask_bits_as_words_mut(bits);
194            *words_mut = 0xDEADBEEFDEADBEEF;
195            *words_mut.add(1) = 0xCAFEBABECAFEBABE;
196        }
197        assert_eq!(mask.bits[0], 0xEF);
198        assert_eq!(mask.bits[7], 0xDE);
199        assert_eq!(mask.bits[8], 0xBE);
200        assert_eq!(mask.bits[15], 0xCA);
201    }
202
203    #[test]
204    fn test_bitmask_window_bytes() {
205        let mut mask = Bitmask::new_set_all(24, false);
206        mask.set(0, true);
207        mask.set(7, true);
208        mask.set(8, true);
209        mask.set(15, true);
210        mask.set(16, true);
211        mask.set(23, true);
212        // Window: bytes 1..3 should cover bits 8..24
213        let bytes = bitmask_window_bytes(&mask, 8, 16);
214        assert_eq!(bytes.len(), 2); // 16 bits = 2 bytes
215        assert_eq!(bytes[0], 0b10000001); // bits 8 and 15 set
216        assert_eq!(bytes[1], 0b10000001); // bits 16 and 23 set
217    }
218
219    #[test]
220    fn test_bitmask_window_bytes_mut() {
221        let mut mask = Bitmask::new_set_all(16, false);
222        {
223            let window = bitmask_window_bytes_mut(&mut mask, 0, 16);
224            window[0] = 0xAA;
225            window[1] = 0x55;
226        }
227        assert_eq!(mask.bits[0], 0xAA);
228        assert_eq!(mask.bits[1], 0x55);
229    }
230
231    #[test]
232    fn test_clear_trailing_bits() {
233        let mut mask = Bitmask::new_set_all(10, true);
234        // The last byte should have only the low 2 bits set after clearing trailing bits
235        clear_trailing_bits(&mut mask);
236        // 10 bits => 2 bytes, last byte should have only bits 0 and 1 set (0b00000011 == 0x03)
237        assert_eq!(mask.bits[1], 0x03);
238        // The remaining bits should still be set
239        for i in 0..10 {
240            assert!(mask.get(i));
241        }
242        // Any bits beyond 10 should be cleared
243        for i in 10..16 {
244            assert!(!mask.get(i));
245        }
246    }
247}