simd_kernels/kernels/bitmask/mod.rs
1// Copyright Peter Bower 2025. All Rights Reserved.
2// Licensed under Mozilla Public License (MPL) 2.0.
3
4//! # **Bitmask Kernels Module** - *High-Performance Null-Aware Bitmask Operations*
5//!
6//! SIMD-optimised bitmask operations for Arrow-compatible nullable array processing with efficient null handling.
7//!
8//! ## Overview
9//!
10//! This module provides the foundational bitmask operations that enable null-aware and bit-packed boolean computing
11//! throughout the minarrow ecosystem, but can be applied to any bitmasking contenxt.
12//! These kernels handle bitwise logical operations, set membership tests, equality comparisons,
13//! and population counts on Arrow-format bitmasks with optimal performance characteristics.
14//!
15//! ## Architecture
16//!
17//! The bitmask module follows a three-tier architecture:
18//! - **Dispatch layer**: Smart runtime selection between SIMD and scalar implementations
19//! - **SIMD kernels**: Vectorised implementations using `std::simd` with portable lane counts
20//! - **Scalar kernels**: High-performance but non-SIMD fallback implementations for compatibility
21//!
22//! ## Modules
23//! - **`dispatch`**: Runtime dispatch layer selecting SIMD vs scalar implementations based on feature flags
24//! - **`simd`**: SIMD-accelerated implementations using vectorised bitwise operations with configurable lane counts
25//! - **`std`**: Scalar fallback implementations for word-level operations on 64-bit boundaries
26//!
27//! ## Core Operations
28//!
29//! ### **Logical Operations**
30//! - **`and_masks`**: Bitwise AND across two bitmasks for intersection operations
31//! - **`or_masks`**: Bitwise OR across two bitmasks for union operations
32//! - **`xor_masks`**: Bitwise XOR across two bitmasks for symmetric difference
33//! - **`not_mask`**: Bitwise NOT for complement operations
34//!
35//! ### **Set Membership**
36//! - **`in_mask`**: Set inclusion tests - output bits indicate membership of LHS values in RHS set
37//! - **`not_in_mask`**: Set exclusion tests - complement of inclusion operations
38//!
39//! ### **Equality Testing**
40//! - **`eq_mask`**: Element-wise equality comparison producing result bitmask
41//! - **`ne_mask`**: Element-wise inequality comparison producing result bitmask
42//! - **`all_eq`**: Bulk equality test across entire bitmask windows
43//! - **`all_ne`**: Bulk inequality test across entire bitmask windows
44//!
45//! ### **Population Analysis**
46//! - **`popcount_mask`**: Fast population count (number of set bits) using SIMD reduction
47//! - **`all_true_mask`**: Test if all bits in bitmask are set to 1
48//! - **`all_false_mask`**: Test if all bits in bitmask are set to 0
49//!
50//! ## Arrow Compatibility
51//!
52//! All operations maintain full compatibility with Apache Arrow's bitmask format:
53//! - **LSB bit ordering**: Bit 0 is the least significant bit in each byte
54//! - **Byte-packed storage**: 8 bits per byte with proper alignment handling
55//! - **Trailing bit management**: Automatic masking of unused bits in final bytes
56//! - **64-bit word alignment**: Optimised for modern CPU architectures
57
58pub mod dispatch;
59#[cfg(feature = "simd")]
60pub mod simd;
61#[cfg(not(feature = "simd"))]
62pub mod std;
63
64use core::mem;
65use minarrow::{Bitmask, BitmaskVT};
66
67/// Fundamental word type for bitmask operations on 64-bit architectures.
68///
69/// Defines the basic unit of bitmask storage and manipulation. All bitmask
70/// operations are optimised around 64-bit word boundaries for maximum
71/// performance on modern CPU architectures.
72///
73/// # Architecture Alignment
74/// This type is chosen to align with:
75/// - 64-bit CPU registers: Native register width on x86-64 and AArch64
76/// - Cache line efficiency: Optimal memory access patterns
77/// - SIMD compatibility: Natural alignment for vectorised operations
78pub type Word = u64;
79
80/// Number of bits in a `Word` for bit-level bitmask calculations.
81///
82/// This constant determines the fundamental granularity of bitmask operations,
83/// enabling efficient bit manipulation algorithms that operate on word
84/// boundaries.
85pub const WORD_BITS: usize = mem::size_of::<Word>() * 8;
86
87/// Helper to compute number of u64 words required for bitmask of `len` bits.
88#[inline(always)]
89pub fn words_for(len: usize) -> usize {
90 (len + WORD_BITS - 1) / WORD_BITS
91}
92
93/// Cast &[u8] to &[u64] for word-wise access.
94#[inline(always)]
95pub unsafe fn mask_bits_as_words(bits: &[u8]) -> *const Word {
96 bits.as_ptr() as *const Word
97}
98
99/// Cast &mut [u8] to &mut [u64] for word-wise mutation.
100#[inline(always)]
101pub unsafe fn mask_bits_as_words_mut(bits: &mut [u8]) -> *mut Word {
102 bits.as_mut_ptr() as *mut Word
103}
104
105/// Create zeroed bitmask of length `len` bits.
106#[inline(always)]
107pub fn new_mask(len: usize) -> Bitmask {
108 Bitmask::new_set_all(len, false)
109}
110
111/// Return window into bitmask's bits slice covering offset..offset+len bits.
112#[inline(always)]
113pub fn bitmask_window_bytes(mask: &Bitmask, offset: usize, len: usize) -> &[u8] {
114 let start = offset / 8;
115 let end = (offset + len + 7) / 8;
116 &mask.bits[start..end]
117}
118
119/// Return mutable window into bitmask's bits slice covering offset..offset+len bits.
120/// Enables efficient in-place modification of bitmask regions.
121#[inline(always)]
122pub fn bitmask_window_bytes_mut(mask: &mut Bitmask, offset: usize, len: usize) -> &mut [u8] {
123 let start = offset / 8;
124 let end = (offset + len + 7) / 8;
125 &mut mask.bits[start..end]
126}
127
128/// Zero all slack bits ≥ `bm.len`.
129#[inline(always)]
130pub fn clear_trailing_bits(bm: &mut Bitmask) {
131 if bm.len == 0 {
132 return;
133 }
134 let used = bm.len & 7;
135 if used != 0 {
136 let last = bm.bits.last_mut().unwrap();
137 *last &= (1u8 << used) - 1;
138 }
139}
140
141/// Quick population count of true bits
142#[inline(always)]
143pub fn popcount_bits(m: BitmaskVT<'_>) -> usize {
144 #[cfg(feature = "simd")]
145 {
146 // Use a default SIMD width if not otherwise specified
147 use crate::kernels::bitmask::simd::popcount_mask_simd;
148 popcount_mask_simd::<8>(m)
149 }
150 #[cfg(not(feature = "simd"))]
151 {
152 use crate::kernels::bitmask::std::popcount_mask;
153 popcount_mask(m)
154 }
155}
156
157#[cfg(test)]
158mod tests {
159 use super::*;
160 use minarrow::Bitmask;
161
162 #[test]
163 fn test_words_for() {
164 assert_eq!(words_for(0), 0);
165 assert_eq!(words_for(1), 1);
166 assert_eq!(words_for(63), 1);
167 assert_eq!(words_for(64), 1);
168 assert_eq!(words_for(65), 2);
169 assert_eq!(words_for(128), 2);
170 assert_eq!(words_for(129), 3);
171 }
172
173 #[test]
174 fn test_mask_bits_as_words_and_mut() {
175 // Test reading words from bits
176 let mut mask = Bitmask::new_set_all(128, false);
177 // Write known pattern into mask
178 mask.set(0, true);
179 mask.set(63, true);
180 mask.set(64, true);
181 mask.set(127, true);
182 let bits = &mask.bits;
183 unsafe {
184 let words = mask_bits_as_words(bits);
185 assert_eq!(*words, 1u64 | (1u64 << 63));
186 assert_eq!(*words.add(1), 1u64 | (1u64 << 63));
187 }
188
189 // Test writing via mask_bits_as_words_mut
190 let mut mask = Bitmask::new_set_all(128, false);
191 let bits = &mut mask.bits;
192 unsafe {
193 let words_mut = mask_bits_as_words_mut(bits);
194 *words_mut = 0xDEADBEEFDEADBEEF;
195 *words_mut.add(1) = 0xCAFEBABECAFEBABE;
196 }
197 assert_eq!(mask.bits[0], 0xEF);
198 assert_eq!(mask.bits[7], 0xDE);
199 assert_eq!(mask.bits[8], 0xBE);
200 assert_eq!(mask.bits[15], 0xCA);
201 }
202
203 #[test]
204 fn test_bitmask_window_bytes() {
205 let mut mask = Bitmask::new_set_all(24, false);
206 mask.set(0, true);
207 mask.set(7, true);
208 mask.set(8, true);
209 mask.set(15, true);
210 mask.set(16, true);
211 mask.set(23, true);
212 // Window: bytes 1..3 should cover bits 8..24
213 let bytes = bitmask_window_bytes(&mask, 8, 16);
214 assert_eq!(bytes.len(), 2); // 16 bits = 2 bytes
215 assert_eq!(bytes[0], 0b10000001); // bits 8 and 15 set
216 assert_eq!(bytes[1], 0b10000001); // bits 16 and 23 set
217 }
218
219 #[test]
220 fn test_bitmask_window_bytes_mut() {
221 let mut mask = Bitmask::new_set_all(16, false);
222 {
223 let window = bitmask_window_bytes_mut(&mut mask, 0, 16);
224 window[0] = 0xAA;
225 window[1] = 0x55;
226 }
227 assert_eq!(mask.bits[0], 0xAA);
228 assert_eq!(mask.bits[1], 0x55);
229 }
230
231 #[test]
232 fn test_clear_trailing_bits() {
233 let mut mask = Bitmask::new_set_all(10, true);
234 // The last byte should have only the low 2 bits set after clearing trailing bits
235 clear_trailing_bits(&mut mask);
236 // 10 bits => 2 bytes, last byte should have only bits 0 and 1 set (0b00000011 == 0x03)
237 assert_eq!(mask.bits[1], 0x03);
238 // The remaining bits should still be set
239 for i in 0..10 {
240 assert!(mask.get(i));
241 }
242 // Any bits beyond 10 should be cleared
243 for i in 10..16 {
244 assert!(!mask.get(i));
245 }
246 }
247}