Skip to main content

shape_value/
typed_buffer.rs

//! TypedBuffer<T>: width-specific buffer with optional null/validity bitmap.
//!
//! Used by typed array HeapValue variants (IntArray, FloatArray, BoolArray,
//! I8Array, I16Array, etc.) to provide a uniform nullable container.
//!
//! The validity bitmap is bit-packed: one bit per element, packed into u64 words.
//! Element `i` is tracked by bit `i % 64` of word `i / 64`. A `1` bit means the
//! element is valid; `0` means null. When `validity` is `None`, all elements are
//! considered valid (no nulls).

10use std::fmt;
11
/// Typed element storage paired with an optional null mask.
///
/// Every logical element, null or not, owns one slot in `data`; whether that
/// slot holds a real value is decided by `validity`.
#[derive(Clone)]
pub struct TypedBuffer<T> {
    /// Raw element slots, one per logical element (null slots included).
    pub data: Vec<T>,
    /// Validity mask packed into `u64` words: a set bit marks a valid element,
    /// a clear bit marks a null. `None` means every element is valid.
    pub validity: Option<Vec<u64>>,
}
19
20impl<T: fmt::Debug> fmt::Debug for TypedBuffer<T> {
21    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
22        f.debug_struct("TypedBuffer")
23            .field("len", &self.data.len())
24            .field("has_validity", &self.validity.is_some())
25            .finish()
26    }
27}
28
29impl<T> TypedBuffer<T> {
30    /// Create an empty TypedBuffer with no validity bitmap.
31    #[inline]
32    pub fn new() -> Self {
33        Self {
34            data: Vec::new(),
35            validity: None,
36        }
37    }
38
39    /// Create a TypedBuffer with the given capacity and no validity bitmap.
40    #[inline]
41    pub fn with_capacity(cap: usize) -> Self {
42        Self {
43            data: Vec::with_capacity(cap),
44            validity: None,
45        }
46    }
47
48    /// Create a TypedBuffer from an existing Vec, treating all elements as valid.
49    #[inline]
50    pub fn from_vec(data: Vec<T>) -> Self {
51        Self {
52            data,
53            validity: None,
54        }
55    }
56
57    /// Number of elements (including nulls).
58    #[inline]
59    pub fn len(&self) -> usize {
60        self.data.len()
61    }
62
63    /// Whether the buffer is empty.
64    #[inline]
65    pub fn is_empty(&self) -> bool {
66        self.data.len() == 0
67    }
68
69    /// Check if the element at `idx` is valid (not null).
70    /// Returns `true` if no validity bitmap exists (all valid).
71    #[inline]
72    pub fn is_valid(&self, idx: usize) -> bool {
73        match &self.validity {
74            None => true,
75            Some(bitmap) => {
76                let word = idx / 64;
77                let bit = idx % 64;
78                word < bitmap.len() && (bitmap[word] & (1u64 << bit)) != 0
79            }
80        }
81    }
82
83    /// Get a reference to the element at `idx`, or `None` if null or out of bounds.
84    #[inline]
85    pub fn get(&self, idx: usize) -> Option<&T> {
86        if idx >= self.data.len() || !self.is_valid(idx) {
87            return None;
88        }
89        Some(&self.data[idx])
90    }
91
92    /// Return a slice over the raw data (ignoring validity).
93    #[inline]
94    pub fn as_slice(&self) -> &[T] {
95        &self.data
96    }
97
98    /// Return a mutable slice over the raw data (ignoring validity).
99    #[inline]
100    pub fn as_mut_slice(&mut self) -> &mut [T] {
101        &mut self.data
102    }
103
104    /// Return an iterator over the raw data (ignoring validity).
105    #[inline]
106    pub fn iter(&self) -> std::slice::Iter<'_, T> {
107        self.data.iter()
108    }
109
110    /// Return the number of null elements.
111    pub fn null_count(&self) -> usize {
112        match &self.validity {
113            None => 0,
114            Some(bitmap) => {
115                let total_bits = self.data.len();
116                let set_bits: usize = bitmap.iter().map(|w| w.count_ones() as usize).sum();
117                // Only count bits up to len
118                let full_words = total_bits / 64;
119                let remainder = total_bits % 64;
120                let valid_count = if remainder == 0 {
121                    set_bits
122                } else {
123                    let last_word = bitmap.get(full_words).copied().unwrap_or(0);
124                    let mask = (1u64 << remainder) - 1;
125                    let full_valid: usize = bitmap[..full_words]
126                        .iter()
127                        .map(|w| w.count_ones() as usize)
128                        .sum();
129                    full_valid + (last_word & mask).count_ones() as usize
130                };
131                total_bits - valid_count
132            }
133        }
134    }
135}
136
137impl<T> std::ops::Deref for TypedBuffer<T> {
138    type Target = [T];
139    #[inline]
140    fn deref(&self) -> &[T] {
141        &self.data
142    }
143}
144
145impl<T> std::ops::DerefMut for TypedBuffer<T> {
146    #[inline]
147    fn deref_mut(&mut self) -> &mut [T] {
148        &mut self.data
149    }
150}
151
152impl<T: Default> TypedBuffer<T> {
153    /// Push a valid element.
154    #[inline]
155    pub fn push(&mut self, val: T) {
156        let idx = self.data.len();
157        self.data.push(val);
158        if let Some(ref mut bitmap) = self.validity {
159            ensure_bitmap_capacity(bitmap, idx);
160            let word = idx / 64;
161            let bit = idx % 64;
162            bitmap[word] |= 1u64 << bit;
163        }
164    }
165
166    /// Push a null element (default value in data, validity bit = 0).
167    pub fn push_null(&mut self) {
168        let idx = self.data.len();
169        self.data.push(T::default());
170        let bitmap = self.validity.get_or_insert_with(|| {
171            // Retroactively create bitmap with all previous elements marked valid
172            let words_needed = (idx + 64) / 64;
173            let mut bm = vec![!0u64; words_needed];
174            // Mask off bits beyond current length for the last word
175            if idx % 64 != 0 {
176                let last_word = (idx - 1) / 64;
177                bm[last_word] = (1u64 << (idx % 64)) - 1;
178            }
179            bm
180        });
181        ensure_bitmap_capacity(bitmap, idx);
182        // Bit is already 0 from the vec allocation (or was masked)
183        let word = idx / 64;
184        let bit = idx % 64;
185        bitmap[word] &= !(1u64 << bit);
186    }
187}
188
189impl<T> From<Vec<T>> for TypedBuffer<T> {
190    #[inline]
191    fn from(data: Vec<T>) -> Self {
192        Self::from_vec(data)
193    }
194}
195
196impl<T: PartialEq> PartialEq for TypedBuffer<T> {
197    fn eq(&self, other: &Self) -> bool {
198        if self.data.len() != other.data.len() {
199            return false;
200        }
201        // Compare element-by-element, respecting validity
202        for i in 0..self.data.len() {
203            let a_valid = self.is_valid(i);
204            let b_valid = other.is_valid(i);
205            if a_valid != b_valid {
206                return false;
207            }
208            if a_valid && self.data[i] != other.data[i] {
209                return false;
210            }
211        }
212        true
213    }
214}
215
/// Grow `bitmap` with zeroed words until the word holding bit `idx` exists.
/// A bitmap that is already long enough is left untouched.
#[inline]
fn ensure_bitmap_capacity(bitmap: &mut Vec<u64>, idx: usize) {
    let required_words = idx / 64 + 1;
    if bitmap.len() >= required_words {
        return;
    }
    bitmap.resize(required_words, 0);
}
224
// ===== f64 buffer backed by AlignedVec<f64> =====
226
227use crate::aligned_vec::AlignedVec;
228
229/// Float-specific typed buffer that uses AlignedVec<f64> for SIMD compatibility.
230#[derive(Debug, Clone)]
231pub struct AlignedTypedBuffer {
232    pub data: AlignedVec<f64>,
233    /// Bit-packed validity bitmap (1 = valid, 0 = null). `None` means all valid.
234    pub validity: Option<Vec<u64>>,
235}
236
237impl AlignedTypedBuffer {
238    #[inline]
239    pub fn new() -> Self {
240        Self {
241            data: AlignedVec::new(),
242            validity: None,
243        }
244    }
245
246    #[inline]
247    pub fn with_capacity(cap: usize) -> Self {
248        Self {
249            data: AlignedVec::with_capacity(cap),
250            validity: None,
251        }
252    }
253
254    #[inline]
255    pub fn from_aligned(data: AlignedVec<f64>) -> Self {
256        Self {
257            data,
258            validity: None,
259        }
260    }
261
262    #[inline]
263    pub fn len(&self) -> usize {
264        self.data.len()
265    }
266
267    #[inline]
268    pub fn is_empty(&self) -> bool {
269        self.data.is_empty()
270    }
271
272    #[inline]
273    pub fn is_valid(&self, idx: usize) -> bool {
274        match &self.validity {
275            None => true,
276            Some(bitmap) => {
277                let word = idx / 64;
278                let bit = idx % 64;
279                word < bitmap.len() && (bitmap[word] & (1u64 << bit)) != 0
280            }
281        }
282    }
283
284    #[inline]
285    pub fn get(&self, idx: usize) -> Option<&f64> {
286        if idx >= self.data.len() || !self.is_valid(idx) {
287            return None;
288        }
289        self.data.get(idx)
290    }
291
292    #[inline]
293    pub fn as_slice(&self) -> &[f64] {
294        self.data.as_slice()
295    }
296
297    #[inline]
298    pub fn iter(&self) -> std::slice::Iter<'_, f64> {
299        self.data.as_slice().iter()
300    }
301
302    pub fn push(&mut self, val: f64) {
303        let idx = self.data.len();
304        self.data.push(val);
305        if let Some(ref mut bitmap) = self.validity {
306            ensure_bitmap_capacity(bitmap, idx);
307            let word = idx / 64;
308            let bit = idx % 64;
309            bitmap[word] |= 1u64 << bit;
310        }
311    }
312
313    pub fn pop(&mut self) -> Option<f64> {
314        self.data.pop()
315    }
316
317    pub fn push_null(&mut self) {
318        let idx = self.data.len();
319        self.data.push(0.0);
320        let bitmap = self.validity.get_or_insert_with(|| {
321            let words_needed = (idx + 64) / 64;
322            let mut bm = vec![!0u64; words_needed];
323            if idx % 64 != 0 {
324                let last_word = (idx - 1) / 64;
325                bm[last_word] = (1u64 << (idx % 64)) - 1;
326            }
327            bm
328        });
329        ensure_bitmap_capacity(bitmap, idx);
330        let word = idx / 64;
331        let bit = idx % 64;
332        bitmap[word] &= !(1u64 << bit);
333    }
334}
335
336impl From<AlignedVec<f64>> for AlignedTypedBuffer {
337    #[inline]
338    fn from(data: AlignedVec<f64>) -> Self {
339        Self::from_aligned(data)
340    }
341}
342
343impl std::ops::Deref for AlignedTypedBuffer {
344    type Target = [f64];
345    #[inline]
346    fn deref(&self) -> &[f64] {
347        self.data.as_slice()
348    }
349}
350
351impl std::ops::DerefMut for AlignedTypedBuffer {
352    #[inline]
353    fn deref_mut(&mut self) -> &mut [f64] {
354        self.data.as_mut_slice()
355    }
356}
357
358impl PartialEq for AlignedTypedBuffer {
359    fn eq(&self, other: &Self) -> bool {
360        if self.data.len() != other.data.len() {
361            return false;
362        }
363        for i in 0..self.data.len() {
364            let a_valid = self.is_valid(i);
365            let b_valid = other.is_valid(i);
366            if a_valid != b_valid {
367                return false;
368            }
369            if a_valid && self.data.as_slice()[i] != other.data.as_slice()[i] {
370                return false;
371            }
372        }
373        true
374    }
375}
376
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain pushes only: every slot is valid and readable, nothing is null.
    #[test]
    fn test_typed_buffer_basic() {
        let values = [10i64, 20, 30];
        let mut buf = TypedBuffer::<i64>::new();
        for &v in values.iter() {
            buf.push(v);
        }

        assert_eq!(buf.len(), 3);
        for (i, v) in values.iter().enumerate() {
            assert_eq!(buf.get(i), Some(v));
        }
        assert_eq!(buf.get(3), None);
        for i in 0..values.len() {
            assert!(buf.is_valid(i));
        }
        assert_eq!(buf.null_count(), 0);
    }

    /// A null in the middle is reported by `is_valid`, `get` and `null_count`.
    #[test]
    fn test_typed_buffer_with_nulls() {
        let mut buf = TypedBuffer::<i64>::new();
        buf.push(10);
        buf.push_null();
        buf.push(30);

        let expected = [Some(10i64), None, Some(30)];
        assert_eq!(buf.len(), 3);
        for (i, want) in expected.iter().enumerate() {
            assert_eq!(buf.is_valid(i), want.is_some());
        }
        for (i, want) in expected.iter().enumerate() {
            assert_eq!(buf.get(i), want.as_ref());
        }
        assert_eq!(buf.null_count(), 1);
    }

    /// Buffers built from a `Vec` start out fully valid.
    #[test]
    fn test_typed_buffer_from_vec() {
        let source = vec![1i32, 2, 3, 4];
        let buf = TypedBuffer::from_vec(source);

        assert_eq!(buf.len(), 4);
        assert!(buf.is_valid(0));
        assert_eq!(buf.get(2), Some(&3));
        assert_eq!(buf.null_count(), 0);
    }

    /// Equality on fully valid buffers follows the element values.
    #[test]
    fn test_typed_buffer_equality() {
        let make = |values: Vec<i64>| TypedBuffer::from_vec(values);
        let a = make(vec![1, 2, 3]);
        let b = make(vec![1, 2, 3]);
        let c = make(vec![1, 2, 4]);

        assert_eq!(a, b);
        assert_ne!(a, c);
    }

    /// Two buffers built by the same push/push_null sequence compare equal.
    #[test]
    fn test_typed_buffer_equality_with_nulls() {
        let build = || {
            let mut buf = TypedBuffer::<i64>::new();
            buf.push(1);
            buf.push_null();
            buf
        };

        assert_eq!(build(), build());
    }

    /// Plain pushes on the aligned f64 buffer.
    #[test]
    fn test_aligned_typed_buffer_basic() {
        let mut buf = AlignedTypedBuffer::new();
        for &v in [1.0, 2.0, 3.0].iter() {
            buf.push(v);
        }

        assert_eq!(buf.len(), 3);
        assert_eq!(buf.get(0), Some(&1.0));
        assert_eq!(buf.get(1), Some(&2.0));
        assert!(buf.is_valid(0));
        assert_eq!(buf.as_slice(), &[1.0, 2.0, 3.0]);
    }

    /// A null in the middle of the aligned f64 buffer.
    #[test]
    fn test_aligned_typed_buffer_with_nulls() {
        let mut buf = AlignedTypedBuffer::new();
        buf.push(1.0);
        buf.push_null();
        buf.push(3.0);

        let expected_validity = [true, false, true];
        for (i, &want) in expected_validity.iter().enumerate() {
            assert_eq!(buf.is_valid(i), want);
        }
        assert_eq!(buf.get(1), None);
    }

    /// 200 elements span several bitmap words; every tenth element is null.
    #[test]
    fn test_many_elements_bitmap() {
        let mut buf = TypedBuffer::<i32>::new();
        (0..200).for_each(|i| {
            if i % 10 == 0 {
                buf.push_null()
            } else {
                buf.push(i)
            }
        });

        assert_eq!(buf.len(), 200);
        assert_eq!(buf.null_count(), 20);
        let expected = [(0usize, false), (1, true), (10, false), (11, true)];
        for &(idx, want) in expected.iter() {
            assert_eq!(buf.is_valid(idx), want);
        }
    }
}
485}