Skip to main content

vortex_array/builders/
primitive.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::mem::MaybeUninit;
6
7use vortex_buffer::BufferMut;
8use vortex_error::VortexExpect;
9use vortex_error::VortexResult;
10use vortex_error::vortex_ensure;
11use vortex_mask::Mask;
12
13use crate::ArrayRef;
14use crate::IntoArray;
15use crate::arrays::PrimitiveArray;
16use crate::builders::ArrayBuilder;
17use crate::builders::DEFAULT_BUILDER_CAPACITY;
18use crate::builders::LazyBitBufferBuilder;
19use crate::canonical::Canonical;
20use crate::canonical::ToCanonical;
21use crate::dtype::DType;
22use crate::dtype::NativePType;
23use crate::dtype::Nullability;
24use crate::scalar::Scalar;
25
/// The builder for building a [`PrimitiveArray`], parametrized by the `PType`.
///
/// Values are accumulated in `values` and their validity in `nulls`; finishing the builder turns
/// the pair into a [`PrimitiveArray`].
pub struct PrimitiveBuilder<T> {
    /// The logical type of the array being built: `DType::Primitive` of `T::PTYPE` with the
    /// nullability chosen at construction time.
    dtype: DType,
    /// The primitive values appended so far. Null entries still occupy a slot in this buffer.
    values: BufferMut<T>,
    /// The validity (null / non-null) information recorded for the appended values.
    nulls: LazyBitBufferBuilder,
}
32
33impl<T: NativePType> PrimitiveBuilder<T> {
34    /// Creates a new `PrimitiveBuilder` with a capacity of [`DEFAULT_BUILDER_CAPACITY`].
35    pub fn new(nullability: Nullability) -> Self {
36        Self::with_capacity(nullability, DEFAULT_BUILDER_CAPACITY)
37    }
38
39    /// Creates a new `PrimitiveBuilder` with the given `capacity`.
40    pub fn with_capacity(nullability: Nullability, capacity: usize) -> Self {
41        Self {
42            values: BufferMut::with_capacity(capacity),
43            nulls: LazyBitBufferBuilder::new(capacity),
44            dtype: DType::Primitive(T::PTYPE, nullability),
45        }
46    }
47
48    /// Appends a primitive `value` to the builder.
49    pub fn append_value(&mut self, value: T) {
50        self.values.push(value);
51        self.nulls.append_non_null();
52    }
53
54    /// Appends `n` copies of `value` as non-null entries, directly writing into the buffer.
55    pub fn append_n_values(&mut self, value: T, n: usize) {
56        self.values.push_n(value, n);
57        self.nulls.append_n_non_nulls(n);
58    }
59
60    /// Returns the raw primitive values in this builder as a slice.
61    pub fn values(&self) -> &[T] {
62        self.values.as_ref()
63    }
64
65    /// Returns the raw primitive values in this builder as a mutable slice.
66    pub fn values_mut(&mut self) -> &mut [T] {
67        self.values.as_mut()
68    }
69
70    /// Create a new handle to the next `len` uninitialized values in the builder.
71    ///
72    /// All reads/writes through the handle to the values buffer or the validity buffer will operate
73    /// on indices relative to the start of the range.
74    ///
75    /// # Panics
76    ///
77    /// Panics if `len` is 0 or if the current length of the builder plus `len` would exceed the
78    /// capacity of the builder's memory.
79    ///
80    /// ## Example
81    ///
82    /// ```
83    /// use std::mem::MaybeUninit;
84    /// use vortex_array::builders::{ArrayBuilder, PrimitiveBuilder};
85    /// use vortex_array::dtype::Nullability;
86    ///
87    /// // Create a new builder.
88    /// let mut builder: PrimitiveBuilder<i32> =
89    ///     PrimitiveBuilder::with_capacity(Nullability::NonNullable, 5);
90    ///
91    /// // Populate the values.
92    /// let mut uninit_range = builder.uninit_range(5);
93    /// uninit_range.copy_from_slice(0, &[0, 1, 2, 3, 4]);
94    ///
95    /// // SAFETY: We have initialized all 5 values in the range, and since the array builder is
96    /// // non-nullable, we don't need to set any null bits.
97    /// unsafe { uninit_range.finish(); }
98    ///
99    /// let built = builder.finish_into_primitive();
100    ///
101    /// assert_eq!(built.as_slice::<i32>(), &[0i32, 1, 2, 3, 4]);
102    /// ```
103    pub fn uninit_range(&mut self, len: usize) -> UninitRange<'_, T> {
104        assert_ne!(0, len, "cannot create an uninit range of length 0");
105
106        let current_len = self.values.len();
107        assert!(
108            current_len + len <= self.values.capacity(),
109            "uninit_range of len {len} exceeds builder with length {} and capacity {}",
110            current_len,
111            self.values.capacity()
112        );
113
114        UninitRange { len, builder: self }
115    }
116
117    /// Finishes the builder directly into a [`PrimitiveArray`].
118    pub fn finish_into_primitive(&mut self) -> PrimitiveArray {
119        let validity = self
120            .nulls
121            .finish_with_nullability(self.dtype().nullability());
122
123        PrimitiveArray::new(std::mem::take(&mut self.values).freeze(), validity)
124    }
125
126    /// Extends the primitive array with an iterator.
127    pub fn extend_with_iterator(&mut self, iter: impl IntoIterator<Item = T>, mask: Mask) {
128        self.values.extend(iter);
129        self.nulls.append_validity_mask(mask);
130    }
131}
132
133impl<T: NativePType> ArrayBuilder for PrimitiveBuilder<T> {
134    fn as_any(&self) -> &dyn Any {
135        self
136    }
137
138    fn as_any_mut(&mut self) -> &mut dyn Any {
139        self
140    }
141
142    fn dtype(&self) -> &DType {
143        &self.dtype
144    }
145
146    fn len(&self) -> usize {
147        self.values.len()
148    }
149
150    fn append_zeros(&mut self, n: usize) {
151        self.values.push_n(T::default(), n);
152        self.nulls.append_n_non_nulls(n);
153    }
154
155    unsafe fn append_nulls_unchecked(&mut self, n: usize) {
156        self.values.push_n(T::default(), n);
157        self.nulls.append_n_nulls(n);
158    }
159
160    fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()> {
161        vortex_ensure!(
162            scalar.dtype() == self.dtype(),
163            "PrimitiveBuilder expected scalar with dtype {}, got {}",
164            self.dtype(),
165            scalar.dtype()
166        );
167
168        if let Some(pv) = scalar.as_primitive().pvalue() {
169            self.append_value(pv.cast::<T>()?)
170        } else {
171            self.append_null()
172        }
173
174        Ok(())
175    }
176
177    unsafe fn extend_from_array_unchecked(&mut self, array: &ArrayRef) {
178        let array = array.to_primitive();
179
180        // This should be checked in `extend_from_array` but we can check it again.
181        debug_assert_eq!(
182            array.ptype(),
183            T::PTYPE,
184            "Cannot extend from array with different ptype"
185        );
186
187        self.values.extend_from_slice(array.as_slice::<T>());
188        self.nulls
189            .append_validity_mask(array.validity_mask().vortex_expect("validity_mask"));
190    }
191
192    fn reserve_exact(&mut self, additional: usize) {
193        self.values.reserve(additional);
194        self.nulls.reserve_exact(additional);
195    }
196
197    unsafe fn set_validity_unchecked(&mut self, validity: Mask) {
198        self.nulls = LazyBitBufferBuilder::new(validity.len());
199        self.nulls.append_validity_mask(validity);
200    }
201
202    fn finish(&mut self) -> ArrayRef {
203        self.finish_into_primitive().into_array()
204    }
205
206    fn finish_into_canonical(&mut self) -> Canonical {
207        Canonical::Primitive(self.finish_into_primitive())
208    }
209}
210
/// A range of uninitialized values in the primitive builder that can be filled.
///
/// Created by [`PrimitiveBuilder::uninit_range`]. Values written through this handle only become
/// part of the builder once [`UninitRange::finish`] advances the length of the values buffer.
pub struct UninitRange<'a, T> {
    /// The length of the uninitialized range.
    ///
    /// This is guaranteed to be within the memory capacity of the builder.
    len: usize,

    /// A mutable reference to the builder.
    ///
    /// Since this is a mutable reference, we can guarantee that nothing else can modify the builder
    /// while this `UninitRange` exists.
    builder: &'a mut PrimitiveBuilder<T>,
}
224
225impl<T> UninitRange<'_, T> {
226    /// Returns the length of this uninitialized range.
227    #[inline]
228    pub fn len(&self) -> usize {
229        self.len
230    }
231
232    /// Returns true if this range has zero length.
233    #[inline]
234    pub fn is_empty(&self) -> bool {
235        self.len == 0
236    }
237
238    /// Set a value at the given index within this range.
239    ///
240    /// # Panics
241    ///
242    /// Panics if the index is out of bounds.
243    #[inline]
244    pub fn set_value(&mut self, index: usize, value: T) {
245        assert!(index < self.len, "index out of bounds");
246        let spare = self.builder.values.spare_capacity_mut();
247        spare[index] = MaybeUninit::new(value);
248    }
249
250    /// Append a [`Mask`] to this builder's null buffer.
251    ///
252    /// # Panics
253    ///
254    /// Panics if the mask length is not equal to the the length of the current `UninitRange`.
255    ///
256    /// # Safety
257    ///
258    /// - The caller must ensure that they safely initialize `mask.len()` primitive values via
259    ///   [`UninitRange::copy_from_slice`].
260    /// - The caller must also ensure that they only call this method once.
261    pub unsafe fn append_mask(&mut self, mask: Mask) {
262        assert_eq!(
263            mask.len(),
264            self.len,
265            "Tried to append a mask to an `UninitRange` that was beyond the allowed range"
266        );
267
268        // TODO(connor): Ideally, we would call this function `set_mask` and directly set all of the
269        // bits (so that we can call this multiple times), but the underlying `BooleanBuffer` does
270        // not have an easy way to do this correctly.
271
272        self.builder.nulls.append_validity_mask(mask);
273    }
274
275    /// Set a validity bit at the given index.
276    ///
277    /// The index is relative to the start of this range (not relative to the values already in the
278    /// builder).
279    ///
280    /// Note that this will have no effect if the builder is non-nullable.
281    pub fn set_validity_bit(&mut self, index: usize, v: bool) {
282        assert!(index < self.len, "set_bit index out of bounds");
283        // Note that this won't panic because we can only create an `UninitRange` within the
284        // capacity of the builder (it will not automatically resize).
285        let absolute_index = self.builder.values.len() + index;
286        self.builder.nulls.set_bit(absolute_index, v);
287    }
288
289    /// Set values from an initialized range.
290    ///
291    /// Note that the input `offset` should be an offset relative to the local `UninitRange`, not
292    /// the entire `PrimitiveBuilder`.
293    pub fn copy_from_slice(&mut self, local_offset: usize, src: &[T])
294    where
295        T: Copy,
296    {
297        debug_assert!(
298            local_offset + src.len() <= self.len,
299            "tried to copy a slice into a `UninitRange` past its boundary"
300        );
301
302        // SAFETY: &[T] and &[MaybeUninit<T>] have the same layout.
303        let uninit_src: &[MaybeUninit<T>] = unsafe { std::mem::transmute(src) };
304
305        // Note: spare_capacity_mut() returns the spare capacity starting from the current length,
306        // so we just use local_offset directly.
307        let dst =
308            &mut self.builder.values.spare_capacity_mut()[local_offset..local_offset + src.len()];
309        dst.copy_from_slice(uninit_src);
310    }
311
312    /// Get a mutable slice of uninitialized memory at the specified offset within this range.
313    ///
314    /// Note that the offsets are relative to this local range, not to the values already in the
315    /// builder.
316    ///
317    /// # Safety
318    ///
319    /// The caller must ensure that they properly initialize the returned memory before calling
320    /// `finish()` on this range.
321    ///
322    /// # Panics
323    ///
324    /// Panics if `offset + len` exceeds the range bounds.
325    pub unsafe fn slice_uninit_mut(&mut self, offset: usize, len: usize) -> &mut [MaybeUninit<T>] {
326        assert!(
327            offset + len <= self.len,
328            "slice_uninit_mut: offset {} + len {} exceeds range length {}",
329            offset,
330            len,
331            self.len
332        );
333        &mut self.builder.values.spare_capacity_mut()[offset..offset + len]
334    }
335
336    /// Finish building this range, marking it as initialized and advancing the length of the
337    /// underlying values buffer.
338    ///
339    /// # Safety
340    ///
341    /// The caller must ensure that they have safely initialized all `len` values via
342    /// [`copy_from_slice()`] or [`set_value()`], as well as correctly set all of the null bits via
343    /// [`set_validity_bit()`] or [`append_mask()`] if the builder is nullable.
344    ///
345    /// [`copy_from_slice()`]: UninitRange::copy_from_slice
346    /// [`set_value()`]: UninitRange::set_value
347    /// [`set_validity_bit()`]: UninitRange::set_validity_bit
348    /// [`append_mask()`]: UninitRange::append_mask
349    pub unsafe fn finish(self) {
350        // SAFETY: constructor enforces that current length + len does not exceed the capacity of the array.
351        let new_len = self.builder.values.len() + self.len;
352        unsafe { self.builder.values.set_len(new_len) };
353    }
354}
355
#[cfg(test)]
mod tests {
    use vortex_error::VortexExpect;

    use super::*;
    use crate::assert_arrays_eq;

    /// REGRESSION TEST: consecutive ranges must each start where the previous one ended.
    ///
    /// This would have caught the `Deref` bug where the range always pointed at the start of the
    /// buffer.
    #[test]
    fn test_multiple_uninit_ranges_correct_offsets() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::NonNullable, 10);

        // Fill and commit the first range.
        let mut first = builder.uninit_range(3);
        first.copy_from_slice(0, &[1, 2, 3]);
        // SAFETY: All 3 values of the first range were written above.
        unsafe { first.finish() };
        assert_eq!(builder.values(), &[1, 2, 3]);

        // The second range must land after the first one (the old `Deref` impl got this wrong).
        let mut second = builder.uninit_range(2);
        second.copy_from_slice(0, &[4, 5]);
        // SAFETY: Both values of the second range were written above.
        unsafe { second.finish() };
        assert_eq!(builder.values(), &[1, 2, 3, 4, 5]);

        let array = builder.finish_into_primitive();
        assert_arrays_eq!(array, PrimitiveArray::from_iter([1i32, 2, 3, 4, 5]));
    }

    /// REGRESSION TEST: `append_mask` lives on `UninitRange` (it used to be on
    /// `PrimitiveBuilder`, which was confusing when combined with ranges).
    #[test]
    fn test_append_mask_on_uninit_range() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::Nullable, 5);
        let mut range = builder.uninit_range(3);

        let validity = Mask::from_iter([true, false, true]);
        // SAFETY: The 3 matching values are initialized right below.
        unsafe { range.append_mask(validity) };

        range.copy_from_slice(0, &[10, 20, 30]);

        // SAFETY: All values are initialized and the mask has been appended.
        unsafe { range.finish() };

        let array = builder.finish_into_primitive();
        assert_eq!(array.len(), 3);

        // Only the middle entry was masked out.
        for (idx, expect_null) in [false, true, false].into_iter().enumerate() {
            assert_eq!(array.scalar_at(idx).unwrap().is_null(), expect_null);
        }
    }

    /// REGRESSION TEST: `append_mask` rejects a mask whose length differs from the range length.
    #[test]
    #[should_panic(
        expected = "Tried to append a mask to an `UninitRange` that was beyond the allowed range"
    )]
    fn test_append_mask_wrong_length_panics() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::Nullable, 10);
        let mut range = builder.uninit_range(5);

        // 3 entries for a range of 5.
        let too_short = Mask::from_iter([true, false, true]);

        // SAFETY: This call is expected to panic on the length check before doing anything else.
        unsafe { range.append_mask(too_short) };
    }

    /// `copy_from_slice` writes at the given range-relative offset.
    ///
    /// This exercises the simplified API without the redundant `len` parameter.
    #[test]
    fn test_copy_from_slice_with_offsets() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::NonNullable, 10);
        let mut range = builder.uninit_range(6);

        // Three adjacent two-element writes cover the whole range.
        for (offset, chunk) in [(0, [1, 2]), (2, [3, 4]), (4, [5, 6])] {
            range.copy_from_slice(offset, &chunk);
        }

        // SAFETY: All 6 values were written by the loop above.
        unsafe { range.finish() };

        let array = builder.finish_into_primitive();
        assert_arrays_eq!(array, PrimitiveArray::from_iter([1i32, 2, 3, 4, 5, 6]));
    }

    /// `set_validity_bit` indexes relative to the range, not to the whole builder.
    ///
    /// The validity for the range is first established with `append_mask`; individual bits are
    /// then flipped through `set_validity_bit`.
    #[test]
    fn test_set_bit_relative_indexing() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::Nullable, 10);

        // Two pre-existing values shift the range away from index 0 of the builder.
        for existing in [100, 200] {
            builder.append_value(existing);
        }

        let mut range = builder.uninit_range(3);

        // Start with every entry of the range marked null.
        let all_null = Mask::from_iter([false, false, false]);
        // SAFETY: The 3 matching values are initialized below.
        unsafe { range.append_mask(all_null) };

        // Flip the first and third entries to valid; the middle one stays null.
        range.set_validity_bit(0, true);
        range.set_validity_bit(2, true);

        range.copy_from_slice(0, &[10, 20, 30]);

        // SAFETY: All 3 values are initialized and their validity has been set.
        unsafe { range.finish() };

        let array = builder.finish_into_primitive();

        assert_eq!(array.len(), 5);
        assert_eq!(array.as_slice::<i32>(), &[100, 200, 10, 20, 30]);

        // Entries 0-1 come from `append_value`; entries 2-4 are range indices 0-2.
        let expected_nulls = [false, false, false, true, false];
        for (idx, expect_null) in expected_nulls.into_iter().enumerate() {
            assert_eq!(array.scalar_at(idx).unwrap().is_null(), expect_null);
        }
    }

    /// A zero-length range is rejected.
    #[test]
    #[should_panic(expected = "cannot create an uninit range of length 0")]
    fn test_zero_length_uninit_range_panics() {
        let mut builder = PrimitiveBuilder::<i32>::new(Nullability::NonNullable);
        let _unused = builder.uninit_range(0);
    }

    /// A range larger than the builder's capacity is rejected.
    ///
    /// NOTE(review): the expected message reports a capacity of 6 although 5 is requested —
    /// presumably the underlying buffer rounds its allocation up; confirm.
    #[test]
    #[should_panic(
        expected = "uninit_range of len 10 exceeds builder with length 0 and capacity 6"
    )]
    fn test_uninit_range_exceeds_capacity_panics() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::NonNullable, 5);
        let _unused = builder.uninit_range(10);
    }

    /// `copy_from_slice` rejects writes that run past the end of the range.
    ///
    /// The check is a `debug_assert!`, so this test only exists in debug builds.
    #[test]
    #[cfg(debug_assertions)]
    #[should_panic(expected = "tried to copy a slice into a `UninitRange` past its boundary")]
    fn test_copy_from_slice_out_of_bounds() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::NonNullable, 10);
        let mut range = builder.uninit_range(3);

        // Offset 1 plus 3 elements needs 4 slots, one more than the range has.
        range.copy_from_slice(1, &[1, 2, 3]);
    }

    /// Demonstrates a correct use of the unsafe `finish` contract: append the mask, initialize
    /// every value, then finish.
    #[test]
    fn test_finish_unsafe_contract() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::Nullable, 5);
        let mut range = builder.uninit_range(3);

        let validity = Mask::from_iter([true, true, false]);
        // SAFETY: The matching number of values is initialized right below.
        unsafe { range.append_mask(validity) };

        range.copy_from_slice(0, &[10, 20, 30]);

        // SAFETY: All 3 values are initialized and their validity has been set.
        unsafe { range.finish() };

        let array = builder.finish_into_primitive();
        assert_eq!(array.len(), 3);
        assert_eq!(array.as_slice::<i32>(), &[10, 20, 30]);
    }

    /// `append_scalar` accepts matching values and nulls and rejects a scalar of another dtype.
    #[test]
    fn test_append_scalar() {
        use crate::dtype::DType;
        use crate::scalar::Scalar;

        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::Nullable, 10);

        // Two regular values...
        for value in [42i32, 84i32] {
            let scalar = Scalar::primitive(value, Nullability::Nullable);
            builder.append_scalar(&scalar).unwrap();
        }

        // ...followed by a null of the same dtype.
        let null_dtype = DType::Primitive(crate::dtype::PType::I32, Nullability::Nullable);
        let null_scalar = Scalar::null(null_dtype);
        builder.append_scalar(&null_scalar).unwrap();

        let array = builder.finish_into_primitive();
        assert_eq!(array.len(), 3);

        // Only the first two slots carry meaningful values; the third is null.
        let values = array.as_slice::<i32>();
        assert_eq!(values[0], 42);
        assert_eq!(values[1], 84);

        let is_valid = |idx: usize| {
            array
                .validity()
                .vortex_expect("primitive validity should be derivable")
                .is_valid(idx)
                .unwrap()
        };
        assert!(is_valid(0));
        assert!(is_valid(1));
        assert!(!is_valid(2));

        // A boolean scalar does not match the builder's dtype and must be rejected.
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::NonNullable, 10);
        let wrong_scalar = Scalar::from(true);
        assert!(builder.append_scalar(&wrong_scalar).is_err());
    }
}