Skip to main content

vortex_array/builders/
primitive.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::mem::MaybeUninit;
6
7use vortex_buffer::BufferMut;
8use vortex_error::VortexExpect;
9use vortex_error::VortexResult;
10use vortex_error::vortex_ensure;
11use vortex_mask::Mask;
12
13use crate::ArrayRef;
14use crate::IntoArray;
15use crate::arrays::PrimitiveArray;
16use crate::builders::ArrayBuilder;
17use crate::builders::DEFAULT_BUILDER_CAPACITY;
18use crate::builders::LazyBitBufferBuilder;
19use crate::canonical::Canonical;
20use crate::canonical::ToCanonical;
21use crate::dtype::DType;
22use crate::dtype::NativePType;
23use crate::dtype::Nullability;
24use crate::scalar::Scalar;
25
26/// The builder for building a [`PrimitiveArray`], parametrized by the `PType`.
27pub struct PrimitiveBuilder<T> {
28    dtype: DType,
29    values: BufferMut<T>,
30    nulls: LazyBitBufferBuilder,
31}
32
33impl<T: NativePType> PrimitiveBuilder<T> {
34    /// Creates a new `PrimitiveBuilder` with a capacity of [`DEFAULT_BUILDER_CAPACITY`].
35    pub fn new(nullability: Nullability) -> Self {
36        Self::with_capacity(nullability, DEFAULT_BUILDER_CAPACITY)
37    }
38
39    /// Creates a new `PrimitiveBuilder` with the given `capacity`.
40    pub fn with_capacity(nullability: Nullability, capacity: usize) -> Self {
41        Self {
42            values: BufferMut::with_capacity(capacity),
43            nulls: LazyBitBufferBuilder::new(capacity),
44            dtype: DType::Primitive(T::PTYPE, nullability),
45        }
46    }
47
48    /// Appends a primitive `value` to the builder.
49    pub fn append_value(&mut self, value: T) {
50        self.values.push(value);
51        self.nulls.append_non_null();
52    }
53
54    /// Returns the raw primitive values in this builder as a slice.
55    pub fn values(&self) -> &[T] {
56        self.values.as_ref()
57    }
58
59    /// Create a new handle to the next `len` uninitialized values in the builder.
60    ///
61    /// All reads/writes through the handle to the values buffer or the validity buffer will operate
62    /// on indices relative to the start of the range.
63    ///
64    /// # Panics
65    ///
66    /// Panics if `len` is 0 or if the current length of the builder plus `len` would exceed the
67    /// capacity of the builder's memory.
68    ///
69    /// ## Example
70    ///
71    /// ```
72    /// use std::mem::MaybeUninit;
73    /// use vortex_array::builders::{ArrayBuilder, PrimitiveBuilder};
74    /// use vortex_array::dtype::Nullability;
75    ///
76    /// // Create a new builder.
77    /// let mut builder: PrimitiveBuilder<i32> =
78    ///     PrimitiveBuilder::with_capacity(Nullability::NonNullable, 5);
79    ///
80    /// // Populate the values.
81    /// let mut uninit_range = builder.uninit_range(5);
82    /// uninit_range.copy_from_slice(0, &[0, 1, 2, 3, 4]);
83    ///
84    /// // SAFETY: We have initialized all 5 values in the range, and since the array builder is
85    /// // non-nullable, we don't need to set any null bits.
86    /// unsafe { uninit_range.finish(); }
87    ///
88    /// let built = builder.finish_into_primitive();
89    ///
90    /// assert_eq!(built.as_slice::<i32>(), &[0i32, 1, 2, 3, 4]);
91    /// ```
92    pub fn uninit_range(&mut self, len: usize) -> UninitRange<'_, T> {
93        assert_ne!(0, len, "cannot create an uninit range of length 0");
94
95        let current_len = self.values.len();
96        assert!(
97            current_len + len <= self.values.capacity(),
98            "uninit_range of len {len} exceeds builder with length {} and capacity {}",
99            current_len,
100            self.values.capacity()
101        );
102
103        UninitRange { len, builder: self }
104    }
105
106    /// Finishes the builder directly into a [`PrimitiveArray`].
107    pub fn finish_into_primitive(&mut self) -> PrimitiveArray {
108        let validity = self
109            .nulls
110            .finish_with_nullability(self.dtype().nullability());
111
112        PrimitiveArray::new(std::mem::take(&mut self.values).freeze(), validity)
113    }
114
115    /// Extends the primitive array with an iterator.
116    pub fn extend_with_iterator(&mut self, iter: impl IntoIterator<Item = T>, mask: Mask) {
117        self.values.extend(iter);
118        self.nulls.append_validity_mask(mask);
119    }
120}
121
122impl<T: NativePType> ArrayBuilder for PrimitiveBuilder<T> {
123    fn as_any(&self) -> &dyn Any {
124        self
125    }
126
127    fn as_any_mut(&mut self) -> &mut dyn Any {
128        self
129    }
130
131    fn dtype(&self) -> &DType {
132        &self.dtype
133    }
134
135    fn len(&self) -> usize {
136        self.values.len()
137    }
138
139    fn append_zeros(&mut self, n: usize) {
140        self.values.push_n(T::default(), n);
141        self.nulls.append_n_non_nulls(n);
142    }
143
144    unsafe fn append_nulls_unchecked(&mut self, n: usize) {
145        self.values.push_n(T::default(), n);
146        self.nulls.append_n_nulls(n);
147    }
148
149    fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()> {
150        vortex_ensure!(
151            scalar.dtype() == self.dtype(),
152            "PrimitiveBuilder expected scalar with dtype {}, got {}",
153            self.dtype(),
154            scalar.dtype()
155        );
156
157        if let Some(pv) = scalar.as_primitive().pvalue() {
158            self.append_value(pv.cast::<T>()?)
159        } else {
160            self.append_null()
161        }
162
163        Ok(())
164    }
165
166    unsafe fn extend_from_array_unchecked(&mut self, array: &ArrayRef) {
167        let array = array.to_primitive();
168
169        // This should be checked in `extend_from_array` but we can check it again.
170        debug_assert_eq!(
171            array.ptype(),
172            T::PTYPE,
173            "Cannot extend from array with different ptype"
174        );
175
176        self.values.extend_from_slice(array.as_slice::<T>());
177        self.nulls.append_validity_mask(
178            array
179                .validity_mask()
180                .vortex_expect("validity_mask in extend_from_array_unchecked"),
181        );
182    }
183
184    fn reserve_exact(&mut self, additional: usize) {
185        self.values.reserve(additional);
186        self.nulls.reserve_exact(additional);
187    }
188
189    unsafe fn set_validity_unchecked(&mut self, validity: Mask) {
190        self.nulls = LazyBitBufferBuilder::new(validity.len());
191        self.nulls.append_validity_mask(validity);
192    }
193
194    fn finish(&mut self) -> ArrayRef {
195        self.finish_into_primitive().into_array()
196    }
197
198    fn finish_into_canonical(&mut self) -> Canonical {
199        Canonical::Primitive(self.finish_into_primitive())
200    }
201}
202
203/// A range of uninitialized values in the primitive builder that can be filled.
204pub struct UninitRange<'a, T> {
205    /// The length of the uninitialized range.
206    ///
207    /// This is guaranteed to be within the memory capacity of the builder.
208    len: usize,
209
210    /// A mutable reference to the builder.
211    ///
212    /// Since this is a mutable reference, we can guarantee that nothing else can modify the builder
213    /// while this `UninitRange` exists.
214    builder: &'a mut PrimitiveBuilder<T>,
215}
216
217impl<T> UninitRange<'_, T> {
218    /// Returns the length of this uninitialized range.
219    #[inline]
220    pub fn len(&self) -> usize {
221        self.len
222    }
223
224    /// Returns true if this range has zero length.
225    #[inline]
226    pub fn is_empty(&self) -> bool {
227        self.len == 0
228    }
229
230    /// Set a value at the given index within this range.
231    ///
232    /// # Panics
233    ///
234    /// Panics if the index is out of bounds.
235    #[inline]
236    pub fn set_value(&mut self, index: usize, value: T) {
237        assert!(index < self.len, "index out of bounds");
238        let spare = self.builder.values.spare_capacity_mut();
239        spare[index] = MaybeUninit::new(value);
240    }
241
242    /// Append a [`Mask`] to this builder's null buffer.
243    ///
244    /// # Panics
245    ///
246    /// Panics if the mask length is not equal to the the length of the current `UninitRange`.
247    ///
248    /// # Safety
249    ///
250    /// - The caller must ensure that they safely initialize `mask.len()` primitive values via
251    ///   [`UninitRange::copy_from_slice`].
252    /// - The caller must also ensure that they only call this method once.
253    pub unsafe fn append_mask(&mut self, mask: Mask) {
254        assert_eq!(
255            mask.len(),
256            self.len,
257            "Tried to append a mask to an `UninitRange` that was beyond the allowed range"
258        );
259
260        // TODO(connor): Ideally, we would call this function `set_mask` and directly set all of the
261        // bits (so that we can call this multiple times), but the underlying `BooleanBuffer` does
262        // not have an easy way to do this correctly.
263
264        self.builder.nulls.append_validity_mask(mask);
265    }
266
267    /// Set a validity bit at the given index.
268    ///
269    /// The index is relative to the start of this range (not relative to the values already in the
270    /// builder).
271    ///
272    /// Note that this will have no effect if the builder is non-nullable.
273    pub fn set_validity_bit(&mut self, index: usize, v: bool) {
274        assert!(index < self.len, "set_bit index out of bounds");
275        // Note that this won't panic because we can only create an `UninitRange` within the
276        // capacity of the builder (it will not automatically resize).
277        let absolute_index = self.builder.values.len() + index;
278        self.builder.nulls.set_bit(absolute_index, v);
279    }
280
281    /// Set values from an initialized range.
282    ///
283    /// Note that the input `offset` should be an offset relative to the local `UninitRange`, not
284    /// the entire `PrimitiveBuilder`.
285    pub fn copy_from_slice(&mut self, local_offset: usize, src: &[T])
286    where
287        T: Copy,
288    {
289        debug_assert!(
290            local_offset + src.len() <= self.len,
291            "tried to copy a slice into a `UninitRange` past its boundary"
292        );
293
294        // SAFETY: &[T] and &[MaybeUninit<T>] have the same layout.
295        let uninit_src: &[MaybeUninit<T>] = unsafe { std::mem::transmute(src) };
296
297        // Note: spare_capacity_mut() returns the spare capacity starting from the current length,
298        // so we just use local_offset directly.
299        let dst =
300            &mut self.builder.values.spare_capacity_mut()[local_offset..local_offset + src.len()];
301        dst.copy_from_slice(uninit_src);
302    }
303
304    /// Get a mutable slice of uninitialized memory at the specified offset within this range.
305    ///
306    /// Note that the offsets are relative to this local range, not to the values already in the
307    /// builder.
308    ///
309    /// # Safety
310    ///
311    /// The caller must ensure that they properly initialize the returned memory before calling
312    /// `finish()` on this range.
313    ///
314    /// # Panics
315    ///
316    /// Panics if `offset + len` exceeds the range bounds.
317    pub unsafe fn slice_uninit_mut(&mut self, offset: usize, len: usize) -> &mut [MaybeUninit<T>] {
318        assert!(
319            offset + len <= self.len,
320            "slice_uninit_mut: offset {} + len {} exceeds range length {}",
321            offset,
322            len,
323            self.len
324        );
325        &mut self.builder.values.spare_capacity_mut()[offset..offset + len]
326    }
327
328    /// Finish building this range, marking it as initialized and advancing the length of the
329    /// underlying values buffer.
330    ///
331    /// # Safety
332    ///
333    /// The caller must ensure that they have safely initialized all `len` values via
334    /// [`copy_from_slice()`] or [`set_value()`], as well as correctly set all of the null bits via
335    /// [`set_validity_bit()`] or [`append_mask()`] if the builder is nullable.
336    ///
337    /// [`copy_from_slice()`]: UninitRange::copy_from_slice
338    /// [`set_value()`]: UninitRange::set_value
339    /// [`set_validity_bit()`]: UninitRange::set_validity_bit
340    /// [`append_mask()`]: UninitRange::append_mask
341    pub unsafe fn finish(self) {
342        // SAFETY: constructor enforces that current length + len does not exceed the capacity of the array.
343        let new_len = self.builder.values.len() + self.len;
344        unsafe { self.builder.values.set_len(new_len) };
345    }
346}
347
#[cfg(test)]
mod tests {
    use super::*;
    use crate::assert_arrays_eq;

    /// REGRESSION TEST: consecutive ranges must each start where the previous one ended.
    ///
    /// This would have caught the `Deref` bug where the range always resolved to the start of the
    /// buffer.
    #[test]
    fn test_multiple_uninit_ranges_correct_offsets() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::NonNullable, 10);

        // Fill and commit the first range.
        let mut first = builder.uninit_range(3);
        first.copy_from_slice(0, &[1, 2, 3]);
        // SAFETY: all 3 slots of the range were written above.
        unsafe { first.finish() };
        assert_eq!(builder.values(), &[1, 2, 3]);

        // The second range is the one the old `Deref` implementation got wrong.
        let mut second = builder.uninit_range(2);
        second.copy_from_slice(0, &[4, 5]);
        // SAFETY: both slots of the range were written above.
        unsafe { second.finish() };
        assert_eq!(builder.values(), &[1, 2, 3, 4, 5]);

        let built = builder.finish_into_primitive();
        assert_arrays_eq!(built, PrimitiveArray::from_iter([1i32, 2, 3, 4, 5]));
    }

    /// REGRESSION TEST: `append_mask` lives on `UninitRange` rather than on `PrimitiveBuilder`.
    ///
    /// The old API had `append_mask` on the builder, which was confusing when used with ranges.
    /// This test ensures the new API works correctly.
    #[test]
    fn test_append_mask_on_uninit_range() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::Nullable, 5);
        let mut range = builder.uninit_range(3);

        let validity = Mask::from_iter([true, false, true]);
        // SAFETY: the 3 matching values are initialized right below, and this is the only call.
        unsafe { range.append_mask(validity) };

        range.copy_from_slice(0, &[10, 20, 30]);

        // SAFETY: all values are initialized and the mask has been appended.
        unsafe { range.finish() };

        let built = builder.finish_into_primitive();
        assert_eq!(built.len(), 3);

        // `scalar_at` yields a null scalar exactly where the mask was false.
        let expected_nulls = [false, true, false];
        for (idx, &expect_null) in expected_nulls.iter().enumerate() {
            assert_eq!(built.scalar_at(idx).unwrap().is_null(), expect_null);
        }
    }

    /// REGRESSION TEST: `append_mask` rejects a mask whose length differs from the range length.
    #[test]
    #[should_panic(
        expected = "Tried to append a mask to an `UninitRange` that was beyond the allowed range"
    )]
    fn test_append_mask_wrong_length_panics() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::Nullable, 10);
        let mut range = builder.uninit_range(5);

        // The range has 5 slots but the mask only describes 3.
        let too_short = Mask::from_iter([true, false, true]);

        // SAFETY: the call is expected to panic on the length mismatch before doing anything.
        unsafe { range.append_mask(too_short) };
    }

    /// `copy_from_slice` writes at the requested offset within the range.
    ///
    /// This verifies the simplified API without the redundant `len` parameter.
    #[test]
    fn test_copy_from_slice_with_offsets() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::NonNullable, 10);
        let mut range = builder.uninit_range(6);

        // Three adjacent two-element copies cover the whole range.
        range.copy_from_slice(0, &[1, 2]);
        range.copy_from_slice(2, &[3, 4]);
        range.copy_from_slice(4, &[5, 6]);

        // SAFETY: all 6 slots were written above.
        unsafe { range.finish() };

        let built = builder.finish_into_primitive();
        assert_arrays_eq!(built, PrimitiveArray::from_iter([1i32, 2, 3, 4, 5, 6]));
    }

    /// `set_validity_bit` uses indices relative to the start of the range.
    ///
    /// Note: setting individual bits requires the null buffer to already be initialized, so the
    /// test first uses `append_mask` to set up the buffer and then flips individual bits.
    #[test]
    fn test_set_bit_relative_indexing() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::Nullable, 10);

        // Two pre-existing values, so that the range does not start at absolute index 0.
        builder.append_value(100);
        builder.append_value(200);

        let mut range = builder.uninit_range(3);

        // Start with every slot of the range marked null.
        let all_null = Mask::from_iter([false, false, false]);
        // SAFETY: the 3 matching values are initialized below, and this is the only call.
        unsafe { range.append_mask(all_null) };

        // Flip the first and the third slot to valid; the middle one stays null.
        range.set_validity_bit(0, true);
        range.set_validity_bit(2, true);

        range.copy_from_slice(0, &[10, 20, 30]);

        // SAFETY: all 3 values are initialized and their validity has been set.
        unsafe { range.finish() };

        let built = builder.finish_into_primitive();

        assert_eq!(built.len(), 5);
        assert_eq!(built.as_slice::<i32>(), &[100, 200, 10, 20, 30]);

        // Indices 0 and 1 come from `append_value`; indices 2..5 are range slots 0..3, of which
        // only the middle one was left null.
        let expected_nulls = [false, false, false, true, false];
        for (idx, &expect_null) in expected_nulls.iter().enumerate() {
            assert_eq!(built.scalar_at(idx).unwrap().is_null(), expect_null);
        }
    }

    /// Requesting a zero-length range panics.
    #[test]
    #[should_panic(expected = "cannot create an uninit range of length 0")]
    fn test_zero_length_uninit_range_panics() {
        let mut builder = PrimitiveBuilder::<i32>::new(Nullability::NonNullable);
        let _range = builder.uninit_range(0);
    }

    /// Requesting a range larger than the remaining capacity panics.
    #[test]
    #[should_panic(
        expected = "uninit_range of len 10 exceeds builder with length 0 and capacity 6"
    )]
    fn test_uninit_range_exceeds_capacity_panics() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::NonNullable, 5);
        let _range = builder.uninit_range(10);
    }

    /// `copy_from_slice` rejects copies that run past the end of the range.
    ///
    /// Note: the check is a `debug_assert!`, so this test only exists in debug builds.
    #[test]
    #[cfg(debug_assertions)]
    #[should_panic(expected = "tried to copy a slice into a `UninitRange` past its boundary")]
    fn test_copy_from_slice_out_of_bounds() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::NonNullable, 10);
        let mut range = builder.uninit_range(3);

        // 3 elements starting at offset 1 would need 4 slots, one more than the range has.
        range.copy_from_slice(1, &[1, 2, 3]);
    }

    /// Demonstrates a correct use of the unsafe `finish` method on a nullable builder.
    #[test]
    fn test_finish_unsafe_contract() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::Nullable, 5);
        let mut range = builder.uninit_range(3);

        let validity = Mask::from_iter([true, true, false]);
        // SAFETY: the matching number of values is initialized right below.
        unsafe { range.append_mask(validity) };

        range.copy_from_slice(0, &[10, 20, 30]);

        // SAFETY: all 3 values are initialized and their validity has been set.
        unsafe { range.finish() };

        let built = builder.finish_into_primitive();
        assert_eq!(built.len(), 3);
        assert_eq!(built.as_slice::<i32>(), &[10, 20, 30]);
    }

    /// `append_scalar` accepts valid and null scalars of the right dtype and rejects others.
    #[test]
    fn test_append_scalar() {
        use crate::dtype::DType;
        use crate::scalar::Scalar;
        use crate::vtable::ValidityHelper;

        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::Nullable, 10);

        // Two valid values...
        for value in [42i32, 84] {
            let scalar = Scalar::primitive(value, Nullability::Nullable);
            builder.append_scalar(&scalar).unwrap();
        }

        // ...followed by a null of the same dtype.
        let null_dtype = DType::Primitive(crate::dtype::PType::I32, Nullability::Nullable);
        builder.append_scalar(&Scalar::null(null_dtype)).unwrap();

        let built = builder.finish_into_primitive();
        assert_eq!(built.len(), 3);

        // Only the first two slots carry meaningful values; the third is null and may hold
        // anything.
        let values = built.as_slice::<i32>();
        assert_eq!(&values[..2], &[42, 84]);

        // First two entries valid, third one null.
        let validity = built.validity();
        assert!(validity.is_valid(0).unwrap());
        assert!(validity.is_valid(1).unwrap());
        assert!(!validity.is_valid(2).unwrap());

        // A scalar of a different dtype is rejected.
        let mut strict = PrimitiveBuilder::<i32>::with_capacity(Nullability::NonNullable, 10);
        let wrong_scalar = Scalar::from(true);
        assert!(strict.append_scalar(&wrong_scalar).is_err());
    }
}