vortex_array/builders/
primitive.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::mem::MaybeUninit;
6
7use vortex_buffer::BufferMut;
8use vortex_dtype::{DType, NativePType, Nullability};
9use vortex_error::{VortexResult, vortex_ensure};
10use vortex_mask::Mask;
11use vortex_scalar::{PrimitiveScalar, Scalar};
12
13use crate::arrays::PrimitiveArray;
14use crate::builders::{ArrayBuilder, DEFAULT_BUILDER_CAPACITY, LazyNullBufferBuilder};
15use crate::canonical::{Canonical, ToCanonical};
16use crate::{Array, ArrayRef, IntoArray};
17
18/// The builder for building a [`PrimitiveArray`], parametrized by the `PType`.
19pub struct PrimitiveBuilder<T> {
20    dtype: DType,
21    values: BufferMut<T>,
22    nulls: LazyNullBufferBuilder,
23}
24
25impl<T: NativePType> PrimitiveBuilder<T> {
26    /// Creates a new `PrimitiveBuilder` with a capacity of [`DEFAULT_BUILDER_CAPACITY`].
27    pub fn new(nullability: Nullability) -> Self {
28        Self::with_capacity(nullability, DEFAULT_BUILDER_CAPACITY)
29    }
30
31    /// Creates a new `PrimitiveBuilder` with the given `capacity`.
32    pub fn with_capacity(nullability: Nullability, capacity: usize) -> Self {
33        Self {
34            values: BufferMut::with_capacity(capacity),
35            nulls: LazyNullBufferBuilder::new(capacity),
36            dtype: DType::Primitive(T::PTYPE, nullability),
37        }
38    }
39
40    /// Appends a primitive `value` to the builder.
41    pub fn append_value(&mut self, value: T) {
42        self.values.push(value);
43        self.nulls.append_non_null();
44    }
45
46    /// Returns the raw primitive values in this builder as a slice.
47    pub fn values(&self) -> &[T] {
48        self.values.as_ref()
49    }
50
51    /// Create a new handle to the next `len` uninitialized values in the builder.
52    ///
53    /// All reads/writes through the handle to the values buffer or the validity buffer will operate
54    /// on indices relative to the start of the range.
55    ///
56    /// # Panics
57    ///
58    /// Panics if `len` is 0 or if the current length of the builder plus `len` would exceed the
59    /// capacity of the builder's memory.
60    ///
61    /// ## Example
62    ///
63    /// ```
64    /// use std::mem::MaybeUninit;
65    /// use vortex_array::builders::{ArrayBuilder, PrimitiveBuilder};
66    /// use vortex_dtype::Nullability;
67    ///
68    /// // Create a new builder.
69    /// let mut builder: PrimitiveBuilder<i32> =
70    ///     PrimitiveBuilder::with_capacity(Nullability::NonNullable, 5);
71    ///
72    /// // Populate the values.
73    /// let mut uninit_range = builder.uninit_range(5);
74    /// uninit_range.copy_from_slice(0, &[0, 1, 2, 3, 4]);
75    ///
76    /// // SAFETY: We have initialized all 5 values in the range, and since the array builder is
77    /// // non-nullable, we don't need to set any null bits.
78    /// unsafe { uninit_range.finish(); }
79    ///
80    /// let built = builder.finish_into_primitive();
81    ///
82    /// assert_eq!(built.as_slice::<i32>(), &[0i32, 1, 2, 3, 4]);
83    /// ```
84    pub fn uninit_range(&mut self, len: usize) -> UninitRange<'_, T> {
85        assert_ne!(0, len, "cannot create an uninit range of length 0");
86
87        let current_len = self.values.len();
88        assert!(
89            current_len + len <= self.values.capacity(),
90            "uninit_range of len {len} exceeds builder with length {} and capacity {}",
91            current_len,
92            self.values.capacity()
93        );
94
95        UninitRange { len, builder: self }
96    }
97
98    /// Finishes the builder directly into a [`PrimitiveArray`].
99    pub fn finish_into_primitive(&mut self) -> PrimitiveArray {
100        let validity = self
101            .nulls
102            .finish_with_nullability(self.dtype().nullability());
103
104        PrimitiveArray::new(std::mem::take(&mut self.values).freeze(), validity)
105    }
106
107    /// Extends the primitive array with an iterator.
108    pub fn extend_with_iterator(&mut self, iter: impl IntoIterator<Item = T>, mask: Mask) {
109        self.values.extend(iter);
110        self.nulls.append_validity_mask(mask);
111    }
112}
113
114impl<T: NativePType> ArrayBuilder for PrimitiveBuilder<T> {
115    fn as_any(&self) -> &dyn Any {
116        self
117    }
118
119    fn as_any_mut(&mut self) -> &mut dyn Any {
120        self
121    }
122
123    fn dtype(&self) -> &DType {
124        &self.dtype
125    }
126
127    fn len(&self) -> usize {
128        self.values.len()
129    }
130
131    fn append_zeros(&mut self, n: usize) {
132        self.values.push_n(T::default(), n);
133        self.nulls.append_n_non_nulls(n);
134    }
135
136    unsafe fn append_nulls_unchecked(&mut self, n: usize) {
137        self.values.push_n(T::default(), n);
138        self.nulls.append_n_nulls(n);
139    }
140
141    fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()> {
142        vortex_ensure!(
143            scalar.dtype() == self.dtype(),
144            "PrimitiveBuilder expected scalar with dtype {:?}, got {:?}",
145            self.dtype(),
146            scalar.dtype()
147        );
148
149        let primitive_scalar = PrimitiveScalar::try_from(scalar)?;
150        match primitive_scalar.pvalue() {
151            Some(pv) => self.append_value(pv.as_primitive::<T>()),
152            None => self.append_null(),
153        }
154
155        Ok(())
156    }
157
158    unsafe fn extend_from_array_unchecked(&mut self, array: &dyn Array) {
159        let array = array.to_primitive();
160
161        // This should be checked in `extend_from_array` but we can check it again.
162        debug_assert_eq!(
163            array.ptype(),
164            T::PTYPE,
165            "Cannot extend from array with different ptype"
166        );
167
168        self.values.extend_from_slice(array.as_slice::<T>());
169        self.nulls.append_validity_mask(array.validity_mask());
170    }
171
172    fn reserve_exact(&mut self, additional: usize) {
173        self.values.reserve(additional);
174        self.nulls.reserve_exact(additional);
175    }
176
177    unsafe fn set_validity_unchecked(&mut self, validity: Mask) {
178        self.nulls = LazyNullBufferBuilder::new(validity.len());
179        self.nulls.append_validity_mask(validity);
180    }
181
182    fn finish(&mut self) -> ArrayRef {
183        self.finish_into_primitive().into_array()
184    }
185
186    fn finish_into_canonical(&mut self) -> Canonical {
187        Canonical::Primitive(self.finish_into_primitive())
188    }
189}
190
191/// A range of uninitialized values in the primitive builder that can be filled.
192pub struct UninitRange<'a, T> {
193    /// The length of the uninitialized range.
194    ///
195    /// This is guaranteed to be within the memory capacity of the builder.
196    len: usize,
197
198    /// A mutable reference to the builder.
199    ///
200    /// Since this is a mutable reference, we can guarantee that nothing else can modify the builder
201    /// while this `UninitRange` exists.
202    builder: &'a mut PrimitiveBuilder<T>,
203}
204
205impl<T> UninitRange<'_, T> {
206    /// Returns the length of this uninitialized range.
207    #[inline]
208    pub fn len(&self) -> usize {
209        self.len
210    }
211
212    /// Returns true if this range has zero length.
213    #[inline]
214    pub fn is_empty(&self) -> bool {
215        self.len == 0
216    }
217
218    /// Set a value at the given index within this range.
219    ///
220    /// # Panics
221    ///
222    /// Panics if the index is out of bounds.
223    #[inline]
224    pub fn set_value(&mut self, index: usize, value: T) {
225        assert!(index < self.len, "index out of bounds");
226        let spare = self.builder.values.spare_capacity_mut();
227        spare[index] = MaybeUninit::new(value);
228    }
229
230    /// Append a [`Mask`] to this builder's null buffer.
231    ///
232    /// # Panics
233    ///
234    /// Panics if the mask length is not equal to the the length of the current `UninitRange`.
235    ///
236    /// # Safety
237    ///
238    /// - The caller must ensure that they safely initialize `mask.len()` primitive values via
239    ///   [`UninitRange::copy_from_slice`].
240    /// - The caller must also ensure that they only call this method once.
241    pub unsafe fn append_mask(&mut self, mask: Mask) {
242        assert_eq!(
243            mask.len(),
244            self.len,
245            "Tried to append a mask to an `UninitRange` that was beyond the allowed range"
246        );
247
248        // TODO(connor): Ideally, we would call this function `set_mask` and directly set all of the
249        // bits (so that we can call this multiple times), but the underlying `BooleanBuffer` does
250        // not have an easy way to do this correctly.
251
252        self.builder.nulls.append_validity_mask(mask);
253    }
254
255    /// Set a validity bit at the given index.
256    ///
257    /// The index is relative to the start of this range (not relative to the values already in the
258    /// builder).
259    ///
260    /// Note that this will have no effect if the builder is non-nullable.
261    pub fn set_validity_bit(&mut self, index: usize, v: bool) {
262        assert!(index < self.len, "set_bit index out of bounds");
263        // Note that this won't panic because we can only create an `UninitRange` within the
264        // capacity of the builder (it will not automatically resize).
265        let absolute_index = self.builder.values.len() + index;
266        self.builder.nulls.set_bit(absolute_index, v);
267    }
268
269    /// Set values from an initialized range.
270    ///
271    /// Note that the input `offset` should be an offset relative to the local `UninitRange`, not
272    /// the entire `PrimitiveBuilder`.
273    pub fn copy_from_slice(&mut self, local_offset: usize, src: &[T])
274    where
275        T: Copy,
276    {
277        debug_assert!(
278            local_offset + src.len() <= self.len,
279            "tried to copy a slice into a `UninitRange` past its boundary"
280        );
281
282        // SAFETY: &[T] and &[MaybeUninit<T>] have the same layout.
283        let uninit_src: &[MaybeUninit<T>] = unsafe { std::mem::transmute(src) };
284
285        // Note: spare_capacity_mut() returns the spare capacity starting from the current length,
286        // so we just use local_offset directly.
287        let dst =
288            &mut self.builder.values.spare_capacity_mut()[local_offset..local_offset + src.len()];
289        dst.copy_from_slice(uninit_src);
290    }
291
292    /// Get a mutable slice of uninitialized memory at the specified offset within this range.
293    ///
294    /// Note that the offsets are relative to this local range, not to the values already in the
295    /// builder.
296    ///
297    /// # Safety
298    ///
299    /// The caller must ensure that they properly initialize the returned memory before calling
300    /// `finish()` on this range.
301    ///
302    /// # Panics
303    ///
304    /// Panics if `offset + len` exceeds the range bounds.
305    pub unsafe fn slice_uninit_mut(&mut self, offset: usize, len: usize) -> &mut [MaybeUninit<T>] {
306        assert!(
307            offset + len <= self.len,
308            "slice_uninit_mut: offset {} + len {} exceeds range length {}",
309            offset,
310            len,
311            self.len
312        );
313        &mut self.builder.values.spare_capacity_mut()[offset..offset + len]
314    }
315
316    /// Finish building this range, marking it as initialized and advancing the length of the
317    /// underlying values buffer.
318    ///
319    /// # Safety
320    ///
321    /// The caller must ensure that they have safely initialized all `len` values via
322    /// [`copy_from_slice()`] or [`set_value()`], as well as correctly set all of the null bits via
323    /// [`set_validity_bit()`] or [`append_mask()`] if the builder is nullable.
324    ///
325    /// [`copy_from_slice()`]: UninitRange::copy_from_slice
326    /// [`set_value()`]: UninitRange::set_value
327    /// [`set_validity_bit()`]: UninitRange::set_validity_bit
328    /// [`append_mask()`]: UninitRange::append_mask
329    pub unsafe fn finish(self) {
330        // SAFETY: constructor enforces that current length + len does not exceed the capacity of the array.
331        let new_len = self.builder.values.len() + self.len;
332        unsafe { self.builder.values.set_len(new_len) };
333    }
334}
335
#[cfg(test)]
mod tests {
    use super::*;

    /// REGRESSION TEST: Ranges created one after another must be written at the correct offsets.
    ///
    /// This would have caught the `Deref` bug where it always returned from the start of the
    /// buffer.
    #[test]
    fn test_multiple_uninit_ranges_correct_offsets() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::NonNullable, 10);

        // Fill and commit the first range.
        let mut first = builder.uninit_range(3);
        first.copy_from_slice(0, &[1, 2, 3]);
        // SAFETY: All 3 values of the range were initialized above.
        unsafe { first.finish() };
        assert_eq!(builder.values(), &[1, 2, 3]);

        // The second range has to start right after the first one; this would fail with the old
        // `Deref` implementation.
        let mut second = builder.uninit_range(2);
        second.copy_from_slice(0, &[4, 5]);
        // SAFETY: Both values of the range were initialized above.
        unsafe { second.finish() };
        assert_eq!(builder.values(), &[1, 2, 3, 4, 5]);

        let built = builder.finish_into_primitive();
        assert_eq!(built.as_slice::<i32>(), &[1, 2, 3, 4, 5]);
    }

    /// REGRESSION TEST: `append_mask` lives on `UninitRange` rather than on `PrimitiveBuilder`.
    ///
    /// The old API had `append_mask` on the builder, which was confusing when used with ranges.
    /// This test ensures the new API works correctly.
    #[test]
    fn test_append_mask_on_uninit_range() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::Nullable, 5);
        let mut range = builder.uninit_range(3);

        // SAFETY: The 3 values covered by the mask are initialized right below.
        unsafe { range.append_mask(Mask::from_iter([true, false, true])) };
        range.copy_from_slice(0, &[10, 20, 30]);

        // SAFETY: All values are initialized and the mask has been appended.
        unsafe { range.finish() };

        let built = builder.finish_into_primitive();
        assert_eq!(built.len(), 3);

        // Only the middle element was masked out, so only it reads back as null.
        for (idx, expect_null) in [false, true, false].into_iter().enumerate() {
            assert_eq!(built.scalar_at(idx).is_null(), expect_null);
        }
    }

    /// REGRESSION TEST: `append_mask` rejects masks whose length differs from the range length.
    #[test]
    #[should_panic(
        expected = "Tried to append a mask to an `UninitRange` that was beyond the allowed range"
    )]
    fn test_append_mask_wrong_length_panics() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::Nullable, 10);
        let mut range = builder.uninit_range(5);

        // The mask covers 3 elements while the range covers 5.
        let too_short = Mask::from_iter([true, false, true]);

        // SAFETY: The call is expected to panic on the length mismatch before touching anything.
        unsafe { range.append_mask(too_short) };
    }

    /// Test that `copy_from_slice` works correctly with different offsets.
    ///
    /// This verifies the new simplified API without the redundant `len` parameter.
    #[test]
    fn test_copy_from_slice_with_offsets() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::NonNullable, 10);
        let mut range = builder.uninit_range(6);

        // Fill the range in three chunks of two values each.
        range.copy_from_slice(0, &[1, 2]);
        range.copy_from_slice(2, &[3, 4]);
        range.copy_from_slice(4, &[5, 6]);

        // SAFETY: The three chunks cover all 6 values of the range.
        unsafe { range.finish() };

        let built = builder.finish_into_primitive();
        assert_eq!(built.as_slice::<i32>(), &[1, 2, 3, 4, 5, 6]);
    }

    /// Test that `set_validity_bit` uses relative indexing within the range.
    ///
    /// Note: setting individual bits requires the null buffer to already be initialized, so the
    /// test first uses `append_mask` to set up the buffer and then flips individual bits using
    /// range-relative indices.
    #[test]
    fn test_set_bit_relative_indexing() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::Nullable, 10);

        // Two values that precede the range, so that relative and absolute indices differ.
        builder.append_value(100);
        builder.append_value(200);

        let mut range = builder.uninit_range(3);

        // Start with every element of the range marked as null.
        // SAFETY: The 3 values covered by the mask are initialized further below.
        unsafe { range.append_mask(Mask::from_iter([false, false, false])) };

        // Mark the first and the third element as valid; the middle one stays null.
        range.set_validity_bit(0, true);
        range.set_validity_bit(2, true);

        range.copy_from_slice(0, &[10, 20, 30]);

        // SAFETY: All 3 values are initialized and their validity has been set.
        unsafe { range.finish() };

        let built = builder.finish_into_primitive();

        assert_eq!(built.len(), 5);
        assert_eq!(built.as_slice::<i32>(), &[100, 200, 10, 20, 30]);

        // Positions 0 and 1 come from `append_value` and are valid; positions 2..5 are the range
        // elements, of which only the middle one (position 3) was left as null.
        for (idx, expect_null) in [false, false, false, true, false].into_iter().enumerate() {
            assert_eq!(built.scalar_at(idx).is_null(), expect_null);
        }
    }

    /// Test that creating a zero-length uninit range panics.
    #[test]
    #[should_panic(expected = "cannot create an uninit range of length 0")]
    fn test_zero_length_uninit_range_panics() {
        let mut builder = PrimitiveBuilder::<i32>::new(Nullability::NonNullable);
        let _range = builder.uninit_range(0);
    }

    /// Test that creating an uninit range exceeding capacity panics.
    #[test]
    #[should_panic(
        expected = "uninit_range of len 10 exceeds builder with length 0 and capacity 6"
    )]
    fn test_uninit_range_exceeds_capacity_panics() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::NonNullable, 5);
        let _range = builder.uninit_range(10);
    }

    /// Test that `copy_from_slice` debug asserts on out-of-bounds access.
    ///
    /// Note: This only panics in debug mode due to `debug_assert!`.
    #[test]
    #[cfg(debug_assertions)]
    #[should_panic(expected = "tried to copy a slice into a `UninitRange` past its boundary")]
    fn test_copy_from_slice_out_of_bounds() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::NonNullable, 10);
        let mut range = builder.uninit_range(3);

        // Three elements starting at offset 1 would need 4 slots, but the range only has 3.
        range.copy_from_slice(1, &[1, 2, 3]);
    }

    /// Test that the unsafe contract of `finish` is documented and works correctly.
    ///
    /// This test demonstrates proper usage of the unsafe `finish` method.
    #[test]
    fn test_finish_unsafe_contract() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::Nullable, 5);
        let mut range = builder.uninit_range(3);

        // SAFETY: The matching number of values is initialized right below.
        unsafe { range.append_mask(Mask::from_iter([true, true, false])) };
        range.copy_from_slice(0, &[10, 20, 30]);

        // SAFETY: All 3 values are initialized and their validity has been set.
        unsafe { range.finish() };

        let built = builder.finish_into_primitive();
        assert_eq!(built.len(), 3);
        assert_eq!(built.as_slice::<i32>(), &[10, 20, 30]);
    }

    /// Test appending valid and null scalars, and that a scalar of the wrong dtype is rejected.
    #[test]
    fn test_append_scalar() {
        use vortex_dtype::DType;
        use vortex_scalar::Scalar;

        use crate::vtable::ValidityHelper;

        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::Nullable, 10);

        // Two valid primitive values.
        for value in [42i32, 84i32] {
            let scalar = Scalar::primitive(value, Nullability::Nullable);
            builder.append_scalar(&scalar).unwrap();
        }

        // Followed by a null of the same dtype.
        let null_dtype = DType::Primitive(vortex_dtype::PType::I32, Nullability::Nullable);
        builder.append_scalar(&Scalar::null(null_dtype)).unwrap();

        let built = builder.finish_into_primitive();
        assert_eq!(built.len(), 3);

        // Only the first two slots carry meaningful values; the third belongs to the null and
        // might hold any value.
        let values = built.as_slice::<i32>();
        assert_eq!(&values[..2], &[42, 84]);

        // The first two elements are valid, the third one is null.
        for (idx, expect_valid) in [true, true, false].into_iter().enumerate() {
            assert_eq!(built.validity().is_valid(idx), expect_valid);
        }

        // A scalar whose dtype does not match the builder must be rejected.
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::NonNullable, 10);
        let bool_scalar = Scalar::from(true);
        assert!(builder.append_scalar(&bool_scalar).is_err());
    }
}