Skip to main content

vortex_array/builders/
primitive.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::mem::MaybeUninit;
6
7use vortex_buffer::BufferMut;
8use vortex_error::VortexExpect;
9use vortex_error::VortexResult;
10use vortex_error::vortex_ensure;
11use vortex_mask::Mask;
12
13use crate::ArrayRef;
14use crate::IntoArray;
15use crate::arrays::PrimitiveArray;
16use crate::builders::ArrayBuilder;
17use crate::builders::DEFAULT_BUILDER_CAPACITY;
18use crate::builders::LazyBitBufferBuilder;
19use crate::canonical::Canonical;
20use crate::canonical::ToCanonical;
21use crate::dtype::DType;
22use crate::dtype::NativePType;
23use crate::dtype::Nullability;
24use crate::scalar::Scalar;
25
/// The builder for building a [`PrimitiveArray`], parametrized by the `PType`.
///
/// The builder keeps the primitive values and their validity in two separate buffers that are
/// grown together by the `append_*` / `extend_*` methods.
pub struct PrimitiveBuilder<T> {
    /// The primitive [`DType`] (ptype of `T` plus the requested nullability) of the built array.
    dtype: DType,
    /// The values appended so far.
    values: BufferMut<T>,
    /// The validity (null) bits tracked alongside `values`.
    nulls: LazyBitBufferBuilder,
}
32
33impl<T: NativePType> PrimitiveBuilder<T> {
34    /// Creates a new `PrimitiveBuilder` with a capacity of [`DEFAULT_BUILDER_CAPACITY`].
35    pub fn new(nullability: Nullability) -> Self {
36        Self::with_capacity(nullability, DEFAULT_BUILDER_CAPACITY)
37    }
38
39    /// Creates a new `PrimitiveBuilder` with the given `capacity`.
40    pub fn with_capacity(nullability: Nullability, capacity: usize) -> Self {
41        Self {
42            values: BufferMut::with_capacity(capacity),
43            nulls: LazyBitBufferBuilder::new(capacity),
44            dtype: DType::Primitive(T::PTYPE, nullability),
45        }
46    }
47
48    /// Appends a primitive `value` to the builder.
49    pub fn append_value(&mut self, value: T) {
50        self.values.push(value);
51        self.nulls.append_non_null();
52    }
53
54    /// Appends `n` copies of `value` as non-null entries, directly writing into the buffer.
55    pub fn append_n_values(&mut self, value: T, n: usize) {
56        self.values.push_n(value, n);
57        self.nulls.append_n_non_nulls(n);
58    }
59
60    /// Returns the raw primitive values in this builder as a slice.
61    pub fn values(&self) -> &[T] {
62        self.values.as_ref()
63    }
64
65    /// Create a new handle to the next `len` uninitialized values in the builder.
66    ///
67    /// All reads/writes through the handle to the values buffer or the validity buffer will operate
68    /// on indices relative to the start of the range.
69    ///
70    /// # Panics
71    ///
72    /// Panics if `len` is 0 or if the current length of the builder plus `len` would exceed the
73    /// capacity of the builder's memory.
74    ///
75    /// ## Example
76    ///
77    /// ```
78    /// use std::mem::MaybeUninit;
79    /// use vortex_array::builders::{ArrayBuilder, PrimitiveBuilder};
80    /// use vortex_array::dtype::Nullability;
81    ///
82    /// // Create a new builder.
83    /// let mut builder: PrimitiveBuilder<i32> =
84    ///     PrimitiveBuilder::with_capacity(Nullability::NonNullable, 5);
85    ///
86    /// // Populate the values.
87    /// let mut uninit_range = builder.uninit_range(5);
88    /// uninit_range.copy_from_slice(0, &[0, 1, 2, 3, 4]);
89    ///
90    /// // SAFETY: We have initialized all 5 values in the range, and since the array builder is
91    /// // non-nullable, we don't need to set any null bits.
92    /// unsafe { uninit_range.finish(); }
93    ///
94    /// let built = builder.finish_into_primitive();
95    ///
96    /// assert_eq!(built.as_slice::<i32>(), &[0i32, 1, 2, 3, 4]);
97    /// ```
98    pub fn uninit_range(&mut self, len: usize) -> UninitRange<'_, T> {
99        assert_ne!(0, len, "cannot create an uninit range of length 0");
100
101        let current_len = self.values.len();
102        assert!(
103            current_len + len <= self.values.capacity(),
104            "uninit_range of len {len} exceeds builder with length {} and capacity {}",
105            current_len,
106            self.values.capacity()
107        );
108
109        UninitRange { len, builder: self }
110    }
111
112    /// Finishes the builder directly into a [`PrimitiveArray`].
113    pub fn finish_into_primitive(&mut self) -> PrimitiveArray {
114        let validity = self
115            .nulls
116            .finish_with_nullability(self.dtype().nullability());
117
118        PrimitiveArray::new(std::mem::take(&mut self.values).freeze(), validity)
119    }
120
121    /// Extends the primitive array with an iterator.
122    pub fn extend_with_iterator(&mut self, iter: impl IntoIterator<Item = T>, mask: Mask) {
123        self.values.extend(iter);
124        self.nulls.append_validity_mask(mask);
125    }
126}
127
128impl<T: NativePType> ArrayBuilder for PrimitiveBuilder<T> {
129    fn as_any(&self) -> &dyn Any {
130        self
131    }
132
133    fn as_any_mut(&mut self) -> &mut dyn Any {
134        self
135    }
136
137    fn dtype(&self) -> &DType {
138        &self.dtype
139    }
140
141    fn len(&self) -> usize {
142        self.values.len()
143    }
144
145    fn append_zeros(&mut self, n: usize) {
146        self.values.push_n(T::default(), n);
147        self.nulls.append_n_non_nulls(n);
148    }
149
150    unsafe fn append_nulls_unchecked(&mut self, n: usize) {
151        self.values.push_n(T::default(), n);
152        self.nulls.append_n_nulls(n);
153    }
154
155    fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()> {
156        vortex_ensure!(
157            scalar.dtype() == self.dtype(),
158            "PrimitiveBuilder expected scalar with dtype {}, got {}",
159            self.dtype(),
160            scalar.dtype()
161        );
162
163        if let Some(pv) = scalar.as_primitive().pvalue() {
164            self.append_value(pv.cast::<T>()?)
165        } else {
166            self.append_null()
167        }
168
169        Ok(())
170    }
171
172    unsafe fn extend_from_array_unchecked(&mut self, array: &ArrayRef) {
173        let array = array.to_primitive();
174
175        // This should be checked in `extend_from_array` but we can check it again.
176        debug_assert_eq!(
177            array.ptype(),
178            T::PTYPE,
179            "Cannot extend from array with different ptype"
180        );
181
182        self.values.extend_from_slice(array.as_slice::<T>());
183        self.nulls.append_validity_mask(
184            array
185                .validity_mask()
186                .vortex_expect("validity_mask in extend_from_array_unchecked"),
187        );
188    }
189
190    fn reserve_exact(&mut self, additional: usize) {
191        self.values.reserve(additional);
192        self.nulls.reserve_exact(additional);
193    }
194
195    unsafe fn set_validity_unchecked(&mut self, validity: Mask) {
196        self.nulls = LazyBitBufferBuilder::new(validity.len());
197        self.nulls.append_validity_mask(validity);
198    }
199
200    fn finish(&mut self) -> ArrayRef {
201        self.finish_into_primitive().into_array()
202    }
203
204    fn finish_into_canonical(&mut self) -> Canonical {
205        Canonical::Primitive(self.finish_into_primitive())
206    }
207}
208
/// A range of uninitialized values in the primitive builder that can be filled.
///
/// The range is created by [`PrimitiveBuilder::uninit_range`] and covers the `len` slots that
/// directly follow the values already in the builder.
pub struct UninitRange<'a, T> {
    /// The length of the uninitialized range.
    ///
    /// This is guaranteed to be within the memory capacity of the builder.
    len: usize,

    /// A mutable reference to the builder.
    ///
    /// Since this is a mutable reference, we can guarantee that nothing else can modify the builder
    /// while this `UninitRange` exists.
    builder: &'a mut PrimitiveBuilder<T>,
}
222
223impl<T> UninitRange<'_, T> {
224    /// Returns the length of this uninitialized range.
225    #[inline]
226    pub fn len(&self) -> usize {
227        self.len
228    }
229
230    /// Returns true if this range has zero length.
231    #[inline]
232    pub fn is_empty(&self) -> bool {
233        self.len == 0
234    }
235
236    /// Set a value at the given index within this range.
237    ///
238    /// # Panics
239    ///
240    /// Panics if the index is out of bounds.
241    #[inline]
242    pub fn set_value(&mut self, index: usize, value: T) {
243        assert!(index < self.len, "index out of bounds");
244        let spare = self.builder.values.spare_capacity_mut();
245        spare[index] = MaybeUninit::new(value);
246    }
247
248    /// Append a [`Mask`] to this builder's null buffer.
249    ///
250    /// # Panics
251    ///
252    /// Panics if the mask length is not equal to the the length of the current `UninitRange`.
253    ///
254    /// # Safety
255    ///
256    /// - The caller must ensure that they safely initialize `mask.len()` primitive values via
257    ///   [`UninitRange::copy_from_slice`].
258    /// - The caller must also ensure that they only call this method once.
259    pub unsafe fn append_mask(&mut self, mask: Mask) {
260        assert_eq!(
261            mask.len(),
262            self.len,
263            "Tried to append a mask to an `UninitRange` that was beyond the allowed range"
264        );
265
266        // TODO(connor): Ideally, we would call this function `set_mask` and directly set all of the
267        // bits (so that we can call this multiple times), but the underlying `BooleanBuffer` does
268        // not have an easy way to do this correctly.
269
270        self.builder.nulls.append_validity_mask(mask);
271    }
272
273    /// Set a validity bit at the given index.
274    ///
275    /// The index is relative to the start of this range (not relative to the values already in the
276    /// builder).
277    ///
278    /// Note that this will have no effect if the builder is non-nullable.
279    pub fn set_validity_bit(&mut self, index: usize, v: bool) {
280        assert!(index < self.len, "set_bit index out of bounds");
281        // Note that this won't panic because we can only create an `UninitRange` within the
282        // capacity of the builder (it will not automatically resize).
283        let absolute_index = self.builder.values.len() + index;
284        self.builder.nulls.set_bit(absolute_index, v);
285    }
286
287    /// Set values from an initialized range.
288    ///
289    /// Note that the input `offset` should be an offset relative to the local `UninitRange`, not
290    /// the entire `PrimitiveBuilder`.
291    pub fn copy_from_slice(&mut self, local_offset: usize, src: &[T])
292    where
293        T: Copy,
294    {
295        debug_assert!(
296            local_offset + src.len() <= self.len,
297            "tried to copy a slice into a `UninitRange` past its boundary"
298        );
299
300        // SAFETY: &[T] and &[MaybeUninit<T>] have the same layout.
301        let uninit_src: &[MaybeUninit<T>] = unsafe { std::mem::transmute(src) };
302
303        // Note: spare_capacity_mut() returns the spare capacity starting from the current length,
304        // so we just use local_offset directly.
305        let dst =
306            &mut self.builder.values.spare_capacity_mut()[local_offset..local_offset + src.len()];
307        dst.copy_from_slice(uninit_src);
308    }
309
310    /// Get a mutable slice of uninitialized memory at the specified offset within this range.
311    ///
312    /// Note that the offsets are relative to this local range, not to the values already in the
313    /// builder.
314    ///
315    /// # Safety
316    ///
317    /// The caller must ensure that they properly initialize the returned memory before calling
318    /// `finish()` on this range.
319    ///
320    /// # Panics
321    ///
322    /// Panics if `offset + len` exceeds the range bounds.
323    pub unsafe fn slice_uninit_mut(&mut self, offset: usize, len: usize) -> &mut [MaybeUninit<T>] {
324        assert!(
325            offset + len <= self.len,
326            "slice_uninit_mut: offset {} + len {} exceeds range length {}",
327            offset,
328            len,
329            self.len
330        );
331        &mut self.builder.values.spare_capacity_mut()[offset..offset + len]
332    }
333
334    /// Finish building this range, marking it as initialized and advancing the length of the
335    /// underlying values buffer.
336    ///
337    /// # Safety
338    ///
339    /// The caller must ensure that they have safely initialized all `len` values via
340    /// [`copy_from_slice()`] or [`set_value()`], as well as correctly set all of the null bits via
341    /// [`set_validity_bit()`] or [`append_mask()`] if the builder is nullable.
342    ///
343    /// [`copy_from_slice()`]: UninitRange::copy_from_slice
344    /// [`set_value()`]: UninitRange::set_value
345    /// [`set_validity_bit()`]: UninitRange::set_validity_bit
346    /// [`append_mask()`]: UninitRange::append_mask
347    pub unsafe fn finish(self) {
348        // SAFETY: constructor enforces that current length + len does not exceed the capacity of the array.
349        let new_len = self.builder.values.len() + self.len;
350        unsafe { self.builder.values.set_len(new_len) };
351    }
352}
353
#[cfg(test)]
mod tests {
    use super::*;
    use crate::assert_arrays_eq;

    /// REGRESSION TEST: This test verifies that multiple sequential ranges have correct offsets.
    ///
    /// This would have caught the `Deref` bug where it always returned from the start of the
    /// buffer.
    #[test]
    fn test_multiple_uninit_ranges_correct_offsets() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::NonNullable, 10);

        // First range.
        let mut range1 = builder.uninit_range(3);
        range1.copy_from_slice(0, &[1, 2, 3]);

        // SAFETY: We initialized all 3 values.
        unsafe {
            range1.finish();
        }

        // Verify the builder now has these values.
        assert_eq!(builder.values(), &[1, 2, 3]);

        // Second range - this would fail with the old Deref implementation.
        let mut range2 = builder.uninit_range(2);

        // Set values using copy_from_slice.
        range2.copy_from_slice(0, &[4, 5]);

        // SAFETY: We initialized both values.
        unsafe {
            range2.finish();
        }

        // Verify the builder now has all 5 values.
        assert_eq!(builder.values(), &[1, 2, 3, 4, 5]);

        let array = builder.finish_into_primitive();
        assert_arrays_eq!(array, PrimitiveArray::from_iter([1i32, 2, 3, 4, 5]));
    }

    /// REGRESSION TEST: This test verifies that `append_mask` was correctly moved from
    /// `PrimitiveBuilder` to `UninitRange`.
    ///
    /// The old API had `append_mask` on the builder, which was confusing when used with ranges.
    /// This test ensures the new API works correctly.
    #[test]
    fn test_append_mask_on_uninit_range() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::Nullable, 5);
        let mut range = builder.uninit_range(3);

        // Create a mask for 3 values.
        let mask = Mask::from_iter([true, false, true]);

        // SAFETY: We're about to initialize the values.
        unsafe {
            range.append_mask(mask);
        }

        // Initialize the values.
        range.copy_from_slice(0, &[10, 20, 30]);

        // SAFETY: We've initialized all values and set the mask.
        unsafe {
            range.finish();
        }

        let array = builder.finish_into_primitive();
        assert_eq!(array.len(), 3);
        // Check validity using scalar_at - nulls will return is_null() = true.
        assert!(!array.scalar_at(0).unwrap().is_null());
        assert!(array.scalar_at(1).unwrap().is_null());
        assert!(!array.scalar_at(2).unwrap().is_null());
    }

    /// REGRESSION TEST: This test verifies that `append_mask` validates the mask length.
    ///
    /// This ensures that masks can only be appended if they match the range length.
    #[test]
    #[should_panic(
        expected = "Tried to append a mask to an `UninitRange` that was beyond the allowed range"
    )]
    fn test_append_mask_wrong_length_panics() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::Nullable, 10);
        let mut range = builder.uninit_range(5);

        // Try to append a mask with wrong length (3 instead of 5).
        let wrong_mask = Mask::from_iter([true, false, true]);

        // SAFETY: This is expected to panic due to length mismatch.
        unsafe {
            range.append_mask(wrong_mask);
        }
    }

    /// Test that `copy_from_slice` works correctly with different offsets.
    ///
    /// This verifies the new simplified API without the redundant `len` parameter.
    #[test]
    fn test_copy_from_slice_with_offsets() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::NonNullable, 10);
        let mut range = builder.uninit_range(6);

        // Copy to different offsets.
        range.copy_from_slice(0, &[1, 2]);
        range.copy_from_slice(2, &[3, 4]);
        range.copy_from_slice(4, &[5, 6]);

        // SAFETY: We've initialized all 6 values.
        unsafe {
            range.finish();
        }

        let array = builder.finish_into_primitive();
        assert_arrays_eq!(array, PrimitiveArray::from_iter([1i32, 2, 3, 4, 5, 6]));
    }

    /// Test that `set_validity_bit` uses relative indexing within the range.
    ///
    /// Note: `set_validity_bit` requires the null buffer to already be initialized, so we first
    /// use `append_mask` to set up the buffer, then demonstrate that `set_validity_bit` can
    /// modify individual bits with relative indexing.
    #[test]
    fn test_set_bit_relative_indexing() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::Nullable, 10);

        // First add some values to the builder.
        builder.append_value(100);
        builder.append_value(200);

        // Create a range for new values.
        let mut range = builder.uninit_range(3);

        // Use append_mask to initialize the validity buffer for this range.
        let initial_mask = Mask::from_iter([false, false, false]);
        // SAFETY: We're about to initialize the values.
        unsafe {
            range.append_mask(initial_mask);
        }

        // Now we can use set_validity_bit to modify individual bits with relative indexing.
        range.set_validity_bit(0, true); // Change first bit to valid
        range.set_validity_bit(2, true); // Change third bit to valid
        // Leave middle bit as false (null)

        // Initialize the values.
        range.copy_from_slice(0, &[10, 20, 30]);

        // SAFETY: We've initialized all 3 values and set their validity.
        unsafe {
            range.finish();
        }

        let array = builder.finish_into_primitive();

        // Verify the total length and values.
        assert_eq!(array.len(), 5);
        assert_eq!(array.as_slice::<i32>(), &[100, 200, 10, 20, 30]);

        // Check validity - the first two should be valid (from append_value).
        assert!(!array.scalar_at(0).unwrap().is_null()); // initial value 100
        assert!(!array.scalar_at(1).unwrap().is_null()); // initial value 200

        // Check the range items with modified validity.
        assert!(!array.scalar_at(2).unwrap().is_null()); // range index 0 - set to valid
        assert!(array.scalar_at(3).unwrap().is_null()); // range index 1 - left as null
        assert!(!array.scalar_at(4).unwrap().is_null()); // range index 2 - set to valid
    }

    /// Test that creating a zero-length uninit range panics.
    #[test]
    #[should_panic(expected = "cannot create an uninit range of length 0")]
    fn test_zero_length_uninit_range_panics() {
        let mut builder = PrimitiveBuilder::<i32>::new(Nullability::NonNullable);
        let _range = builder.uninit_range(0);
    }

    /// Test that creating an uninit range exceeding capacity panics.
    ///
    /// The expected message deliberately stops before the capacity value: the capacity reported
    /// for a requested capacity of 5 is decided by the underlying values buffer rather than by
    /// this builder, so pinning the exact number would tie the test to an allocation detail.
    #[test]
    #[should_panic(expected = "uninit_range of len 10 exceeds builder with length 0 and capacity")]
    fn test_uninit_range_exceeds_capacity_panics() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::NonNullable, 5);
        let _range = builder.uninit_range(10);
    }

    /// Test that `copy_from_slice` debug asserts on out-of-bounds access.
    ///
    /// Note: This only panics in debug mode due to `debug_assert!`.
    #[test]
    #[cfg(debug_assertions)]
    #[should_panic(expected = "tried to copy a slice into a `UninitRange` past its boundary")]
    fn test_copy_from_slice_out_of_bounds() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::NonNullable, 10);
        let mut range = builder.uninit_range(3);

        // Try to copy 3 elements starting at offset 1 (would need 4 slots total).
        range.copy_from_slice(1, &[1, 2, 3]);
    }

    /// Test that the unsafe contract of `finish` is documented and works correctly.
    ///
    /// This test demonstrates proper usage of the unsafe `finish` method.
    #[test]
    fn test_finish_unsafe_contract() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::Nullable, 5);
        let mut range = builder.uninit_range(3);

        // Set validity mask.
        let mask = Mask::from_iter([true, true, false]);
        // SAFETY: We're about to initialize the matching number of values.
        unsafe {
            range.append_mask(mask);
        }

        // Initialize all values.
        range.copy_from_slice(0, &[10, 20, 30]);

        // SAFETY: We have initialized all 3 values and set their validity.
        unsafe {
            range.finish();
        }

        let array = builder.finish_into_primitive();
        assert_eq!(array.len(), 3);
        assert_eq!(array.as_slice::<i32>(), &[10, 20, 30]);
    }

    /// Test that `append_scalar` appends values and nulls, and rejects a mismatched dtype.
    #[test]
    fn test_append_scalar() {
        use crate::dtype::DType;
        use crate::scalar::Scalar;

        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::Nullable, 10);

        // Test appending a valid primitive value.
        let scalar1 = Scalar::primitive(42i32, Nullability::Nullable);
        builder.append_scalar(&scalar1).unwrap();

        // Test appending another value.
        let scalar2 = Scalar::primitive(84i32, Nullability::Nullable);
        builder.append_scalar(&scalar2).unwrap();

        // Test appending null value.
        let null_scalar = Scalar::null(DType::Primitive(
            crate::dtype::PType::I32,
            Nullability::Nullable,
        ));
        builder.append_scalar(&null_scalar).unwrap();

        let array = builder.finish_into_primitive();
        assert_eq!(array.len(), 3);

        // Check actual values.
        let values = array.as_slice::<i32>();
        assert_eq!(values[0], 42);
        assert_eq!(values[1], 84);
        // values[2] might be any value since it's null.

        // Check validity - first two should be valid, third should be null.
        use crate::vtable::ValidityHelper;
        assert!(array.validity().is_valid(0).unwrap());
        assert!(array.validity().is_valid(1).unwrap());
        assert!(!array.validity().is_valid(2).unwrap());

        // Test wrong dtype error.
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::NonNullable, 10);
        let wrong_scalar = Scalar::from(true);
        assert!(builder.append_scalar(&wrong_scalar).is_err());
    }
}