vortex_array/builders/
primitive.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::mem::MaybeUninit;
6
7use vortex_buffer::BufferMut;
8use vortex_dtype::{DType, NativePType, Nullability};
9use vortex_error::{VortexResult, vortex_ensure};
10use vortex_mask::Mask;
11use vortex_scalar::{PrimitiveScalar, Scalar};
12
13use crate::arrays::PrimitiveArray;
14use crate::builders::{ArrayBuilder, DEFAULT_BUILDER_CAPACITY, LazyNullBufferBuilder};
15use crate::canonical::{Canonical, ToCanonical};
16use crate::{Array, ArrayRef, IntoArray};
17
18/// The builder for building a [`PrimitiveArray`], parametrized by the `PType`.
19pub struct PrimitiveBuilder<T> {
20    dtype: DType,
21    values: BufferMut<T>,
22    nulls: LazyNullBufferBuilder,
23}
24
25impl<T: NativePType> PrimitiveBuilder<T> {
26    /// Creates a new `PrimitiveBuilder` with a capacity of [`DEFAULT_BUILDER_CAPACITY`].
27    pub fn new(nullability: Nullability) -> Self {
28        Self::with_capacity(nullability, DEFAULT_BUILDER_CAPACITY)
29    }
30
31    /// Creates a new `PrimitiveBuilder` with the given `capacity`.
32    pub fn with_capacity(nullability: Nullability, capacity: usize) -> Self {
33        Self {
34            values: BufferMut::with_capacity(capacity),
35            nulls: LazyNullBufferBuilder::new(capacity),
36            dtype: DType::Primitive(T::PTYPE, nullability),
37        }
38    }
39
40    /// Appends a primitive `value` to the builder.
41    pub fn append_value(&mut self, value: T) {
42        self.values.push(value);
43        self.nulls.append_non_null();
44    }
45
46    /// Returns the raw primitive values in this builder as a slice.
47    pub fn values(&self) -> &[T] {
48        self.values.as_ref()
49    }
50
51    /// Create a new handle to the next `len` uninitialized values in the builder.
52    ///
53    /// All reads/writes through the handle to the values buffer or the validity buffer will operate
54    /// on indices relative to the start of the range.
55    ///
56    /// # Panics
57    ///
58    /// Panics if `len` is 0 or if the current length of the builder plus `len` would exceed the
59    /// capacity of the builder's memory.
60    ///
61    /// ## Example
62    ///
63    /// ```
64    /// use std::mem::MaybeUninit;
65    /// use vortex_array::builders::{ArrayBuilder, PrimitiveBuilder};
66    /// use vortex_dtype::Nullability;
67    ///
68    /// // Create a new builder.
69    /// let mut builder: PrimitiveBuilder<i32> =
70    ///     PrimitiveBuilder::with_capacity(Nullability::NonNullable, 5);
71    ///
72    /// // Populate the values.
73    /// let mut uninit_range = builder.uninit_range(5);
74    /// uninit_range.copy_from_slice(0, &[0, 1, 2, 3, 4]);
75    ///
76    /// // SAFETY: We have initialized all 5 values in the range, and since the array builder is
77    /// // non-nullable, we don't need to set any null bits.
78    /// unsafe { uninit_range.finish(); }
79    ///
80    /// let built = builder.finish_into_primitive();
81    ///
82    /// assert_eq!(built.as_slice::<i32>(), &[0i32, 1, 2, 3, 4]);
83    /// ```
84    pub fn uninit_range(&mut self, len: usize) -> UninitRange<'_, T> {
85        assert_ne!(0, len, "cannot create an uninit range of length 0");
86
87        let current_len = self.values.len();
88        assert!(
89            current_len + len <= self.values.capacity(),
90            "uninit_range of len {len} exceeds builder capacity {}",
91            self.values.capacity()
92        );
93
94        UninitRange { len, builder: self }
95    }
96
97    /// Finishes the builder directly into a [`PrimitiveArray`].
98    pub fn finish_into_primitive(&mut self) -> PrimitiveArray {
99        let validity = self
100            .nulls
101            .finish_with_nullability(self.dtype().nullability());
102
103        PrimitiveArray::new(std::mem::take(&mut self.values).freeze(), validity)
104    }
105
106    /// Extends the primitive array with an iterator.
107    pub fn extend_with_iterator(&mut self, iter: impl IntoIterator<Item = T>, mask: Mask) {
108        self.values.extend(iter);
109        self.nulls.append_validity_mask(mask);
110    }
111}
112
113impl<T: NativePType> ArrayBuilder for PrimitiveBuilder<T> {
114    fn as_any(&self) -> &dyn Any {
115        self
116    }
117
118    fn as_any_mut(&mut self) -> &mut dyn Any {
119        self
120    }
121
122    fn dtype(&self) -> &DType {
123        &self.dtype
124    }
125
126    fn len(&self) -> usize {
127        self.values.len()
128    }
129
130    fn append_zeros(&mut self, n: usize) {
131        self.values.push_n(T::default(), n);
132        self.nulls.append_n_non_nulls(n);
133    }
134
135    unsafe fn append_nulls_unchecked(&mut self, n: usize) {
136        self.values.push_n(T::default(), n);
137        self.nulls.append_n_nulls(n);
138    }
139
140    fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()> {
141        vortex_ensure!(
142            scalar.dtype() == self.dtype(),
143            "PrimitiveBuilder expected scalar with dtype {:?}, got {:?}",
144            self.dtype(),
145            scalar.dtype()
146        );
147
148        let primitive_scalar = PrimitiveScalar::try_from(scalar)?;
149        match primitive_scalar.pvalue() {
150            Some(pv) => self.append_value(pv.as_primitive::<T>()),
151            None => self.append_null(),
152        }
153
154        Ok(())
155    }
156
157    unsafe fn extend_from_array_unchecked(&mut self, array: &dyn Array) {
158        let array = array.to_primitive();
159
160        // This should be checked in `extend_from_array` but we can check it again.
161        debug_assert_eq!(
162            array.ptype(),
163            T::PTYPE,
164            "Cannot extend from array with different ptype"
165        );
166
167        self.values.extend_from_slice(array.as_slice::<T>());
168        self.nulls.append_validity_mask(array.validity_mask());
169    }
170
171    fn ensure_capacity(&mut self, capacity: usize) {
172        if capacity > self.values.capacity() {
173            self.values.reserve(capacity - self.values.len());
174            self.nulls.ensure_capacity(capacity);
175        }
176    }
177
178    fn set_validity(&mut self, validity: Mask) {
179        self.nulls = LazyNullBufferBuilder::new(validity.len());
180        self.nulls.append_validity_mask(validity);
181    }
182
183    fn finish(&mut self) -> ArrayRef {
184        self.finish_into_primitive().into_array()
185    }
186
187    fn finish_into_canonical(&mut self) -> Canonical {
188        Canonical::Primitive(self.finish_into_primitive())
189    }
190}
191
192/// A range of uninitialized values in the primitive builder that can be filled.
193pub struct UninitRange<'a, T> {
194    /// The length of the uninitialized range.
195    ///
196    /// This is guaranteed to be within the memory capacity of the builder.
197    len: usize,
198
199    /// A mutable reference to the builder.
200    ///
201    /// Since this is a mutable reference, we can guarantee that nothing else can modify the builder
202    /// while this `UninitRange` exists.
203    builder: &'a mut PrimitiveBuilder<T>,
204}
205
206impl<T> UninitRange<'_, T> {
207    /// Returns the length of this uninitialized range.
208    #[inline]
209    pub fn len(&self) -> usize {
210        self.len
211    }
212
213    /// Returns true if this range has zero length.
214    #[inline]
215    pub fn is_empty(&self) -> bool {
216        self.len == 0
217    }
218
219    /// Set a value at the given index within this range.
220    ///
221    /// # Panics
222    ///
223    /// Panics if the index is out of bounds.
224    #[inline]
225    pub fn set_value(&mut self, index: usize, value: T) {
226        assert!(index < self.len, "index out of bounds");
227        let spare = self.builder.values.spare_capacity_mut();
228        spare[index] = MaybeUninit::new(value);
229    }
230
231    /// Append a [`Mask`] to this builder's null buffer.
232    ///
233    /// # Panics
234    ///
235    /// Panics if the mask length is not equal to the the length of the current `UninitRange`.
236    ///
237    /// # Safety
238    ///
239    /// - The caller must ensure that they safely initialize `mask.len()` primitive values via
240    ///   [`UninitRange::copy_from_slice`].
241    /// - The caller must also ensure that they only call this method once.
242    pub unsafe fn append_mask(&mut self, mask: Mask) {
243        assert_eq!(
244            mask.len(),
245            self.len,
246            "Tried to append a mask to an `UninitRange` that was beyond the allowed range"
247        );
248
249        // TODO(connor): Ideally, we would call this function `set_mask` and directly set all of the
250        // bits (so that we can call this multiple times), but the underlying `BooleanBuffer` does
251        // not have an easy way to do this correctly.
252
253        self.builder.nulls.append_validity_mask(mask);
254    }
255
256    /// Set a validity bit at the given index.
257    ///
258    /// The index is relative to the start of this range (not relative to the values already in the
259    /// builder).
260    ///
261    /// Note that this will have no effect if the builder is non-nullable.
262    pub fn set_bit(&mut self, index: usize, v: bool) {
263        assert!(index < self.len, "set_bit index out of bounds");
264        // Note that this won't panic because we can only create an `UninitRange` within the
265        // capacity of the builder (it will not automatically resize).
266        let absolute_index = self.builder.values.len() + index;
267        self.builder.nulls.set_bit(absolute_index, v);
268    }
269
270    /// Set values from an initialized range.
271    ///
272    /// Note that the input `offset` should be an offset relative to the local `UninitRange`, not
273    /// the entire `PrimitiveBuilder`.
274    pub fn copy_from_slice(&mut self, local_offset: usize, src: &[T])
275    where
276        T: Copy,
277    {
278        debug_assert!(
279            local_offset + src.len() <= self.len,
280            "tried to copy a slice into a `UninitRange` past its boundary"
281        );
282
283        // SAFETY: &[T] and &[MaybeUninit<T>] have the same layout.
284        let uninit_src: &[MaybeUninit<T>] = unsafe { std::mem::transmute(src) };
285
286        // Note: spare_capacity_mut() returns the spare capacity starting from the current length,
287        // so we just use local_offset directly.
288        let dst =
289            &mut self.builder.values.spare_capacity_mut()[local_offset..local_offset + src.len()];
290        dst.copy_from_slice(uninit_src);
291    }
292
293    /// Get a mutable slice of uninitialized memory at the specified offset within this range.
294    ///
295    /// Note that the offsets are relative to this local range, not to the values already in the
296    /// builder.
297    ///
298    /// # Safety
299    ///
300    /// The caller must ensure that they properly initialize the returned memory before calling
301    /// `finish()` on this range.
302    ///
303    /// # Panics
304    ///
305    /// Panics if `offset + len` exceeds the range bounds.
306    pub unsafe fn slice_uninit_mut(&mut self, offset: usize, len: usize) -> &mut [MaybeUninit<T>] {
307        assert!(
308            offset + len <= self.len,
309            "slice_uninit_mut: offset {} + len {} exceeds range length {}",
310            offset,
311            len,
312            self.len
313        );
314        &mut self.builder.values.spare_capacity_mut()[offset..offset + len]
315    }
316
317    /// Finish building this range, marking it as initialized and advancing the length of the
318    /// underlying values buffer.
319    ///
320    /// # Safety
321    ///
322    /// The caller must ensure that they have safely initialized all `len` values via
323    /// [`UninitRange::copy_from_slice`] as well as correctly set all of the null bits via
324    /// [`set_bit`] or [`append_mask`] if the builder is nullable.
325    ///
326    /// [`set_bit`]: UninitRange::set_bit
327    /// [`append_mask`]: UninitRange::append_mask
328    pub unsafe fn finish(self) {
329        // SAFETY: constructor enforces that current length + len does not exceed the capacity of the array.
330        let new_len = self.builder.values.len() + self.len;
331        unsafe { self.builder.values.set_len(new_len) };
332    }
333}
334
#[cfg(test)]
mod tests {
    use super::*;

    /// REGRESSION TEST: This test verifies that multiple sequential ranges have correct offsets.
    ///
    /// This would have caught the `Deref` bug where it always returned from the start of the
    /// buffer.
    #[test]
    fn test_multiple_uninit_ranges_correct_offsets() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::NonNullable, 10);

        // First range.
        let mut range1 = builder.uninit_range(3);
        range1.copy_from_slice(0, &[1, 2, 3]);

        // SAFETY: We initialized all 3 values.
        unsafe {
            range1.finish();
        }

        // Verify the builder now has these values.
        assert_eq!(builder.values(), &[1, 2, 3]);

        // Second range - this would fail with the old Deref implementation.
        let mut range2 = builder.uninit_range(2);

        // Set values using copy_from_slice.
        range2.copy_from_slice(0, &[4, 5]);

        // SAFETY: We initialized both values.
        unsafe {
            range2.finish();
        }

        // Verify the builder now has all 5 values.
        assert_eq!(builder.values(), &[1, 2, 3, 4, 5]);

        let array = builder.finish_into_primitive();
        assert_eq!(array.as_slice::<i32>(), &[1, 2, 3, 4, 5]);
    }

    /// REGRESSION TEST: This test verifies that `append_mask` was correctly moved from
    /// `PrimitiveBuilder` to `UninitRange`.
    ///
    /// The old API had `append_mask` on the builder, which was confusing when used with ranges.
    /// This test ensures the new API works correctly.
    #[test]
    fn test_append_mask_on_uninit_range() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::Nullable, 5);
        let mut range = builder.uninit_range(3);

        // Create a mask for 3 values.
        let mask = Mask::from_iter([true, false, true]);

        // SAFETY: We're about to initialize the values.
        unsafe {
            range.append_mask(mask);
        }

        // Initialize the values.
        range.copy_from_slice(0, &[10, 20, 30]);

        // SAFETY: We've initialized all values and set the mask.
        unsafe {
            range.finish();
        }

        let array = builder.finish_into_primitive();
        assert_eq!(array.len(), 3);
        // Check validity using scalar_at - nulls will return is_null() = true.
        assert!(!array.scalar_at(0).is_null());
        assert!(array.scalar_at(1).is_null());
        assert!(!array.scalar_at(2).is_null());
    }

    /// REGRESSION TEST: This test verifies that `append_mask` validates the mask length.
    ///
    /// This ensures that masks can only be appended if they match the range length.
    #[test]
    #[should_panic(
        expected = "Tried to append a mask to an `UninitRange` that was beyond the allowed range"
    )]
    fn test_append_mask_wrong_length_panics() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::Nullable, 10);
        let mut range = builder.uninit_range(5);

        // Try to append a mask with wrong length (3 instead of 5).
        let wrong_mask = Mask::from_iter([true, false, true]);

        // SAFETY: This is expected to panic due to length mismatch.
        unsafe {
            range.append_mask(wrong_mask);
        }
    }

    /// Test that `copy_from_slice` works correctly with different offsets.
    ///
    /// This verifies the new simplified API without the redundant `len` parameter.
    #[test]
    fn test_copy_from_slice_with_offsets() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::NonNullable, 10);
        let mut range = builder.uninit_range(6);

        // Copy to different offsets.
        range.copy_from_slice(0, &[1, 2]);
        range.copy_from_slice(2, &[3, 4]);
        range.copy_from_slice(4, &[5, 6]);

        // SAFETY: We've initialized all 6 values.
        unsafe {
            range.finish();
        }

        let array = builder.finish_into_primitive();
        assert_eq!(array.as_slice::<i32>(), &[1, 2, 3, 4, 5, 6]);
    }

    /// Test that `set_bit` uses relative indexing within the range.
    ///
    /// Note: `set_bit` requires the null buffer to already be initialized, so we first
    /// use `append_mask` to set up the buffer, then demonstrate that `set_bit` can
    /// modify individual bits with relative indexing.
    #[test]
    fn test_set_bit_relative_indexing() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::Nullable, 10);

        // First add some values to the builder.
        builder.append_value(100);
        builder.append_value(200);

        // Create a range for new values.
        let mut range = builder.uninit_range(3);

        // Use append_mask to initialize the validity buffer for this range.
        let initial_mask = Mask::from_iter([false, false, false]);
        // SAFETY: We're about to initialize the values.
        unsafe {
            range.append_mask(initial_mask);
        }

        // Now we can use set_bit to modify individual bits with relative indexing.
        range.set_bit(0, true); // Change first bit to valid
        range.set_bit(2, true); // Change third bit to valid
        // Leave middle bit as false (null)

        // Initialize the values.
        range.copy_from_slice(0, &[10, 20, 30]);

        // SAFETY: We've initialized all 3 values and set their validity.
        unsafe {
            range.finish();
        }

        let array = builder.finish_into_primitive();

        // Verify the total length and values.
        assert_eq!(array.len(), 5);
        assert_eq!(array.as_slice::<i32>(), &[100, 200, 10, 20, 30]);

        // Check validity - the first two should be valid (from append_value).
        assert!(!array.scalar_at(0).is_null()); // initial value 100
        assert!(!array.scalar_at(1).is_null()); // initial value 200

        // Check the range items with modified validity.
        assert!(!array.scalar_at(2).is_null()); // range index 0 - set to valid
        assert!(array.scalar_at(3).is_null()); // range index 1 - left as null
        assert!(!array.scalar_at(4).is_null()); // range index 2 - set to valid
    }

    /// Test that creating a zero-length uninit range panics.
    #[test]
    #[should_panic(expected = "cannot create an uninit range of length 0")]
    fn test_zero_length_uninit_range_panics() {
        let mut builder = PrimitiveBuilder::<i32>::new(Nullability::NonNullable);
        let _range = builder.uninit_range(0);
    }

    /// Test that creating an uninit range exceeding capacity panics.
    #[test]
    #[should_panic(expected = "uninit_range of len 10 exceeds builder capacity")]
    fn test_uninit_range_exceeds_capacity_panics() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::NonNullable, 5);
        let _range = builder.uninit_range(10);
    }

    /// Test that `copy_from_slice` debug asserts on out-of-bounds access.
    ///
    /// Note: This only panics in debug mode due to `debug_assert!`.
    #[test]
    #[cfg(debug_assertions)]
    #[should_panic(expected = "tried to copy a slice into a `UninitRange` past its boundary")]
    fn test_copy_from_slice_out_of_bounds() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::NonNullable, 10);
        let mut range = builder.uninit_range(3);

        // Try to copy 3 elements starting at offset 1 (would need 4 slots total).
        range.copy_from_slice(1, &[1, 2, 3]);
    }

    /// Test that the unsafe contract of `finish` is documented and works correctly.
    ///
    /// This test demonstrates proper usage of the unsafe `finish` method: the validity mask and
    /// every value are supplied before `finish`, and both must be reflected in the built array.
    #[test]
    fn test_finish_unsafe_contract() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::Nullable, 5);
        let mut range = builder.uninit_range(3);

        // Set validity mask.
        let mask = Mask::from_iter([true, true, false]);
        // SAFETY: We're about to initialize the matching number of values.
        unsafe {
            range.append_mask(mask);
        }

        // Initialize all values.
        range.copy_from_slice(0, &[10, 20, 30]);

        // SAFETY: We have initialized all 3 values and set their validity.
        unsafe {
            range.finish();
        }

        let array = builder.finish_into_primitive();
        assert_eq!(array.len(), 3);
        assert_eq!(array.as_slice::<i32>(), &[10, 20, 30]);

        // The validity supplied through `append_mask` must be reflected in the finished array.
        assert!(!array.scalar_at(0).is_null());
        assert!(!array.scalar_at(1).is_null());
        assert!(array.scalar_at(2).is_null());
    }

    /// Test that `append_scalar` accepts valid and null scalars of the builder's dtype and
    /// rejects a scalar of a different dtype.
    #[test]
    fn test_append_scalar() {
        use vortex_dtype::DType;
        use vortex_scalar::Scalar;

        use crate::vtable::ValidityHelper;

        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::Nullable, 10);

        // Test appending a valid primitive value.
        let scalar1 = Scalar::primitive(42i32, Nullability::Nullable);
        builder.append_scalar(&scalar1).unwrap();

        // Test appending another value.
        let scalar2 = Scalar::primitive(84i32, Nullability::Nullable);
        builder.append_scalar(&scalar2).unwrap();

        // Test appending null value.
        let null_scalar = Scalar::null(DType::Primitive(
            vortex_dtype::PType::I32,
            Nullability::Nullable,
        ));
        builder.append_scalar(&null_scalar).unwrap();

        let array = builder.finish_into_primitive();
        assert_eq!(array.len(), 3);

        // Check actual values.
        let values = array.as_slice::<i32>();
        assert_eq!(values[0], 42);
        assert_eq!(values[1], 84);
        // values[2] might be any value since it's null.

        // Check validity - first two should be valid, third should be null.
        assert!(array.validity().is_valid(0));
        assert!(array.validity().is_valid(1));
        assert!(!array.validity().is_valid(2));

        // Test wrong dtype error.
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::NonNullable, 10);
        let wrong_scalar = Scalar::from(true);
        assert!(builder.append_scalar(&wrong_scalar).is_err());
    }
}