Skip to main content

vortex_array/builders/
primitive.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::mem::MaybeUninit;
6
7use vortex_buffer::BufferMut;
8use vortex_dtype::DType;
9use vortex_dtype::NativePType;
10use vortex_dtype::Nullability;
11use vortex_error::VortexExpect;
12use vortex_error::VortexResult;
13use vortex_error::vortex_ensure;
14use vortex_mask::Mask;
15
16use crate::Array;
17use crate::ArrayRef;
18use crate::IntoArray;
19use crate::arrays::PrimitiveArray;
20use crate::builders::ArrayBuilder;
21use crate::builders::DEFAULT_BUILDER_CAPACITY;
22use crate::builders::LazyBitBufferBuilder;
23use crate::canonical::Canonical;
24use crate::canonical::ToCanonical;
25use crate::scalar::Scalar;
26
27/// The builder for building a [`PrimitiveArray`], parametrized by the `PType`.
28pub struct PrimitiveBuilder<T> {
29    dtype: DType,
30    values: BufferMut<T>,
31    nulls: LazyBitBufferBuilder,
32}
33
34impl<T: NativePType> PrimitiveBuilder<T> {
35    /// Creates a new `PrimitiveBuilder` with a capacity of [`DEFAULT_BUILDER_CAPACITY`].
36    pub fn new(nullability: Nullability) -> Self {
37        Self::with_capacity(nullability, DEFAULT_BUILDER_CAPACITY)
38    }
39
40    /// Creates a new `PrimitiveBuilder` with the given `capacity`.
41    pub fn with_capacity(nullability: Nullability, capacity: usize) -> Self {
42        Self {
43            values: BufferMut::with_capacity(capacity),
44            nulls: LazyBitBufferBuilder::new(capacity),
45            dtype: DType::Primitive(T::PTYPE, nullability),
46        }
47    }
48
49    /// Appends a primitive `value` to the builder.
50    pub fn append_value(&mut self, value: T) {
51        self.values.push(value);
52        self.nulls.append_non_null();
53    }
54
55    /// Returns the raw primitive values in this builder as a slice.
56    pub fn values(&self) -> &[T] {
57        self.values.as_ref()
58    }
59
60    /// Create a new handle to the next `len` uninitialized values in the builder.
61    ///
62    /// All reads/writes through the handle to the values buffer or the validity buffer will operate
63    /// on indices relative to the start of the range.
64    ///
65    /// # Panics
66    ///
67    /// Panics if `len` is 0 or if the current length of the builder plus `len` would exceed the
68    /// capacity of the builder's memory.
69    ///
70    /// ## Example
71    ///
72    /// ```
73    /// use std::mem::MaybeUninit;
74    /// use vortex_array::builders::{ArrayBuilder, PrimitiveBuilder};
75    /// use vortex_dtype::Nullability;
76    ///
77    /// // Create a new builder.
78    /// let mut builder: PrimitiveBuilder<i32> =
79    ///     PrimitiveBuilder::with_capacity(Nullability::NonNullable, 5);
80    ///
81    /// // Populate the values.
82    /// let mut uninit_range = builder.uninit_range(5);
83    /// uninit_range.copy_from_slice(0, &[0, 1, 2, 3, 4]);
84    ///
85    /// // SAFETY: We have initialized all 5 values in the range, and since the array builder is
86    /// // non-nullable, we don't need to set any null bits.
87    /// unsafe { uninit_range.finish(); }
88    ///
89    /// let built = builder.finish_into_primitive();
90    ///
91    /// assert_eq!(built.as_slice::<i32>(), &[0i32, 1, 2, 3, 4]);
92    /// ```
93    pub fn uninit_range(&mut self, len: usize) -> UninitRange<'_, T> {
94        assert_ne!(0, len, "cannot create an uninit range of length 0");
95
96        let current_len = self.values.len();
97        assert!(
98            current_len + len <= self.values.capacity(),
99            "uninit_range of len {len} exceeds builder with length {} and capacity {}",
100            current_len,
101            self.values.capacity()
102        );
103
104        UninitRange { len, builder: self }
105    }
106
107    /// Finishes the builder directly into a [`PrimitiveArray`].
108    pub fn finish_into_primitive(&mut self) -> PrimitiveArray {
109        let validity = self
110            .nulls
111            .finish_with_nullability(self.dtype().nullability());
112
113        PrimitiveArray::new(std::mem::take(&mut self.values).freeze(), validity)
114    }
115
116    /// Extends the primitive array with an iterator.
117    pub fn extend_with_iterator(&mut self, iter: impl IntoIterator<Item = T>, mask: Mask) {
118        self.values.extend(iter);
119        self.nulls.append_validity_mask(mask);
120    }
121}
122
123impl<T: NativePType> ArrayBuilder for PrimitiveBuilder<T> {
124    fn as_any(&self) -> &dyn Any {
125        self
126    }
127
128    fn as_any_mut(&mut self) -> &mut dyn Any {
129        self
130    }
131
132    fn dtype(&self) -> &DType {
133        &self.dtype
134    }
135
136    fn len(&self) -> usize {
137        self.values.len()
138    }
139
140    fn append_zeros(&mut self, n: usize) {
141        self.values.push_n(T::default(), n);
142        self.nulls.append_n_non_nulls(n);
143    }
144
145    unsafe fn append_nulls_unchecked(&mut self, n: usize) {
146        self.values.push_n(T::default(), n);
147        self.nulls.append_n_nulls(n);
148    }
149
150    fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()> {
151        vortex_ensure!(
152            scalar.dtype() == self.dtype(),
153            "PrimitiveBuilder expected scalar with dtype {}, got {}",
154            self.dtype(),
155            scalar.dtype()
156        );
157
158        if let Some(pv) = scalar.as_primitive().pvalue() {
159            self.append_value(pv.cast::<T>()?)
160        } else {
161            self.append_null()
162        }
163
164        Ok(())
165    }
166
167    unsafe fn extend_from_array_unchecked(&mut self, array: &dyn Array) {
168        let array = array.to_primitive();
169
170        // This should be checked in `extend_from_array` but we can check it again.
171        debug_assert_eq!(
172            array.ptype(),
173            T::PTYPE,
174            "Cannot extend from array with different ptype"
175        );
176
177        self.values.extend_from_slice(array.as_slice::<T>());
178        self.nulls.append_validity_mask(
179            array
180                .validity_mask()
181                .vortex_expect("validity_mask in extend_from_array_unchecked"),
182        );
183    }
184
185    fn reserve_exact(&mut self, additional: usize) {
186        self.values.reserve(additional);
187        self.nulls.reserve_exact(additional);
188    }
189
190    unsafe fn set_validity_unchecked(&mut self, validity: Mask) {
191        self.nulls = LazyBitBufferBuilder::new(validity.len());
192        self.nulls.append_validity_mask(validity);
193    }
194
195    fn finish(&mut self) -> ArrayRef {
196        self.finish_into_primitive().into_array()
197    }
198
199    fn finish_into_canonical(&mut self) -> Canonical {
200        Canonical::Primitive(self.finish_into_primitive())
201    }
202}
203
204/// A range of uninitialized values in the primitive builder that can be filled.
205pub struct UninitRange<'a, T> {
206    /// The length of the uninitialized range.
207    ///
208    /// This is guaranteed to be within the memory capacity of the builder.
209    len: usize,
210
211    /// A mutable reference to the builder.
212    ///
213    /// Since this is a mutable reference, we can guarantee that nothing else can modify the builder
214    /// while this `UninitRange` exists.
215    builder: &'a mut PrimitiveBuilder<T>,
216}
217
218impl<T> UninitRange<'_, T> {
219    /// Returns the length of this uninitialized range.
220    #[inline]
221    pub fn len(&self) -> usize {
222        self.len
223    }
224
225    /// Returns true if this range has zero length.
226    #[inline]
227    pub fn is_empty(&self) -> bool {
228        self.len == 0
229    }
230
231    /// Set a value at the given index within this range.
232    ///
233    /// # Panics
234    ///
235    /// Panics if the index is out of bounds.
236    #[inline]
237    pub fn set_value(&mut self, index: usize, value: T) {
238        assert!(index < self.len, "index out of bounds");
239        let spare = self.builder.values.spare_capacity_mut();
240        spare[index] = MaybeUninit::new(value);
241    }
242
243    /// Append a [`Mask`] to this builder's null buffer.
244    ///
245    /// # Panics
246    ///
247    /// Panics if the mask length is not equal to the the length of the current `UninitRange`.
248    ///
249    /// # Safety
250    ///
251    /// - The caller must ensure that they safely initialize `mask.len()` primitive values via
252    ///   [`UninitRange::copy_from_slice`].
253    /// - The caller must also ensure that they only call this method once.
254    pub unsafe fn append_mask(&mut self, mask: Mask) {
255        assert_eq!(
256            mask.len(),
257            self.len,
258            "Tried to append a mask to an `UninitRange` that was beyond the allowed range"
259        );
260
261        // TODO(connor): Ideally, we would call this function `set_mask` and directly set all of the
262        // bits (so that we can call this multiple times), but the underlying `BooleanBuffer` does
263        // not have an easy way to do this correctly.
264
265        self.builder.nulls.append_validity_mask(mask);
266    }
267
268    /// Set a validity bit at the given index.
269    ///
270    /// The index is relative to the start of this range (not relative to the values already in the
271    /// builder).
272    ///
273    /// Note that this will have no effect if the builder is non-nullable.
274    pub fn set_validity_bit(&mut self, index: usize, v: bool) {
275        assert!(index < self.len, "set_bit index out of bounds");
276        // Note that this won't panic because we can only create an `UninitRange` within the
277        // capacity of the builder (it will not automatically resize).
278        let absolute_index = self.builder.values.len() + index;
279        self.builder.nulls.set_bit(absolute_index, v);
280    }
281
282    /// Set values from an initialized range.
283    ///
284    /// Note that the input `offset` should be an offset relative to the local `UninitRange`, not
285    /// the entire `PrimitiveBuilder`.
286    pub fn copy_from_slice(&mut self, local_offset: usize, src: &[T])
287    where
288        T: Copy,
289    {
290        debug_assert!(
291            local_offset + src.len() <= self.len,
292            "tried to copy a slice into a `UninitRange` past its boundary"
293        );
294
295        // SAFETY: &[T] and &[MaybeUninit<T>] have the same layout.
296        let uninit_src: &[MaybeUninit<T>] = unsafe { std::mem::transmute(src) };
297
298        // Note: spare_capacity_mut() returns the spare capacity starting from the current length,
299        // so we just use local_offset directly.
300        let dst =
301            &mut self.builder.values.spare_capacity_mut()[local_offset..local_offset + src.len()];
302        dst.copy_from_slice(uninit_src);
303    }
304
305    /// Get a mutable slice of uninitialized memory at the specified offset within this range.
306    ///
307    /// Note that the offsets are relative to this local range, not to the values already in the
308    /// builder.
309    ///
310    /// # Safety
311    ///
312    /// The caller must ensure that they properly initialize the returned memory before calling
313    /// `finish()` on this range.
314    ///
315    /// # Panics
316    ///
317    /// Panics if `offset + len` exceeds the range bounds.
318    pub unsafe fn slice_uninit_mut(&mut self, offset: usize, len: usize) -> &mut [MaybeUninit<T>] {
319        assert!(
320            offset + len <= self.len,
321            "slice_uninit_mut: offset {} + len {} exceeds range length {}",
322            offset,
323            len,
324            self.len
325        );
326        &mut self.builder.values.spare_capacity_mut()[offset..offset + len]
327    }
328
329    /// Finish building this range, marking it as initialized and advancing the length of the
330    /// underlying values buffer.
331    ///
332    /// # Safety
333    ///
334    /// The caller must ensure that they have safely initialized all `len` values via
335    /// [`copy_from_slice()`] or [`set_value()`], as well as correctly set all of the null bits via
336    /// [`set_validity_bit()`] or [`append_mask()`] if the builder is nullable.
337    ///
338    /// [`copy_from_slice()`]: UninitRange::copy_from_slice
339    /// [`set_value()`]: UninitRange::set_value
340    /// [`set_validity_bit()`]: UninitRange::set_validity_bit
341    /// [`append_mask()`]: UninitRange::append_mask
342    pub unsafe fn finish(self) {
343        // SAFETY: constructor enforces that current length + len does not exceed the capacity of the array.
344        let new_len = self.builder.values.len() + self.len;
345        unsafe { self.builder.values.set_len(new_len) };
346    }
347}
348
#[cfg(test)]
mod tests {
    use super::*;
    use crate::assert_arrays_eq;

    /// Regression test: consecutive uninit ranges must each begin at the builder's current
    /// length rather than at the start of the buffer.
    #[test]
    fn test_multiple_uninit_ranges_correct_offsets() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::NonNullable, 10);

        // Fill and commit the first three slots.
        let mut first = builder.uninit_range(3);
        first.copy_from_slice(0, &[1, 2, 3]);
        // SAFETY: All 3 values of the range were written above.
        unsafe { first.finish() };

        assert_eq!(builder.values(), &[1, 2, 3]);

        // The second range must land directly after the first one.
        let mut second = builder.uninit_range(2);
        second.copy_from_slice(0, &[4, 5]);
        // SAFETY: Both values of the range were written above.
        unsafe { second.finish() };

        assert_eq!(builder.values(), &[1, 2, 3, 4, 5]);

        let built = builder.finish_into_primitive();
        assert_arrays_eq!(built, PrimitiveArray::from_iter([1i32, 2, 3, 4, 5]));
    }

    /// Regression test: `append_mask` lives on `UninitRange` and records the validity of the
    /// values written through the range.
    #[test]
    fn test_append_mask_on_uninit_range() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::Nullable, 5);
        let mut range = builder.uninit_range(3);

        // SAFETY: The three matching values are initialized right below.
        unsafe { range.append_mask(Mask::from_iter([true, false, true])) };

        range.copy_from_slice(0, &[10, 20, 30]);

        // SAFETY: All values are initialized and the mask has been set.
        unsafe { range.finish() };

        let built = builder.finish_into_primitive();
        assert_eq!(built.len(), 3);

        // Only the middle element was masked out as null.
        for (idx, expect_null) in [false, true, false].into_iter().enumerate() {
            assert_eq!(built.scalar_at(idx).unwrap().is_null(), expect_null);
        }
    }

    /// Regression test: `append_mask` rejects a mask whose length differs from the range length.
    #[test]
    #[should_panic(
        expected = "Tried to append a mask to an `UninitRange` that was beyond the allowed range"
    )]
    fn test_append_mask_wrong_length_panics() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::Nullable, 10);
        let mut range = builder.uninit_range(5);

        // Three bits for a five-element range.
        let short_mask = Mask::from_iter([true, false, true]);

        // SAFETY: The call is expected to panic on the length mismatch before doing anything.
        unsafe { range.append_mask(short_mask) };
    }

    /// `copy_from_slice` writes at the requested offset relative to the start of the range.
    #[test]
    fn test_copy_from_slice_with_offsets() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::NonNullable, 10);
        let mut range = builder.uninit_range(6);

        // Three adjacent two-element chunks.
        range.copy_from_slice(0, &[1, 2]);
        range.copy_from_slice(2, &[3, 4]);
        range.copy_from_slice(4, &[5, 6]);

        // SAFETY: The three chunks together cover all 6 values.
        unsafe { range.finish() };

        let built = builder.finish_into_primitive();
        assert_arrays_eq!(built, PrimitiveArray::from_iter([1i32, 2, 3, 4, 5, 6]));
    }

    /// `set_validity_bit` indexes relative to the start of the range.
    ///
    /// The validity buffer for the range is first populated through `append_mask`; individual
    /// bits are then flipped using range-relative indices.
    #[test]
    fn test_set_bit_relative_indexing() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::Nullable, 10);

        // Two pre-existing values, so that the range does not start at absolute index 0.
        builder.append_value(100);
        builder.append_value(200);

        let mut range = builder.uninit_range(3);

        // Start with every element of the range marked as null.
        // SAFETY: The three matching values are initialized below.
        unsafe { range.append_mask(Mask::from_iter([false, false, false])) };

        // Flip the first and last element to valid; the middle one stays null.
        range.set_validity_bit(0, true);
        range.set_validity_bit(2, true);

        range.copy_from_slice(0, &[10, 20, 30]);

        // SAFETY: All 3 values are initialized and their validity has been set.
        unsafe { range.finish() };

        let built = builder.finish_into_primitive();

        assert_eq!(built.len(), 5);
        assert_eq!(built.as_slice::<i32>(), &[100, 200, 10, 20, 30]);

        // Indices 0 and 1 come from `append_value`; indices 2..5 are range indices 0..3.
        for (idx, expect_null) in [false, false, false, true, false].into_iter().enumerate() {
            assert_eq!(built.scalar_at(idx).unwrap().is_null(), expect_null);
        }
    }

    /// Requesting an uninit range of length zero panics.
    #[test]
    #[should_panic(expected = "cannot create an uninit range of length 0")]
    fn test_zero_length_uninit_range_panics() {
        let mut builder = PrimitiveBuilder::<i32>::new(Nullability::NonNullable);
        let _range = builder.uninit_range(0);
    }

    /// Requesting an uninit range larger than the remaining capacity panics.
    #[test]
    #[should_panic(
        expected = "uninit_range of len 10 exceeds builder with length 0 and capacity 6"
    )]
    fn test_uninit_range_exceeds_capacity_panics() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::NonNullable, 5);
        let _range = builder.uninit_range(10);
    }

    /// `copy_from_slice` trips its debug assertion when the copy would run past the range.
    ///
    /// Only compiled with debug assertions enabled, since the check is a `debug_assert!`.
    #[test]
    #[cfg(debug_assertions)]
    #[should_panic(expected = "tried to copy a slice into a `UninitRange` past its boundary")]
    fn test_copy_from_slice_out_of_bounds() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::NonNullable, 10);
        let mut range = builder.uninit_range(3);

        // Offset 1 plus three elements needs four slots, one more than the range provides.
        range.copy_from_slice(1, &[1, 2, 3]);
    }

    /// Demonstrates correct use of the unsafe `finish` contract: set the validity, initialize
    /// every value, then finish.
    #[test]
    fn test_finish_unsafe_contract() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::Nullable, 5);
        let mut range = builder.uninit_range(3);

        // SAFETY: The matching number of values is initialized right below.
        unsafe { range.append_mask(Mask::from_iter([true, true, false])) };

        range.copy_from_slice(0, &[10, 20, 30]);

        // SAFETY: All 3 values are initialized and their validity has been set.
        unsafe { range.finish() };

        let built = builder.finish_into_primitive();
        assert_eq!(built.len(), 3);
        assert_eq!(built.as_slice::<i32>(), &[10, 20, 30]);
    }

    /// `append_scalar` accepts values and nulls of the builder's dtype and rejects other dtypes.
    #[test]
    fn test_append_scalar() {
        use vortex_dtype::DType;

        use crate::scalar::Scalar;
        use crate::vtable::ValidityHelper;

        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::Nullable, 10);

        // Two ordinary values.
        let first = Scalar::primitive(42i32, Nullability::Nullable);
        builder.append_scalar(&first).unwrap();

        let second = Scalar::primitive(84i32, Nullability::Nullable);
        builder.append_scalar(&second).unwrap();

        // Followed by a null of the same dtype.
        let null_dtype = DType::Primitive(vortex_dtype::PType::I32, Nullability::Nullable);
        let null_scalar = Scalar::null(null_dtype);
        builder.append_scalar(&null_scalar).unwrap();

        let built = builder.finish_into_primitive();
        assert_eq!(built.len(), 3);

        // The slot behind the null (index 2) may hold any value, so only the first two are checked.
        let values = built.as_slice::<i32>();
        assert_eq!(values[0], 42);
        assert_eq!(values[1], 84);

        // The first two entries are valid, the third one is null.
        assert!(built.validity().is_valid(0).unwrap());
        assert!(built.validity().is_valid(1).unwrap());
        assert!(!built.validity().is_valid(2).unwrap());

        // A scalar of a different dtype must be rejected.
        let mut strict_builder =
            PrimitiveBuilder::<i32>::with_capacity(Nullability::NonNullable, 10);
        let bool_scalar = Scalar::from(true);
        assert!(strict_builder.append_scalar(&bool_scalar).is_err());
    }
}