vortex_array/builders/primitive.rs
1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::mem::MaybeUninit;
6
7use vortex_buffer::BufferMut;
8use vortex_dtype::{DType, NativePType, Nullability};
9use vortex_mask::Mask;
10
11use crate::arrays::PrimitiveArray;
12use crate::builders::{ArrayBuilder, DEFAULT_BUILDER_CAPACITY, LazyNullBufferBuilder};
13use crate::canonical::{Canonical, ToCanonical};
14use crate::{Array, ArrayRef, IntoArray};
15
16/// The builder for building a [`PrimitiveArray`], parametrized by the `PType`.
17pub struct PrimitiveBuilder<T> {
18    dtype: DType,
19    values: BufferMut<T>,
20    nulls: LazyNullBufferBuilder,
21}
22
23impl<T: NativePType> PrimitiveBuilder<T> {
24    /// Creates a new `PrimitiveBuilder` with a capacity of [`DEFAULT_BUILDER_CAPACITY`].
25    pub fn new(nullability: Nullability) -> Self {
26        Self::with_capacity(nullability, DEFAULT_BUILDER_CAPACITY)
27    }
28
29    /// Creates a new `PrimitiveBuilder` with the given `capacity`.
30    pub fn with_capacity(nullability: Nullability, capacity: usize) -> Self {
31        Self {
32            values: BufferMut::with_capacity(capacity),
33            nulls: LazyNullBufferBuilder::new(capacity),
34            dtype: DType::Primitive(T::PTYPE, nullability),
35        }
36    }
37
38    /// Appends a primitive `value` to the builder.
39    pub fn append_value(&mut self, value: T) {
40        self.values.push(value);
41        self.nulls.append_non_null();
42    }
43
44    /// Appends an optional primitive value to the builder.
45    ///
46    /// If the value is `Some`, it appends the primitive value. If the value is `None`, it appends a
47    /// null.
48    ///
49    /// # Panics
50    ///
51    /// This method will panic if the input is `None` and the builder is non-nullable.
52    pub(crate) fn append_option(&mut self, value: Option<T>) {
53        match value {
54            Some(value) => self.append_value(value),
55            None => self.append_null(),
56        }
57    }
58
59    /// Returns the raw primitive values in this builder as a slice.
60    pub fn values(&self) -> &[T] {
61        self.values.as_ref()
62    }
63
64    /// Create a new handle to the next `len` uninitialized values in the builder.
65    ///
66    /// All reads/writes through the handle to the values buffer or the validity buffer will operate
67    /// on indices relative to the start of the range.
68    ///
69    /// # Panics
70    ///
71    /// Panics if `len` is 0 or if the current length of the builder plus `len` would exceed the
72    /// capacity of the builder's memory.
73    ///
74    /// ## Example
75    ///
76    /// ```
77    /// use std::mem::MaybeUninit;
78    /// use vortex_array::builders::{ArrayBuilder, PrimitiveBuilder};
79    /// use vortex_dtype::Nullability;
80    ///
81    /// // Create a new builder.
82    /// let mut builder: PrimitiveBuilder<i32> =
83    ///     PrimitiveBuilder::with_capacity(Nullability::NonNullable, 5);
84    ///
85    /// // Populate the values.
86    /// let mut uninit_range = builder.uninit_range(5);
87    /// uninit_range.copy_from_slice(0, &[0, 1, 2, 3, 4]);
88    ///
89    /// // SAFETY: We have initialized all 5 values in the range, and since the array builder is
90    /// // non-nullable, we don't need to set any null bits.
91    /// unsafe { uninit_range.finish(); }
92    ///
93    /// let built = builder.finish_into_primitive();
94    ///
95    /// assert_eq!(built.as_slice::<i32>(), &[0i32, 1, 2, 3, 4]);
96    /// ```
97    pub fn uninit_range(&mut self, len: usize) -> UninitRange<'_, T> {
98        assert_ne!(0, len, "cannot create an uninit range of length 0");
99
100        let current_len = self.values.len();
101        assert!(
102            current_len + len <= self.values.capacity(),
103            "uninit_range of len {len} exceeds builder capacity {}",
104            self.values.capacity()
105        );
106
107        UninitRange { len, builder: self }
108    }
109
110    /// Finishes the builder directly into a [`PrimitiveArray`].
111    pub fn finish_into_primitive(&mut self) -> PrimitiveArray {
112        let validity = self
113            .nulls
114            .finish_with_nullability(self.dtype().nullability());
115
116        PrimitiveArray::new(std::mem::take(&mut self.values).freeze(), validity)
117    }
118
119    /// Extends the primitive array with an iterator.
120    pub fn extend_with_iterator(&mut self, iter: impl IntoIterator<Item = T>, mask: Mask) {
121        self.values.extend(iter);
122        self.nulls.append_validity_mask(mask);
123    }
124}
125
126impl<T: NativePType> ArrayBuilder for PrimitiveBuilder<T> {
127    fn as_any(&self) -> &dyn Any {
128        self
129    }
130
131    fn as_any_mut(&mut self) -> &mut dyn Any {
132        self
133    }
134
135    fn dtype(&self) -> &DType {
136        &self.dtype
137    }
138
139    fn len(&self) -> usize {
140        self.values.len()
141    }
142
143    fn append_zeros(&mut self, n: usize) {
144        self.values.push_n(T::default(), n);
145        self.nulls.append_n_non_nulls(n);
146    }
147
148    unsafe fn append_nulls_unchecked(&mut self, n: usize) {
149        self.values.push_n(T::default(), n);
150        self.nulls.append_n_nulls(n);
151    }
152
153    unsafe fn extend_from_array_unchecked(&mut self, array: &dyn Array) {
154        let array = array.to_primitive();
155
156        // This should be checked in `extend_from_array` but we can check it again.
157        debug_assert_eq!(
158            array.ptype(),
159            T::PTYPE,
160            "Cannot extend from array with different ptype"
161        );
162
163        self.values.extend_from_slice(array.as_slice::<T>());
164        self.nulls.append_validity_mask(array.validity_mask());
165    }
166
167    fn ensure_capacity(&mut self, capacity: usize) {
168        if capacity > self.values.capacity() {
169            self.values.reserve(capacity - self.values.len());
170            self.nulls.ensure_capacity(capacity);
171        }
172    }
173
174    fn set_validity(&mut self, validity: Mask) {
175        self.nulls = LazyNullBufferBuilder::new(validity.len());
176        self.nulls.append_validity_mask(validity);
177    }
178
179    fn finish(&mut self) -> ArrayRef {
180        self.finish_into_primitive().into_array()
181    }
182
183    fn finish_into_canonical(&mut self) -> Canonical {
184        Canonical::Primitive(self.finish_into_primitive())
185    }
186}
187
188/// A range of uninitialized values in the primitive builder that can be filled.
189pub struct UninitRange<'a, T> {
190    /// The length of the uninitialized range.
191    ///
192    /// This is guaranteed to be within the memory capacity of the builder.
193    len: usize,
194
195    /// A mutable reference to the builder.
196    ///
197    /// Since this is a mutable reference, we can guarantee that nothing else can modify the builder
198    /// while this `UninitRange` exists.
199    builder: &'a mut PrimitiveBuilder<T>,
200}
201
202impl<T> UninitRange<'_, T> {
203    /// Returns the length of this uninitialized range.
204    #[inline]
205    pub fn len(&self) -> usize {
206        self.len
207    }
208
209    /// Returns true if this range has zero length.
210    #[inline]
211    pub fn is_empty(&self) -> bool {
212        self.len == 0
213    }
214
215    /// Set a value at the given index within this range.
216    ///
217    /// # Panics
218    ///
219    /// Panics if the index is out of bounds.
220    #[inline]
221    pub fn set_value(&mut self, index: usize, value: T) {
222        assert!(index < self.len, "index out of bounds");
223        let spare = self.builder.values.spare_capacity_mut();
224        spare[index] = MaybeUninit::new(value);
225    }
226
227    /// Append a [`Mask`] to this builder's null buffer.
228    ///
229    /// # Panics
230    ///
231    /// Panics if the mask length is not equal to the the length of the current `UninitRange`.
232    ///
233    /// # Safety
234    ///
235    /// - The caller must ensure that they safely initialize `mask.len()` primitive values via
236    ///   [`UninitRange::copy_from_slice`].
237    /// - The caller must also ensure that they only call this method once.
238    pub unsafe fn append_mask(&mut self, mask: Mask) {
239        assert_eq!(
240            mask.len(),
241            self.len,
242            "Tried to append a mask to an `UninitRange` that was beyond the allowed range"
243        );
244
245        // TODO(connor): Ideally, we would call this function `set_mask` and directly set all of the
246        // bits (so that we can call this multiple times), but the underlying `BooleanBuffer` does
247        // not have an easy way to do this correctly.
248
249        self.builder.nulls.append_validity_mask(mask);
250    }
251
252    /// Set a validity bit at the given index.
253    ///
254    /// The index is relative to the start of this range (not relative to the values already in the
255    /// builder).
256    ///
257    /// Note that this will have no effect if the builder is non-nullable.
258    pub fn set_bit(&mut self, index: usize, v: bool) {
259        assert!(index < self.len, "set_bit index out of bounds");
260        // Note that this won't panic because we can only create an `UninitRange` within the
261        // capacity of the builder (it will not automatically resize).
262        let absolute_index = self.builder.values.len() + index;
263        self.builder.nulls.set_bit(absolute_index, v);
264    }
265
266    /// Set values from an initialized range.
267    ///
268    /// Note that the input `offset` should be an offset relative to the local `UninitRange`, not
269    /// the entire `PrimitiveBuilder`.
270    pub fn copy_from_slice(&mut self, local_offset: usize, src: &[T])
271    where
272        T: Copy,
273    {
274        debug_assert!(
275            local_offset + src.len() <= self.len,
276            "tried to copy a slice into a `UninitRange` past its boundary"
277        );
278
279        // SAFETY: &[T] and &[MaybeUninit<T>] have the same layout.
280        let uninit_src: &[MaybeUninit<T>] = unsafe { std::mem::transmute(src) };
281
282        // Note: spare_capacity_mut() returns the spare capacity starting from the current length,
283        // so we just use local_offset directly.
284        let dst =
285            &mut self.builder.values.spare_capacity_mut()[local_offset..local_offset + src.len()];
286        dst.copy_from_slice(uninit_src);
287    }
288
289    /// Get a mutable slice of uninitialized memory at the specified offset within this range.
290    ///
291    /// Note that the offsets are relative to this local range, not to the values already in the
292    /// builder.
293    ///
294    /// # Safety
295    ///
296    /// The caller must ensure that they properly initialize the returned memory before calling
297    /// `finish()` on this range.
298    ///
299    /// # Panics
300    ///
301    /// Panics if `offset + len` exceeds the range bounds.
302    pub unsafe fn slice_uninit_mut(&mut self, offset: usize, len: usize) -> &mut [MaybeUninit<T>] {
303        assert!(
304            offset + len <= self.len,
305            "slice_uninit_mut: offset {} + len {} exceeds range length {}",
306            offset,
307            len,
308            self.len
309        );
310        &mut self.builder.values.spare_capacity_mut()[offset..offset + len]
311    }
312
313    /// Finish building this range, marking it as initialized and advancing the length of the
314    /// underlying values buffer.
315    ///
316    /// # Safety
317    ///
318    /// The caller must ensure that they have safely initialized all `len` values via
319    /// [`UninitRange::copy_from_slice`] as well as correctly set all of the null bits via
320    /// [`set_bit`] or [`append_mask`] if the builder is nullable.
321    ///
322    /// [`set_bit`]: UninitRange::set_bit
323    /// [`append_mask`]: UninitRange::append_mask
324    pub unsafe fn finish(self) {
325        // SAFETY: constructor enforces that current length + len does not exceed the capacity of the array.
326        let new_len = self.builder.values.len() + self.len;
327        unsafe { self.builder.values.set_len(new_len) };
328    }
329}
330
#[cfg(test)]
mod tests {
    use super::*;

    /// Reports, for every position of `array` in order, whether the scalar stored there is null.
    fn null_flags(array: &PrimitiveArray) -> Vec<bool> {
        (0..array.len())
            .map(|idx| array.scalar_at(idx).is_null())
            .collect()
    }

    /// REGRESSION TEST: consecutive ranges must each start where the previous one ended.
    ///
    /// This would have caught the `Deref` bug where the range always pointed at the start of the
    /// buffer.
    #[test]
    fn test_multiple_uninit_ranges_correct_offsets() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::NonNullable, 10);

        // Fill and commit the first range.
        let mut first = builder.uninit_range(3);
        first.copy_from_slice(0, &[1, 2, 3]);
        // SAFETY: We initialized all 3 values.
        unsafe { first.finish() };

        // The builder must now expose exactly those values.
        assert_eq!(builder.values(), &[1, 2, 3]);

        // Fill and commit the second range - this would fail with the old Deref implementation.
        let mut second = builder.uninit_range(2);
        second.copy_from_slice(0, &[4, 5]);
        // SAFETY: We initialized both values.
        unsafe { second.finish() };

        // All 5 values must be present, in order.
        assert_eq!(builder.values(), &[1, 2, 3, 4, 5]);

        let built = builder.finish_into_primitive();
        assert_eq!(built.as_slice::<i32>(), &[1, 2, 3, 4, 5]);
    }

    /// REGRESSION TEST: `append_mask` lives on `UninitRange` rather than on `PrimitiveBuilder`.
    ///
    /// The old API had `append_mask` on the builder, which was confusing when used with ranges.
    /// This test ensures the new API works correctly.
    #[test]
    fn test_append_mask_on_uninit_range() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::Nullable, 5);
        let mut range = builder.uninit_range(3);

        // One validity entry per value in the range.
        let validity = Mask::from_iter([true, false, true]);
        // SAFETY: We're about to initialize the values.
        unsafe { range.append_mask(validity) };

        range.copy_from_slice(0, &[10, 20, 30]);

        // SAFETY: We've initialized all values and set the mask.
        unsafe { range.finish() };

        let built = builder.finish_into_primitive();
        assert_eq!(built.len(), 3);
        // Only the middle position was masked out, so only it reads back as null.
        assert_eq!(null_flags(&built), [false, true, false]);
    }

    /// REGRESSION TEST: `append_mask` must reject a mask whose length differs from the range.
    #[test]
    #[should_panic(
        expected = "Tried to append a mask to an `UninitRange` that was beyond the allowed range"
    )]
    fn test_append_mask_wrong_length_panics() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::Nullable, 10);
        let mut range = builder.uninit_range(5);

        // The mask has 3 entries while the range has 5.
        let short_mask = Mask::from_iter([true, false, true]);

        // SAFETY: This is expected to panic due to length mismatch.
        unsafe { range.append_mask(short_mask) };
    }

    /// Test that `copy_from_slice` writes to the requested offsets within the range.
    ///
    /// This verifies the simplified API without the redundant `len` parameter.
    #[test]
    fn test_copy_from_slice_with_offsets() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::NonNullable, 10);
        let mut range = builder.uninit_range(6);

        // Three adjacent two-element chunks.
        range.copy_from_slice(0, &[1, 2]);
        range.copy_from_slice(2, &[3, 4]);
        range.copy_from_slice(4, &[5, 6]);

        // SAFETY: We've initialized all 6 values.
        unsafe { range.finish() };

        let built = builder.finish_into_primitive();
        assert_eq!(built.as_slice::<i32>(), &[1, 2, 3, 4, 5, 6]);
    }

    /// Test that `set_bit` uses indices relative to the range.
    ///
    /// Note: `set_bit` requires the null buffer to already be initialized, so `append_mask` is
    /// used first to set up the buffer, and `set_bit` then flips individual bits.
    #[test]
    fn test_set_bit_relative_indexing() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::Nullable, 10);

        // Two values are already present before the range is created.
        builder.append_value(100);
        builder.append_value(200);

        let mut range = builder.uninit_range(3);

        // Start with every position in the range marked as null.
        let all_null = Mask::from_iter([false, false, false]);
        // SAFETY: We're about to initialize the values.
        unsafe { range.append_mask(all_null) };

        // Flip the first and third positions to valid; the middle one stays null.
        range.set_bit(0, true);
        range.set_bit(2, true);

        range.copy_from_slice(0, &[10, 20, 30]);

        // SAFETY: We've initialized all 3 values and set their validity.
        unsafe { range.finish() };

        let built = builder.finish_into_primitive();

        assert_eq!(built.len(), 5);
        assert_eq!(built.as_slice::<i32>(), &[100, 200, 10, 20, 30]);

        // Positions 0 and 1 come from `append_value` and are valid. Positions 2..5 are the range:
        // valid, null, valid.
        assert_eq!(null_flags(&built), [false, false, false, true, false]);
    }

    /// Test that creating a zero-length uninit range panics.
    #[test]
    #[should_panic(expected = "cannot create an uninit range of length 0")]
    fn test_zero_length_uninit_range_panics() {
        let mut builder = PrimitiveBuilder::<i32>::new(Nullability::NonNullable);
        let _empty = builder.uninit_range(0);
    }

    /// Test that creating an uninit range exceeding capacity panics.
    #[test]
    #[should_panic(expected = "uninit_range of len 10 exceeds builder capacity")]
    fn test_uninit_range_exceeds_capacity_panics() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::NonNullable, 5);
        let _oversized = builder.uninit_range(10);
    }

    /// Test that `copy_from_slice` debug asserts on out-of-bounds access.
    ///
    /// Note: This only panics in debug mode due to `debug_assert!`.
    #[test]
    #[cfg(debug_assertions)]
    #[should_panic(expected = "tried to copy a slice into a `UninitRange` past its boundary")]
    fn test_copy_from_slice_out_of_bounds() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::NonNullable, 10);
        let mut range = builder.uninit_range(3);

        // 3 elements starting at offset 1 would need 4 slots, but the range only has 3.
        range.copy_from_slice(1, &[1, 2, 3]);
    }

    /// Test that `finish` works when its unsafe contract is upheld.
    ///
    /// This test demonstrates proper usage of the unsafe `finish` method.
    #[test]
    fn test_finish_unsafe_contract() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::Nullable, 5);
        let mut range = builder.uninit_range(3);

        let validity = Mask::from_iter([true, true, false]);
        // SAFETY: We're about to initialize the matching number of values.
        unsafe { range.append_mask(validity) };

        range.copy_from_slice(0, &[10, 20, 30]);

        // SAFETY: We have initialized all 3 values and set their validity.
        unsafe { range.finish() };

        let built = builder.finish_into_primitive();
        assert_eq!(built.len(), 3);
        assert_eq!(built.as_slice::<i32>(), &[10, 20, 30]);
    }
}
558}