vortex_array/builders/primitive.rs
1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::mem::MaybeUninit;
6
7use vortex_buffer::BufferMut;
8use vortex_dtype::{DType, NativePType, Nullability};
9use vortex_error::{VortexResult, vortex_ensure};
10use vortex_mask::Mask;
11use vortex_scalar::{PrimitiveScalar, Scalar};
12
13use crate::arrays::PrimitiveArray;
14use crate::builders::{ArrayBuilder, DEFAULT_BUILDER_CAPACITY, LazyBitBufferBuilder};
15use crate::canonical::{Canonical, ToCanonical};
16use crate::{Array, ArrayRef, IntoArray};
17
18/// The builder for building a [`PrimitiveArray`], parametrized by the `PType`.
19pub struct PrimitiveBuilder<T> {
20 dtype: DType,
21 values: BufferMut<T>,
22 nulls: LazyBitBufferBuilder,
23}
24
25impl<T: NativePType> PrimitiveBuilder<T> {
26 /// Creates a new `PrimitiveBuilder` with a capacity of [`DEFAULT_BUILDER_CAPACITY`].
27 pub fn new(nullability: Nullability) -> Self {
28 Self::with_capacity(nullability, DEFAULT_BUILDER_CAPACITY)
29 }
30
31 /// Creates a new `PrimitiveBuilder` with the given `capacity`.
32 pub fn with_capacity(nullability: Nullability, capacity: usize) -> Self {
33 Self {
34 values: BufferMut::with_capacity(capacity),
35 nulls: LazyBitBufferBuilder::new(capacity),
36 dtype: DType::Primitive(T::PTYPE, nullability),
37 }
38 }
39
40 /// Appends a primitive `value` to the builder.
41 pub fn append_value(&mut self, value: T) {
42 self.values.push(value);
43 self.nulls.append_non_null();
44 }
45
46 /// Returns the raw primitive values in this builder as a slice.
47 pub fn values(&self) -> &[T] {
48 self.values.as_ref()
49 }
50
51 /// Create a new handle to the next `len` uninitialized values in the builder.
52 ///
53 /// All reads/writes through the handle to the values buffer or the validity buffer will operate
54 /// on indices relative to the start of the range.
55 ///
56 /// # Panics
57 ///
58 /// Panics if `len` is 0 or if the current length of the builder plus `len` would exceed the
59 /// capacity of the builder's memory.
60 ///
61 /// ## Example
62 ///
63 /// ```
64 /// use std::mem::MaybeUninit;
65 /// use vortex_array::builders::{ArrayBuilder, PrimitiveBuilder};
66 /// use vortex_dtype::Nullability;
67 ///
68 /// // Create a new builder.
69 /// let mut builder: PrimitiveBuilder<i32> =
70 /// PrimitiveBuilder::with_capacity(Nullability::NonNullable, 5);
71 ///
72 /// // Populate the values.
73 /// let mut uninit_range = builder.uninit_range(5);
74 /// uninit_range.copy_from_slice(0, &[0, 1, 2, 3, 4]);
75 ///
76 /// // SAFETY: We have initialized all 5 values in the range, and since the array builder is
77 /// // non-nullable, we don't need to set any null bits.
78 /// unsafe { uninit_range.finish(); }
79 ///
80 /// let built = builder.finish_into_primitive();
81 ///
82 /// assert_eq!(built.as_slice::<i32>(), &[0i32, 1, 2, 3, 4]);
83 /// ```
84 pub fn uninit_range(&mut self, len: usize) -> UninitRange<'_, T> {
85 assert_ne!(0, len, "cannot create an uninit range of length 0");
86
87 let current_len = self.values.len();
88 assert!(
89 current_len + len <= self.values.capacity(),
90 "uninit_range of len {len} exceeds builder with length {} and capacity {}",
91 current_len,
92 self.values.capacity()
93 );
94
95 UninitRange { len, builder: self }
96 }
97
98 /// Finishes the builder directly into a [`PrimitiveArray`].
99 pub fn finish_into_primitive(&mut self) -> PrimitiveArray {
100 let validity = self
101 .nulls
102 .finish_with_nullability(self.dtype().nullability());
103
104 PrimitiveArray::new(std::mem::take(&mut self.values).freeze(), validity)
105 }
106
107 /// Extends the primitive array with an iterator.
108 pub fn extend_with_iterator(&mut self, iter: impl IntoIterator<Item = T>, mask: Mask) {
109 self.values.extend(iter);
110 self.nulls.append_validity_mask(mask);
111 }
112}
113
114impl<T: NativePType> ArrayBuilder for PrimitiveBuilder<T> {
115 fn as_any(&self) -> &dyn Any {
116 self
117 }
118
119 fn as_any_mut(&mut self) -> &mut dyn Any {
120 self
121 }
122
123 fn dtype(&self) -> &DType {
124 &self.dtype
125 }
126
127 fn len(&self) -> usize {
128 self.values.len()
129 }
130
131 fn append_zeros(&mut self, n: usize) {
132 self.values.push_n(T::default(), n);
133 self.nulls.append_n_non_nulls(n);
134 }
135
136 unsafe fn append_nulls_unchecked(&mut self, n: usize) {
137 self.values.push_n(T::default(), n);
138 self.nulls.append_n_nulls(n);
139 }
140
141 fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()> {
142 vortex_ensure!(
143 scalar.dtype() == self.dtype(),
144 "PrimitiveBuilder expected scalar with dtype {:?}, got {:?}",
145 self.dtype(),
146 scalar.dtype()
147 );
148
149 let primitive_scalar = PrimitiveScalar::try_from(scalar)?;
150 match primitive_scalar.pvalue() {
151 Some(pv) => self.append_value(pv.cast::<T>()),
152 None => self.append_null(),
153 }
154
155 Ok(())
156 }
157
158 unsafe fn extend_from_array_unchecked(&mut self, array: &dyn Array) {
159 let array = array.to_primitive();
160
161 // This should be checked in `extend_from_array` but we can check it again.
162 debug_assert_eq!(
163 array.ptype(),
164 T::PTYPE,
165 "Cannot extend from array with different ptype"
166 );
167
168 self.values.extend_from_slice(array.as_slice::<T>());
169 self.nulls.append_validity_mask(array.validity_mask());
170 }
171
172 fn reserve_exact(&mut self, additional: usize) {
173 self.values.reserve(additional);
174 self.nulls.reserve_exact(additional);
175 }
176
177 unsafe fn set_validity_unchecked(&mut self, validity: Mask) {
178 self.nulls = LazyBitBufferBuilder::new(validity.len());
179 self.nulls.append_validity_mask(validity);
180 }
181
182 fn finish(&mut self) -> ArrayRef {
183 self.finish_into_primitive().into_array()
184 }
185
186 fn finish_into_canonical(&mut self) -> Canonical {
187 Canonical::Primitive(self.finish_into_primitive())
188 }
189}
190
191/// A range of uninitialized values in the primitive builder that can be filled.
192pub struct UninitRange<'a, T> {
193 /// The length of the uninitialized range.
194 ///
195 /// This is guaranteed to be within the memory capacity of the builder.
196 len: usize,
197
198 /// A mutable reference to the builder.
199 ///
200 /// Since this is a mutable reference, we can guarantee that nothing else can modify the builder
201 /// while this `UninitRange` exists.
202 builder: &'a mut PrimitiveBuilder<T>,
203}
204
205impl<T> UninitRange<'_, T> {
206 /// Returns the length of this uninitialized range.
207 #[inline]
208 pub fn len(&self) -> usize {
209 self.len
210 }
211
212 /// Returns true if this range has zero length.
213 #[inline]
214 pub fn is_empty(&self) -> bool {
215 self.len == 0
216 }
217
218 /// Set a value at the given index within this range.
219 ///
220 /// # Panics
221 ///
222 /// Panics if the index is out of bounds.
223 #[inline]
224 pub fn set_value(&mut self, index: usize, value: T) {
225 assert!(index < self.len, "index out of bounds");
226 let spare = self.builder.values.spare_capacity_mut();
227 spare[index] = MaybeUninit::new(value);
228 }
229
230 /// Append a [`Mask`] to this builder's null buffer.
231 ///
232 /// # Panics
233 ///
234 /// Panics if the mask length is not equal to the the length of the current `UninitRange`.
235 ///
236 /// # Safety
237 ///
238 /// - The caller must ensure that they safely initialize `mask.len()` primitive values via
239 /// [`UninitRange::copy_from_slice`].
240 /// - The caller must also ensure that they only call this method once.
241 pub unsafe fn append_mask(&mut self, mask: Mask) {
242 assert_eq!(
243 mask.len(),
244 self.len,
245 "Tried to append a mask to an `UninitRange` that was beyond the allowed range"
246 );
247
248 // TODO(connor): Ideally, we would call this function `set_mask` and directly set all of the
249 // bits (so that we can call this multiple times), but the underlying `BooleanBuffer` does
250 // not have an easy way to do this correctly.
251
252 self.builder.nulls.append_validity_mask(mask);
253 }
254
255 /// Set a validity bit at the given index.
256 ///
257 /// The index is relative to the start of this range (not relative to the values already in the
258 /// builder).
259 ///
260 /// Note that this will have no effect if the builder is non-nullable.
261 pub fn set_validity_bit(&mut self, index: usize, v: bool) {
262 assert!(index < self.len, "set_bit index out of bounds");
263 // Note that this won't panic because we can only create an `UninitRange` within the
264 // capacity of the builder (it will not automatically resize).
265 let absolute_index = self.builder.values.len() + index;
266 self.builder.nulls.set_bit(absolute_index, v);
267 }
268
269 /// Set values from an initialized range.
270 ///
271 /// Note that the input `offset` should be an offset relative to the local `UninitRange`, not
272 /// the entire `PrimitiveBuilder`.
273 pub fn copy_from_slice(&mut self, local_offset: usize, src: &[T])
274 where
275 T: Copy,
276 {
277 debug_assert!(
278 local_offset + src.len() <= self.len,
279 "tried to copy a slice into a `UninitRange` past its boundary"
280 );
281
282 // SAFETY: &[T] and &[MaybeUninit<T>] have the same layout.
283 let uninit_src: &[MaybeUninit<T>] = unsafe { std::mem::transmute(src) };
284
285 // Note: spare_capacity_mut() returns the spare capacity starting from the current length,
286 // so we just use local_offset directly.
287 let dst =
288 &mut self.builder.values.spare_capacity_mut()[local_offset..local_offset + src.len()];
289 dst.copy_from_slice(uninit_src);
290 }
291
292 /// Get a mutable slice of uninitialized memory at the specified offset within this range.
293 ///
294 /// Note that the offsets are relative to this local range, not to the values already in the
295 /// builder.
296 ///
297 /// # Safety
298 ///
299 /// The caller must ensure that they properly initialize the returned memory before calling
300 /// `finish()` on this range.
301 ///
302 /// # Panics
303 ///
304 /// Panics if `offset + len` exceeds the range bounds.
305 pub unsafe fn slice_uninit_mut(&mut self, offset: usize, len: usize) -> &mut [MaybeUninit<T>] {
306 assert!(
307 offset + len <= self.len,
308 "slice_uninit_mut: offset {} + len {} exceeds range length {}",
309 offset,
310 len,
311 self.len
312 );
313 &mut self.builder.values.spare_capacity_mut()[offset..offset + len]
314 }
315
316 /// Finish building this range, marking it as initialized and advancing the length of the
317 /// underlying values buffer.
318 ///
319 /// # Safety
320 ///
321 /// The caller must ensure that they have safely initialized all `len` values via
322 /// [`copy_from_slice()`] or [`set_value()`], as well as correctly set all of the null bits via
323 /// [`set_validity_bit()`] or [`append_mask()`] if the builder is nullable.
324 ///
325 /// [`copy_from_slice()`]: UninitRange::copy_from_slice
326 /// [`set_value()`]: UninitRange::set_value
327 /// [`set_validity_bit()`]: UninitRange::set_validity_bit
328 /// [`append_mask()`]: UninitRange::append_mask
329 pub unsafe fn finish(self) {
330 // SAFETY: constructor enforces that current length + len does not exceed the capacity of the array.
331 let new_len = self.builder.values.len() + self.len;
332 unsafe { self.builder.values.set_len(new_len) };
333 }
334}
335
#[cfg(test)]
mod tests {
    use super::*;

    /// REGRESSION TEST: consecutive uninit ranges must each begin where the previous one ended.
    ///
    /// Guards against the earlier `Deref` bug in which a range always resolved to the start of
    /// the buffer.
    #[test]
    fn test_multiple_uninit_ranges_correct_offsets() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::NonNullable, 10);

        // Fill and complete a first range of three values.
        let mut first = builder.uninit_range(3);
        first.copy_from_slice(0, &[1, 2, 3]);
        // SAFETY: all 3 slots of the first range were written above.
        unsafe { first.finish() };

        assert_eq!(builder.values(), &[1, 2, 3]);

        // The second range is where the old `Deref` implementation went wrong.
        let mut second = builder.uninit_range(2);
        second.copy_from_slice(0, &[4, 5]);
        // SAFETY: both slots of the second range were written above.
        unsafe { second.finish() };

        // Both ranges must now be visible, in order.
        assert_eq!(builder.values(), &[1, 2, 3, 4, 5]);

        let built = builder.finish_into_primitive();
        assert_eq!(built.as_slice::<i32>(), &[1, 2, 3, 4, 5]);
    }

    /// REGRESSION TEST: `append_mask` lives on `UninitRange` (it used to be on
    /// `PrimitiveBuilder`, which was confusing in combination with ranges) and must record the
    /// validity of the range.
    #[test]
    fn test_append_mask_on_uninit_range() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::Nullable, 5);
        let mut range = builder.uninit_range(3);

        // One validity entry per slot of the range.
        let validity = Mask::from_iter([true, false, true]);

        // SAFETY: the matching values are initialized right below.
        unsafe { range.append_mask(validity) };

        range.copy_from_slice(0, &[10, 20, 30]);

        // SAFETY: every value is initialized and the mask has been appended.
        unsafe { range.finish() };

        let built = builder.finish_into_primitive();
        assert_eq!(built.len(), 3);

        // `scalar_at(..).is_null()` reports the validity we supplied through the mask.
        for (idx, expect_null) in [false, true, false].into_iter().enumerate() {
            assert_eq!(built.scalar_at(idx).is_null(), expect_null);
        }
    }

    /// REGRESSION TEST: `append_mask` must reject a mask whose length differs from the range
    /// length.
    #[test]
    #[should_panic(
        expected = "Tried to append a mask to an `UninitRange` that was beyond the allowed range"
    )]
    fn test_append_mask_wrong_length_panics() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::Nullable, 10);
        let mut range = builder.uninit_range(5);

        // Three entries for a range of five slots.
        let too_short = Mask::from_iter([true, false, true]);

        // SAFETY: the call is expected to panic on the length mismatch before doing anything.
        unsafe { range.append_mask(too_short) };
    }

    /// `copy_from_slice` must honour the range-relative offset it is given.
    ///
    /// Exercises the simplified API that no longer takes a redundant `len` parameter.
    #[test]
    fn test_copy_from_slice_with_offsets() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::NonNullable, 10);
        let mut range = builder.uninit_range(6);

        // Three pairs, written at offsets 0, 2 and 4.
        for (pair_idx, pair) in [[1, 2], [3, 4], [5, 6]].iter().enumerate() {
            range.copy_from_slice(pair_idx * 2, pair);
        }

        // SAFETY: the three pairs above cover all 6 slots.
        unsafe { range.finish() };

        let built = builder.finish_into_primitive();
        assert_eq!(built.as_slice::<i32>(), &[1, 2, 3, 4, 5, 6]);
    }

    /// `set_validity_bit` must interpret its index relative to the range.
    ///
    /// Note: setting a bit requires the null buffer to already be initialized, so the test
    /// first uses `append_mask` to set up the buffer and then flips individual bits.
    #[test]
    fn test_set_bit_relative_indexing() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::Nullable, 10);

        // Two pre-existing values, so that range-relative and absolute indices differ.
        builder.append_value(100);
        builder.append_value(200);

        let mut range = builder.uninit_range(3);

        // Start with an all-null validity for the range.
        let all_null = Mask::from_iter([false, false, false]);
        // SAFETY: the matching values are initialized below.
        unsafe { range.append_mask(all_null) };

        // Mark the first and third slot of the range as valid; the middle one stays null.
        range.set_validity_bit(0, true);
        range.set_validity_bit(2, true);

        range.copy_from_slice(0, &[10, 20, 30]);

        // SAFETY: all 3 values are initialized and their validity has been set.
        unsafe { range.finish() };

        let built = builder.finish_into_primitive();

        assert_eq!(built.len(), 5);
        assert_eq!(built.as_slice::<i32>(), &[100, 200, 10, 20, 30]);

        // Entries added through `append_value` are valid.
        assert!(!built.scalar_at(0).is_null()); // 100
        assert!(!built.scalar_at(1).is_null()); // 200

        // Entries added through the range carry the modified validity.
        assert!(!built.scalar_at(2).is_null()); // range index 0: flipped to valid
        assert!(built.scalar_at(3).is_null()); // range index 1: still null
        assert!(!built.scalar_at(4).is_null()); // range index 2: flipped to valid
    }

    /// A zero-length uninit range is rejected.
    #[test]
    #[should_panic(expected = "cannot create an uninit range of length 0")]
    fn test_zero_length_uninit_range_panics() {
        let mut builder = PrimitiveBuilder::<i32>::new(Nullability::NonNullable);
        let _range = builder.uninit_range(0);
    }

    /// An uninit range that does not fit into the builder's capacity is rejected.
    ///
    /// NOTE(review): the expected message reports a capacity of 6 although 5 is requested
    /// below; presumably the buffer ends up with more capacity than asked for. Confirm against
    /// `BufferMut::with_capacity`.
    #[test]
    #[should_panic(
        expected = "uninit_range of len 10 exceeds builder with length 0 and capacity 6"
    )]
    fn test_uninit_range_exceeds_capacity_panics() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::NonNullable, 5);
        let _range = builder.uninit_range(10);
    }

    /// `copy_from_slice` debug-asserts when the copy would run past the end of the range.
    ///
    /// Note: the check is a `debug_assert!`, hence the test only exists in debug builds.
    #[test]
    #[cfg(debug_assertions)]
    #[should_panic(expected = "tried to copy a slice into a `UninitRange` past its boundary")]
    fn test_copy_from_slice_out_of_bounds() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::NonNullable, 10);
        let mut range = builder.uninit_range(3);

        // Three elements from offset 1 would need a fourth slot.
        range.copy_from_slice(1, &[1, 2, 3]);
    }

    /// Demonstrates a correct use of the unsafe `finish` method: validity appended, every value
    /// initialized, then `finish`.
    #[test]
    fn test_finish_unsafe_contract() {
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::Nullable, 5);
        let mut range = builder.uninit_range(3);

        let validity = Mask::from_iter([true, true, false]);
        // SAFETY: the matching number of values is initialized right below.
        unsafe { range.append_mask(validity) };

        range.copy_from_slice(0, &[10, 20, 30]);

        // SAFETY: all 3 values are initialized and their validity has been set.
        unsafe { range.finish() };

        let built = builder.finish_into_primitive();
        assert_eq!(built.len(), 3);
        assert_eq!(built.as_slice::<i32>(), &[10, 20, 30]);
    }

    /// `append_scalar` accepts valid and null scalars of the builder's dtype and rejects
    /// scalars of a different dtype.
    #[test]
    fn test_append_scalar() {
        use vortex_dtype::DType;
        use vortex_scalar::Scalar;

        use crate::vtable::ValidityHelper;

        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::Nullable, 10);

        // Two valid values.
        let forty_two = Scalar::primitive(42i32, Nullability::Nullable);
        builder.append_scalar(&forty_two).unwrap();

        let eighty_four = Scalar::primitive(84i32, Nullability::Nullable);
        builder.append_scalar(&eighty_four).unwrap();

        // One null of the matching dtype.
        let null_i32 = Scalar::null(DType::Primitive(
            vortex_dtype::PType::I32,
            Nullability::Nullable,
        ));
        builder.append_scalar(&null_i32).unwrap();

        let built = builder.finish_into_primitive();
        assert_eq!(built.len(), 3);

        // Only the two valid slots have a meaningful value; the null slot may hold anything.
        let values = built.as_slice::<i32>();
        assert_eq!(values[0], 42);
        assert_eq!(values[1], 84);

        // The first two entries are valid, the third is null.
        assert!(built.validity().is_valid(0));
        assert!(built.validity().is_valid(1));
        assert!(!built.validity().is_valid(2));

        // A scalar of a different dtype must be rejected.
        let mut builder = PrimitiveBuilder::<i32>::with_capacity(Nullability::NonNullable, 10);
        let bool_scalar = Scalar::from(true);
        assert!(builder.append_scalar(&bool_scalar).is_err());
    }
}