vortex_array/builders/
primitive.rs

1use std::any::Any;
2use std::mem::MaybeUninit;
3use std::ops::{Deref, DerefMut};
4
5use vortex_buffer::BufferMut;
6use vortex_dtype::{DType, NativePType, Nullability};
7use vortex_error::{VortexResult, vortex_bail, vortex_panic};
8use vortex_mask::Mask;
9
10use crate::arrays::{BoolArray, PrimitiveArray};
11use crate::builders::ArrayBuilder;
12use crate::builders::lazy_validity_builder::LazyNullBufferBuilder;
13use crate::validity::Validity;
14use crate::{Array, ArrayRef, IntoArray, ToCanonical};
15
16/// Builder for [`PrimitiveArray`].
17pub struct PrimitiveBuilder<T> {
18    values: BufferMut<T>,
19    nulls: LazyNullBufferBuilder,
20    dtype: DType,
21}
22
23impl<T: NativePType> PrimitiveBuilder<T> {
24    pub fn new(nullability: Nullability) -> Self {
25        Self::with_capacity(nullability, 1024) // Same as Arrow builders
26    }
27
28    pub fn with_capacity(nullability: Nullability, capacity: usize) -> Self {
29        Self {
30            values: BufferMut::with_capacity(capacity),
31            nulls: LazyNullBufferBuilder::new(capacity),
32            dtype: DType::Primitive(T::PTYPE, nullability),
33        }
34    }
35
36    /// Append a `Mask` to the null buffer.
37    pub fn append_mask(&mut self, mask: Mask) {
38        self.nulls.append_validity_mask(mask);
39    }
40
41    pub fn append_value(&mut self, value: T) {
42        self.values.push(value);
43        self.nulls.append(true);
44    }
45
46    pub fn append_option(&mut self, value: Option<T>) {
47        match value {
48            Some(value) => {
49                self.values.push(value);
50                self.nulls.append(true);
51            }
52            None => self.append_null(),
53        }
54    }
55
56    pub fn values(&self) -> &[T] {
57        self.values.as_ref()
58    }
59
60    /// Create a new handle to the next `len` uninitialized values in the builder.
61    ///
62    /// All reads/writes through the handle to the values buffer or the validity buffer will operate
63    /// on indices relative to the start of the range.
64    ///
65    ///
66    /// ## Example
67    ///
68    /// ```
69    /// use std::mem::MaybeUninit;
70    /// use vortex_array::builders::{ArrayBuilder, PrimitiveBuilder};
71    /// use vortex_dtype::Nullability;
72    ///
73    /// // Create a new builder.
74    /// let mut builder: PrimitiveBuilder<i32> = PrimitiveBuilder::with_capacity(Nullability::NonNullable, 5);
75    ///
76    /// // Populate the values in reverse order.
77    /// let mut range = builder.uninit_range(5);
78    /// for i in [4, 3, 2, 1, 0] {
79    ///     range[i] = MaybeUninit::new(i as i32);
80    /// }
81    /// range.finish();
82    ///
83    /// let built = builder.finish_into_primitive();
84    ///
85    /// assert_eq!(built.as_slice::<i32>(), &[0i32, 1, 2, 3, 4]);
86    /// ```
87    pub fn uninit_range(&mut self, len: usize) -> UninitRange<T> {
88        let offset = self.values.len();
89        assert!(
90            offset + len <= self.values.capacity(),
91            "uninit_range of len {len} exceeds builder capacity {}",
92            self.values.capacity()
93        );
94
95        UninitRange {
96            offset,
97            len,
98            builder: self,
99        }
100    }
101
102    pub fn finish_into_primitive(&mut self) -> PrimitiveArray {
103        let nulls = self.nulls.finish();
104
105        if let Some(null_buf) = nulls.as_ref() {
106            assert_eq!(
107                null_buf.len(),
108                self.values.len(),
109                "null buffer length must equal value buffer length"
110            );
111        }
112
113        let validity = match (nulls, self.dtype().nullability()) {
114            (None, Nullability::NonNullable) => Validity::NonNullable,
115            (Some(_), Nullability::NonNullable) => {
116                vortex_panic!("Non-nullable builder has null values")
117            }
118            (None, Nullability::Nullable) => Validity::AllValid,
119            (Some(nulls), Nullability::Nullable) => {
120                if nulls.null_count() == nulls.len() {
121                    Validity::AllInvalid
122                } else {
123                    Validity::Array(BoolArray::from(nulls.into_inner()).into_array())
124                }
125            }
126        };
127
128        PrimitiveArray::new(std::mem::take(&mut self.values).freeze(), validity)
129    }
130
131    pub fn extend_with_iterator(&mut self, iter: impl IntoIterator<Item = T>, mask: Mask) {
132        self.values.extend(iter);
133        self.extend_with_validity_mask(mask)
134    }
135
136    fn extend_with_validity_mask(&mut self, validity_mask: Mask) {
137        self.nulls.append_validity_mask(validity_mask);
138    }
139}
140
141impl<T: NativePType> ArrayBuilder for PrimitiveBuilder<T> {
142    fn as_any(&self) -> &dyn Any {
143        self
144    }
145
146    fn as_any_mut(&mut self) -> &mut dyn Any {
147        self
148    }
149
150    fn dtype(&self) -> &DType {
151        &self.dtype
152    }
153
154    fn len(&self) -> usize {
155        self.values.len()
156    }
157
158    fn append_zeros(&mut self, n: usize) {
159        self.values.push_n(T::default(), n);
160        self.nulls.append_n_non_nulls(n);
161    }
162
163    fn append_nulls(&mut self, n: usize) {
164        self.values.push_n(T::default(), n);
165        self.nulls.append_n_nulls(n);
166    }
167
168    fn extend_from_array(&mut self, array: &dyn Array) -> VortexResult<()> {
169        let array = array.to_primitive()?;
170        if array.ptype() != T::PTYPE {
171            vortex_bail!("Cannot extend from array with different ptype");
172        }
173
174        self.values.extend_from_slice(array.as_slice::<T>());
175
176        self.extend_with_validity_mask(array.validity_mask()?);
177
178        Ok(())
179    }
180
181    fn ensure_capacity(&mut self, capacity: usize) {
182        if capacity > self.values.capacity() {
183            self.values.reserve(capacity - self.values.len());
184            self.nulls.ensure_capacity(capacity);
185        }
186    }
187
188    fn set_validity(&mut self, validity: Mask) {
189        self.nulls = LazyNullBufferBuilder::new(validity.len());
190        self.nulls.append_validity_mask(validity);
191    }
192
193    fn finish(&mut self) -> ArrayRef {
194        self.finish_into_primitive().into_array()
195    }
196}
197
198pub struct UninitRange<'a, T> {
199    offset: usize,
200    len: usize,
201    builder: &'a mut PrimitiveBuilder<T>,
202}
203
204impl<T> Deref for UninitRange<'_, T> {
205    type Target = [MaybeUninit<T>];
206
207    fn deref(&self) -> &[MaybeUninit<T>] {
208        let start = self.builder.values.as_ptr();
209        unsafe {
210            // SAFETY: start + len is checked on construction to be in range.
211            let dst = std::slice::from_raw_parts(start, self.len);
212
213            // SAFETY: &[T] and &[MaybeUninit<T>] have the same layout
214            let dst: &[MaybeUninit<T>] = std::mem::transmute(dst);
215
216            dst
217        }
218    }
219}
220
221impl<T> DerefMut for UninitRange<'_, T> {
222    fn deref_mut(&mut self) -> &mut [MaybeUninit<T>] {
223        &mut self.builder.values.spare_capacity_mut()[..self.len]
224    }
225}
226
227impl<T> UninitRange<'_, T> {
228    /// Set a validity bit at the given index. The index is relative to the start of this range
229    /// of the builder.
230    pub fn set_bit(&mut self, index: usize, v: bool) {
231        self.builder.nulls.set_bit(self.offset + index, v);
232    }
233
234    /// Set values from an initialized range.
235    pub fn copy_from_init(&mut self, offset: usize, len: usize, src: &[T])
236    where
237        T: Copy,
238    {
239        // SAFETY: &[T] and &[MaybeUninit<T>] have the same layout
240        let uninit_src: &[MaybeUninit<T>] = unsafe { std::mem::transmute(src) };
241
242        let dst = &mut self[offset..][..len];
243        dst.copy_from_slice(uninit_src);
244    }
245
246    /// Finish building this range, marking it as initialized and advancing the length of the
247    /// underlying values buffer.
248    pub fn finish(self) {
249        // SAFETY: constructor enforces that offset + len does not exceed the capacity of the array.
250        unsafe { self.builder.values.set_len(self.offset + self.len) };
251    }
252}