vortex_array/builders/
primitive.rs

1use std::any::Any;
2use std::mem::MaybeUninit;
3use std::ops::{Deref, DerefMut};
4
5use vortex_buffer::BufferMut;
6use vortex_dtype::{DType, NativePType, Nullability};
7use vortex_error::{VortexResult, vortex_bail, vortex_panic};
8use vortex_mask::Mask;
9
10use crate::arrays::{BoolArray, PrimitiveArray};
11use crate::builders::ArrayBuilder;
12use crate::builders::lazy_validity_builder::LazyNullBufferBuilder;
13use crate::validity::Validity;
14use crate::variants::PrimitiveArrayTrait;
15use crate::{Array, ArrayRef, ToCanonical};
16
17/// Builder for [`PrimitiveArray`].
18pub struct PrimitiveBuilder<T> {
19    values: BufferMut<T>,
20    nulls: LazyNullBufferBuilder,
21    dtype: DType,
22}
23
24impl<T: NativePType> PrimitiveBuilder<T> {
25    pub fn new(nullability: Nullability) -> Self {
26        Self::with_capacity(nullability, 1024) // Same as Arrow builders
27    }
28
29    pub fn with_capacity(nullability: Nullability, capacity: usize) -> Self {
30        Self {
31            values: BufferMut::with_capacity(capacity),
32            nulls: LazyNullBufferBuilder::new(capacity),
33            dtype: DType::Primitive(T::PTYPE, nullability),
34        }
35    }
36
37    /// Append a `Mask` to the null buffer.
38    pub fn append_mask(&mut self, mask: Mask) {
39        self.nulls.append_validity_mask(mask);
40    }
41
42    pub fn append_value(&mut self, value: T) {
43        self.values.push(value);
44        self.nulls.append(true);
45    }
46
47    pub fn append_option(&mut self, value: Option<T>) {
48        match value {
49            Some(value) => {
50                self.values.push(value);
51                self.nulls.append(true);
52            }
53            None => self.append_null(),
54        }
55    }
56
57    pub fn values(&self) -> &[T] {
58        self.values.as_ref()
59    }
60
61    /// Create a new handle to the next `len` uninitialized values in the builder.
62    ///
63    /// All reads/writes through the handle to the values buffer or the validity buffer will operate
64    /// on indices relative to the start of the range.
65    ///
66    ///
67    /// ## Example
68    ///
69    /// ```
70    /// use std::mem::MaybeUninit;
71    /// use vortex_array::builders::{ArrayBuilder, PrimitiveBuilder};
72    /// use vortex_dtype::Nullability;
73    ///
74    /// // Create a new builder.
75    /// let mut builder: PrimitiveBuilder<i32> = PrimitiveBuilder::with_capacity(Nullability::NonNullable, 5);
76    ///
77    /// // Populate the values in reverse order.
78    /// let mut range = builder.uninit_range(5);
79    /// for i in [4, 3, 2, 1, 0] {
80    ///     range[i] = MaybeUninit::new(i as i32);
81    /// }
82    /// range.finish();
83    ///
84    /// let built = builder.finish_into_primitive();
85    ///
86    /// assert_eq!(built.as_slice::<i32>(), &[0i32, 1, 2, 3, 4]);
87    /// ```
88    pub fn uninit_range(&mut self, len: usize) -> UninitRange<T> {
89        let offset = self.values.len();
90        assert!(
91            offset + len <= self.values.capacity(),
92            "uninit_range of len {len} exceeds builder capacity {}",
93            self.values.capacity()
94        );
95
96        UninitRange {
97            offset,
98            len,
99            builder: self,
100        }
101    }
102
103    pub fn finish_into_primitive(&mut self) -> PrimitiveArray {
104        let nulls = self.nulls.finish();
105
106        if let Some(null_buf) = nulls.as_ref() {
107            assert_eq!(
108                null_buf.len(),
109                self.values.len(),
110                "null buffer length must equal value buffer length"
111            );
112        }
113
114        let validity = match (nulls, self.dtype().nullability()) {
115            (None, Nullability::NonNullable) => Validity::NonNullable,
116            (Some(_), Nullability::NonNullable) => {
117                vortex_panic!("Non-nullable builder has null values")
118            }
119            (None, Nullability::Nullable) => Validity::AllValid,
120            (Some(nulls), Nullability::Nullable) => {
121                if nulls.null_count() == nulls.len() {
122                    Validity::AllInvalid
123                } else {
124                    Validity::Array(BoolArray::from(nulls.into_inner()).into_array())
125                }
126            }
127        };
128
129        PrimitiveArray::new(std::mem::take(&mut self.values).freeze(), validity)
130    }
131
132    pub fn extend_with_iterator(&mut self, iter: impl IntoIterator<Item = T>, mask: Mask) {
133        self.values.extend(iter);
134        self.extend_with_validity_mask(mask)
135    }
136
137    fn extend_with_validity_mask(&mut self, validity_mask: Mask) {
138        self.nulls.append_validity_mask(validity_mask);
139    }
140}
141
142impl<T: NativePType> ArrayBuilder for PrimitiveBuilder<T> {
143    fn as_any(&self) -> &dyn Any {
144        self
145    }
146
147    fn as_any_mut(&mut self) -> &mut dyn Any {
148        self
149    }
150
151    fn dtype(&self) -> &DType {
152        &self.dtype
153    }
154
155    fn len(&self) -> usize {
156        self.values.len()
157    }
158
159    fn append_zeros(&mut self, n: usize) {
160        self.values.push_n(T::default(), n);
161        self.nulls.append_n_non_nulls(n);
162    }
163
164    fn append_nulls(&mut self, n: usize) {
165        self.values.push_n(T::default(), n);
166        self.nulls.append_n_nulls(n);
167    }
168
169    fn extend_from_array(&mut self, array: &dyn Array) -> VortexResult<()> {
170        let array = array.to_primitive()?;
171        if array.ptype() != T::PTYPE {
172            vortex_bail!("Cannot extend from array with different ptype");
173        }
174
175        self.values.extend_from_slice(array.as_slice::<T>());
176
177        self.extend_with_validity_mask(array.validity_mask()?);
178
179        Ok(())
180    }
181
182    fn ensure_capacity(&mut self, capacity: usize) {
183        if capacity > self.values.capacity() {
184            self.values.reserve(capacity - self.values.len());
185            self.nulls.ensure_capacity(capacity);
186        }
187    }
188
189    fn set_validity(&mut self, validity: Mask) {
190        self.nulls = LazyNullBufferBuilder::new(validity.len());
191        self.nulls.append_validity_mask(validity);
192    }
193
194    fn finish(&mut self) -> ArrayRef {
195        self.finish_into_primitive().into_array()
196    }
197}
198
199pub struct UninitRange<'a, T> {
200    offset: usize,
201    len: usize,
202    builder: &'a mut PrimitiveBuilder<T>,
203}
204
205impl<T> Deref for UninitRange<'_, T> {
206    type Target = [MaybeUninit<T>];
207
208    fn deref(&self) -> &[MaybeUninit<T>] {
209        let start = self.builder.values.as_ptr();
210        unsafe {
211            // SAFETY: start + len is checked on construction to be in range.
212            let dst = std::slice::from_raw_parts(start, self.len);
213
214            // SAFETY: &[T] and &[MaybeUninit<T>] have the same layout
215            let dst: &[MaybeUninit<T>] = std::mem::transmute(dst);
216
217            dst
218        }
219    }
220}
221
222impl<T> DerefMut for UninitRange<'_, T> {
223    fn deref_mut(&mut self) -> &mut [MaybeUninit<T>] {
224        &mut self.builder.values.spare_capacity_mut()[..self.len]
225    }
226}
227
228impl<T> UninitRange<'_, T> {
229    /// Set a validity bit at the given index. The index is relative to the start of this range
230    /// of the builder.
231    pub fn set_bit(&mut self, index: usize, v: bool) {
232        self.builder.nulls.set_bit(self.offset + index, v);
233    }
234
235    /// Set values from an initialized range.
236    pub fn copy_from_init(&mut self, offset: usize, len: usize, src: &[T])
237    where
238        T: Copy,
239    {
240        // SAFETY: &[T] and &[MaybeUninit<T>] have the same layout
241        let uninit_src: &[MaybeUninit<T>] = unsafe { std::mem::transmute(src) };
242
243        let dst = &mut self[offset..][..len];
244        dst.copy_from_slice(uninit_src);
245    }
246
247    /// Finish building this range, marking it as initialized and advancing the length of the
248    /// underlying values buffer.
249    pub fn finish(self) {
250        // SAFETY: constructor enforces that offset + len does not exceed the capacity of the array.
251        unsafe { self.builder.values.set_len(self.offset + self.len) };
252    }
253}