// vortex_array/builders/primitive.rs

1use std::any::Any;
2use std::mem::MaybeUninit;
3use std::ops::{Deref, DerefMut};
4
5use vortex_buffer::BufferMut;
6use vortex_dtype::{DType, NativePType, Nullability};
7use vortex_error::{VortexResult, vortex_bail, vortex_panic};
8use vortex_mask::Mask;
9
10use crate::arrays::{BoolArray, PrimitiveArray};
11use crate::builders::ArrayBuilder;
12use crate::builders::lazy_validity_builder::LazyNullBufferBuilder;
13use crate::validity::Validity;
14use crate::variants::PrimitiveArrayTrait;
15use crate::{Array, ArrayRef};
16
17/// Builder for [`PrimitiveArray`].
18pub struct PrimitiveBuilder<T> {
19    values: BufferMut<T>,
20    nulls: LazyNullBufferBuilder,
21    dtype: DType,
22}
23
24impl<T: NativePType> PrimitiveBuilder<T> {
25    pub fn new(nullability: Nullability) -> Self {
26        Self::with_capacity(nullability, 1024) // Same as Arrow builders
27    }
28
29    pub fn with_capacity(nullability: Nullability, capacity: usize) -> Self {
30        Self {
31            values: BufferMut::with_capacity(capacity),
32            nulls: LazyNullBufferBuilder::new(capacity),
33            dtype: DType::Primitive(T::PTYPE, nullability),
34        }
35    }
36
37    /// Append a `Mask` to the null buffer.
38    pub fn append_mask(&mut self, mask: Mask) {
39        self.nulls.append_validity_mask(mask);
40    }
41
42    pub fn append_value(&mut self, value: T) {
43        self.values.push(value);
44        self.nulls.append(true);
45    }
46
47    pub fn append_option(&mut self, value: Option<T>) {
48        match value {
49            Some(value) => {
50                self.values.push(value);
51                self.nulls.append(true);
52            }
53            None => self.append_null(),
54        }
55    }
56
57    pub fn values(&self) -> &[T] {
58        self.values.as_ref()
59    }
60
61    /// Create a new handle to the next `len` uninitialized values in the builder.
62    ///
63    /// All reads/writes through the handle to the values buffer or the validity buffer will operate
64    /// on indices relative to the start of the range.
65    ///
66    ///
67    /// ## Example
68    ///
69    /// ```
70    /// use std::mem::MaybeUninit;
71    /// use vortex_array::builders::{ArrayBuilder, PrimitiveBuilder};
72    /// use vortex_dtype::Nullability;
73    ///
74    /// // Create a new builder.
75    /// let mut builder: PrimitiveBuilder<i32> = PrimitiveBuilder::with_capacity(Nullability::NonNullable, 5);
76    ///
77    /// // Populate the values in reverse order.
78    /// let mut range = builder.uninit_range(5);
79    /// for i in [4, 3, 2, 1, 0] {
80    ///     range[i] = MaybeUninit::new(i as i32);
81    /// }
82    /// range.finish();
83    ///
84    /// let built = builder.finish_into_primitive();
85    ///
86    /// assert_eq!(built.as_slice::<i32>(), &[0i32, 1, 2, 3, 4]);
87    /// ```
88    pub fn uninit_range(&mut self, len: usize) -> UninitRange<T> {
89        let offset = self.values.len();
90        assert!(
91            offset + len <= self.values.capacity(),
92            "uninit_range of len {len} exceeds builder capacity"
93        );
94
95        UninitRange {
96            offset,
97            len,
98            builder: self,
99        }
100    }
101
102    pub fn finish_into_primitive(&mut self) -> PrimitiveArray {
103        let nulls = self.nulls.finish();
104
105        if let Some(null_buf) = nulls.as_ref() {
106            assert_eq!(
107                null_buf.len(),
108                self.values.len(),
109                "null buffer length must equal value buffer length"
110            );
111        }
112
113        let validity = match (nulls, self.dtype().nullability()) {
114            (None, Nullability::NonNullable) => Validity::NonNullable,
115            (Some(_), Nullability::NonNullable) => {
116                vortex_panic!("Non-nullable builder has null values")
117            }
118            (None, Nullability::Nullable) => Validity::AllValid,
119            (Some(nulls), Nullability::Nullable) => {
120                if nulls.null_count() == nulls.len() {
121                    Validity::AllInvalid
122                } else {
123                    Validity::Array(BoolArray::from(nulls.into_inner()).into_array())
124                }
125            }
126        };
127
128        PrimitiveArray::new(std::mem::take(&mut self.values).freeze(), validity)
129    }
130
131    pub fn extend_with_iterator(&mut self, iter: impl IntoIterator<Item = T>, mask: Mask) {
132        self.values.extend(iter);
133        self.extend_with_validity_mask(mask)
134    }
135
136    fn extend_with_validity_mask(&mut self, validity_mask: Mask) {
137        self.nulls.append_validity_mask(validity_mask);
138    }
139}
140
141impl<T: NativePType> ArrayBuilder for PrimitiveBuilder<T> {
142    fn as_any(&self) -> &dyn Any {
143        self
144    }
145
146    fn as_any_mut(&mut self) -> &mut dyn Any {
147        self
148    }
149
150    fn dtype(&self) -> &DType {
151        &self.dtype
152    }
153
154    fn len(&self) -> usize {
155        self.values.len()
156    }
157
158    fn append_zeros(&mut self, n: usize) {
159        self.values.push_n(T::default(), n);
160        self.nulls.append_n_non_nulls(n);
161    }
162
163    fn append_nulls(&mut self, n: usize) {
164        self.values.push_n(T::default(), n);
165        self.nulls.append_n_nulls(n);
166    }
167
168    fn extend_from_array(&mut self, array: &dyn Array) -> VortexResult<()> {
169        let array = array.to_canonical()?.into_primitive()?;
170        if array.ptype() != T::PTYPE {
171            vortex_bail!("Cannot extend from array with different ptype");
172        }
173
174        self.values.extend_from_slice(array.as_slice::<T>());
175
176        self.extend_with_validity_mask(array.validity_mask()?);
177
178        Ok(())
179    }
180
181    fn finish(&mut self) -> ArrayRef {
182        self.finish_into_primitive().into_array()
183    }
184}
185
186pub struct UninitRange<'a, T> {
187    offset: usize,
188    len: usize,
189    builder: &'a mut PrimitiveBuilder<T>,
190}
191
192impl<T> Deref for UninitRange<'_, T> {
193    type Target = [MaybeUninit<T>];
194
195    fn deref(&self) -> &[MaybeUninit<T>] {
196        let start = self.builder.values.as_ptr();
197        unsafe {
198            // SAFETY: start + len is checked on construction to be in range.
199            let dst = std::slice::from_raw_parts(start, self.len);
200
201            // SAFETY: &[T] and &[MaybeUninit<T>] have the same layout
202            let dst: &[MaybeUninit<T>] = std::mem::transmute(dst);
203
204            dst
205        }
206    }
207}
208
209impl<T> DerefMut for UninitRange<'_, T> {
210    fn deref_mut(&mut self) -> &mut [MaybeUninit<T>] {
211        &mut self.builder.values.spare_capacity_mut()[..self.len]
212    }
213}
214
215impl<T> UninitRange<'_, T> {
216    /// Set a validity bit at the given index. The index is relative to the start of this range
217    /// of the builder.
218    pub fn set_bit(&mut self, index: usize, v: bool) {
219        self.builder.nulls.set_bit(self.offset + index, v);
220    }
221
222    /// Set values from an initialized range.
223    pub fn copy_from_init(&mut self, offset: usize, len: usize, src: &[T])
224    where
225        T: Copy,
226    {
227        // SAFETY: &[T] and &[MaybeUninit<T>] have the same layout
228        let uninit_src: &[MaybeUninit<T>] = unsafe { std::mem::transmute(src) };
229
230        let dst = &mut self[offset..][..len];
231        dst.copy_from_slice(uninit_src);
232    }
233
234    /// Finish building this range, marking it as initialized and advancing the length of the
235    /// underlying values buffer.
236    pub fn finish(self) {
237        // SAFETY: constructor enforces that offset + len does not exceed the capacity of the array.
238        unsafe { self.builder.values.set_len(self.offset + self.len) };
239    }
240}