vortex_array/builders/
primitive.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::mem::MaybeUninit;
6use std::ops::{Deref, DerefMut};
7
8use vortex_buffer::BufferMut;
9use vortex_dtype::{DType, NativePType, Nullability};
10use vortex_error::{VortexResult, vortex_bail, vortex_panic};
11use vortex_mask::Mask;
12
13use crate::arrays::{BoolArray, PrimitiveArray};
14use crate::builders::ArrayBuilder;
15use crate::builders::lazy_validity_builder::LazyNullBufferBuilder;
16use crate::validity::Validity;
17use crate::{Array, ArrayRef, IntoArray, ToCanonical};
18
/// Builder for [`PrimitiveArray`].
///
/// Element values are accumulated in a mutable buffer while their validity is tracked by a
/// separate null-buffer builder; the two are combined into an array when the builder is finished.
pub struct PrimitiveBuilder<T> {
    /// Element values appended so far, in append order.
    values: BufferMut<T>,
    /// Validity tracking for the appended elements.
    nulls: LazyNullBufferBuilder,
    /// The `DType::Primitive` (ptype of `T` plus the requested nullability) set at construction.
    dtype: DType,
}
25
26impl<T: NativePType> PrimitiveBuilder<T> {
27    pub fn new(nullability: Nullability) -> Self {
28        Self::with_capacity(nullability, 1024) // Same as Arrow builders
29    }
30
31    pub fn with_capacity(nullability: Nullability, capacity: usize) -> Self {
32        Self {
33            values: BufferMut::with_capacity(capacity),
34            nulls: LazyNullBufferBuilder::new(capacity),
35            dtype: DType::Primitive(T::PTYPE, nullability),
36        }
37    }
38
39    /// Append a `Mask` to the null buffer.
40    pub fn append_mask(&mut self, mask: Mask) {
41        self.nulls.append_validity_mask(mask);
42    }
43
44    pub fn append_value(&mut self, value: T) {
45        self.values.push(value);
46        self.nulls.append(true);
47    }
48
49    pub fn append_option(&mut self, value: Option<T>) {
50        match value {
51            Some(value) => {
52                self.values.push(value);
53                self.nulls.append(true);
54            }
55            None => self.append_null(),
56        }
57    }
58
59    pub fn values(&self) -> &[T] {
60        self.values.as_ref()
61    }
62
63    /// Create a new handle to the next `len` uninitialized values in the builder.
64    ///
65    /// All reads/writes through the handle to the values buffer or the validity buffer will operate
66    /// on indices relative to the start of the range.
67    ///
68    ///
69    /// ## Example
70    ///
71    /// ```
72    /// use std::mem::MaybeUninit;
73    /// use vortex_array::builders::{ArrayBuilder, PrimitiveBuilder};
74    /// use vortex_dtype::Nullability;
75    ///
76    /// // Create a new builder.
77    /// let mut builder: PrimitiveBuilder<i32> = PrimitiveBuilder::with_capacity(Nullability::NonNullable, 5);
78    ///
79    /// // Populate the values in reverse order.
80    /// let mut range = builder.uninit_range(5);
81    /// for i in [4, 3, 2, 1, 0] {
82    ///     range[i] = MaybeUninit::new(i as i32);
83    /// }
84    /// range.finish();
85    ///
86    /// let built = builder.finish_into_primitive();
87    ///
88    /// assert_eq!(built.as_slice::<i32>(), &[0i32, 1, 2, 3, 4]);
89    /// ```
90    pub fn uninit_range(&mut self, len: usize) -> UninitRange<'_, T> {
91        let offset = self.values.len();
92        assert!(
93            offset + len <= self.values.capacity(),
94            "uninit_range of len {len} exceeds builder capacity {}",
95            self.values.capacity()
96        );
97
98        UninitRange {
99            offset,
100            len,
101            builder: self,
102        }
103    }
104
105    pub fn finish_into_primitive(&mut self) -> PrimitiveArray {
106        let nulls = self.nulls.finish();
107
108        if let Some(null_buf) = nulls.as_ref() {
109            assert_eq!(
110                null_buf.len(),
111                self.values.len(),
112                "null buffer length must equal value buffer length"
113            );
114        }
115
116        let validity = match (nulls, self.dtype().nullability()) {
117            (None, Nullability::NonNullable) => Validity::NonNullable,
118            (Some(_), Nullability::NonNullable) => {
119                vortex_panic!("Non-nullable builder has null values")
120            }
121            (None, Nullability::Nullable) => Validity::AllValid,
122            (Some(nulls), Nullability::Nullable) => {
123                if nulls.null_count() == nulls.len() {
124                    Validity::AllInvalid
125                } else {
126                    Validity::Array(BoolArray::from(nulls.into_inner()).into_array())
127                }
128            }
129        };
130
131        PrimitiveArray::new(std::mem::take(&mut self.values).freeze(), validity)
132    }
133
134    pub fn extend_with_iterator(&mut self, iter: impl IntoIterator<Item = T>, mask: Mask) {
135        self.values.extend(iter);
136        self.extend_with_validity_mask(mask)
137    }
138
139    fn extend_with_validity_mask(&mut self, validity_mask: Mask) {
140        self.nulls.append_validity_mask(validity_mask);
141    }
142}
143
144impl<T: NativePType> ArrayBuilder for PrimitiveBuilder<T> {
145    fn as_any(&self) -> &dyn Any {
146        self
147    }
148
149    fn as_any_mut(&mut self) -> &mut dyn Any {
150        self
151    }
152
153    fn dtype(&self) -> &DType {
154        &self.dtype
155    }
156
157    fn len(&self) -> usize {
158        self.values.len()
159    }
160
161    fn append_zeros(&mut self, n: usize) {
162        self.values.push_n(T::default(), n);
163        self.nulls.append_n_non_nulls(n);
164    }
165
166    fn append_nulls(&mut self, n: usize) {
167        self.values.push_n(T::default(), n);
168        self.nulls.append_n_nulls(n);
169    }
170
171    fn extend_from_array(&mut self, array: &dyn Array) -> VortexResult<()> {
172        let array = array.to_primitive()?;
173        if array.ptype() != T::PTYPE {
174            vortex_bail!("Cannot extend from array with different ptype");
175        }
176
177        self.values.extend_from_slice(array.as_slice::<T>());
178
179        self.extend_with_validity_mask(array.validity_mask()?);
180
181        Ok(())
182    }
183
184    fn ensure_capacity(&mut self, capacity: usize) {
185        if capacity > self.values.capacity() {
186            self.values.reserve(capacity - self.values.len());
187            self.nulls.ensure_capacity(capacity);
188        }
189    }
190
191    fn set_validity(&mut self, validity: Mask) {
192        self.nulls = LazyNullBufferBuilder::new(validity.len());
193        self.nulls.append_validity_mask(validity);
194    }
195
196    fn finish(&mut self) -> ArrayRef {
197        self.finish_into_primitive().into_array()
198    }
199}
200
/// Handle to a run of `len` not-yet-initialized value slots of a [`PrimitiveBuilder`], as
/// returned by [`PrimitiveBuilder::uninit_range`].
pub struct UninitRange<'a, T> {
    /// Length of the builder's values buffer at the time the range was created; validity-bit
    /// indices passed to the range are shifted by this amount.
    offset: usize,
    /// Number of value slots covered by the range.
    len: usize,
    /// Exclusive borrow of the builder the range belongs to.
    builder: &'a mut PrimitiveBuilder<T>,
}
206
207impl<T> Deref for UninitRange<'_, T> {
208    type Target = [MaybeUninit<T>];
209
210    fn deref(&self) -> &[MaybeUninit<T>] {
211        let start = self.builder.values.as_ptr();
212        unsafe {
213            // SAFETY: start + len is checked on construction to be in range.
214            let dst = std::slice::from_raw_parts(start, self.len);
215
216            // SAFETY: &[T] and &[MaybeUninit<T>] have the same layout
217            let dst: &[MaybeUninit<T>] = std::mem::transmute(dst);
218
219            dst
220        }
221    }
222}
223
224impl<T> DerefMut for UninitRange<'_, T> {
225    fn deref_mut(&mut self) -> &mut [MaybeUninit<T>] {
226        &mut self.builder.values.spare_capacity_mut()[..self.len]
227    }
228}
229
230impl<T> UninitRange<'_, T> {
231    /// Set a validity bit at the given index. The index is relative to the start of this range
232    /// of the builder.
233    pub fn set_bit(&mut self, index: usize, v: bool) {
234        self.builder.nulls.set_bit(self.offset + index, v);
235    }
236
237    /// Set values from an initialized range.
238    pub fn copy_from_init(&mut self, offset: usize, len: usize, src: &[T])
239    where
240        T: Copy,
241    {
242        // SAFETY: &[T] and &[MaybeUninit<T>] have the same layout
243        let uninit_src: &[MaybeUninit<T>] = unsafe { std::mem::transmute(src) };
244
245        let dst = &mut self[offset..][..len];
246        dst.copy_from_slice(uninit_src);
247    }
248
249    /// Finish building this range, marking it as initialized and advancing the length of the
250    /// underlying values buffer.
251    pub fn finish(self) {
252        // SAFETY: constructor enforces that offset + len does not exceed the capacity of the array.
253        unsafe { self.builder.values.set_len(self.offset + self.len) };
254    }
255}