vortex_array/builders/
primitive.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::mem::MaybeUninit;
6use std::ops::{Deref, DerefMut};
7
8use vortex_buffer::BufferMut;
9use vortex_dtype::{DType, NativePType, Nullability};
10use vortex_error::{VortexResult, vortex_bail};
11use vortex_mask::Mask;
12
13use crate::arrays::PrimitiveArray;
14use crate::builders::ArrayBuilder;
15use crate::builders::lazy_validity_builder::LazyNullBufferBuilder;
16use crate::{Array, ArrayRef, IntoArray, ToCanonical};
17
18/// Builder for [`PrimitiveArray`].
19pub struct PrimitiveBuilder<T> {
20    values: BufferMut<T>,
21    nulls: LazyNullBufferBuilder,
22    dtype: DType,
23}
24
25impl<T: NativePType> PrimitiveBuilder<T> {
26    pub fn new(nullability: Nullability) -> Self {
27        Self::with_capacity(nullability, 1024) // Same as Arrow builders
28    }
29
30    pub fn with_capacity(nullability: Nullability, capacity: usize) -> Self {
31        Self {
32            values: BufferMut::with_capacity(capacity),
33            nulls: LazyNullBufferBuilder::new(capacity),
34            dtype: DType::Primitive(T::PTYPE, nullability),
35        }
36    }
37
38    /// Append a `Mask` to the null buffer.
39    pub fn append_mask(&mut self, mask: Mask) {
40        self.nulls.append_validity_mask(mask);
41    }
42
43    pub fn append_value(&mut self, value: T) {
44        self.values.push(value);
45        self.nulls.append(true);
46    }
47
48    pub fn append_option(&mut self, value: Option<T>) {
49        match value {
50            Some(value) => {
51                self.values.push(value);
52                self.nulls.append(true);
53            }
54            None => self.append_null(),
55        }
56    }
57
58    pub fn values(&self) -> &[T] {
59        self.values.as_ref()
60    }
61
62    /// Create a new handle to the next `len` uninitialized values in the builder.
63    ///
64    /// All reads/writes through the handle to the values buffer or the validity buffer will operate
65    /// on indices relative to the start of the range.
66    ///
67    ///
68    /// ## Example
69    ///
70    /// ```
71    /// use std::mem::MaybeUninit;
72    /// use vortex_array::builders::{ArrayBuilder, PrimitiveBuilder};
73    /// use vortex_dtype::Nullability;
74    ///
75    /// // Create a new builder.
76    /// let mut builder: PrimitiveBuilder<i32> = PrimitiveBuilder::with_capacity(Nullability::NonNullable, 5);
77    ///
78    /// // Populate the values in reverse order.
79    /// let mut range = builder.uninit_range(5);
80    /// for i in [4, 3, 2, 1, 0] {
81    ///     range[i] = MaybeUninit::new(i as i32);
82    /// }
83    /// range.finish();
84    ///
85    /// let built = builder.finish_into_primitive();
86    ///
87    /// assert_eq!(built.as_slice::<i32>(), &[0i32, 1, 2, 3, 4]);
88    /// ```
89    pub fn uninit_range(&mut self, len: usize) -> UninitRange<'_, T> {
90        let offset = self.values.len();
91        assert!(
92            offset + len <= self.values.capacity(),
93            "uninit_range of len {len} exceeds builder capacity {}",
94            self.values.capacity()
95        );
96
97        UninitRange {
98            offset,
99            len,
100            builder: self,
101        }
102    }
103
104    pub fn finish_into_primitive(&mut self) -> PrimitiveArray {
105        let validity = self
106            .nulls
107            .finish_with_nullability(self.dtype().nullability());
108
109        PrimitiveArray::new(std::mem::take(&mut self.values).freeze(), validity)
110    }
111
112    pub fn extend_with_iterator(&mut self, iter: impl IntoIterator<Item = T>, mask: Mask) {
113        self.values.extend(iter);
114        self.extend_with_validity_mask(mask)
115    }
116
117    fn extend_with_validity_mask(&mut self, validity_mask: Mask) {
118        self.nulls.append_validity_mask(validity_mask);
119    }
120}
121
122impl<T: NativePType> ArrayBuilder for PrimitiveBuilder<T> {
123    fn as_any(&self) -> &dyn Any {
124        self
125    }
126
127    fn as_any_mut(&mut self) -> &mut dyn Any {
128        self
129    }
130
131    fn dtype(&self) -> &DType {
132        &self.dtype
133    }
134
135    fn len(&self) -> usize {
136        self.values.len()
137    }
138
139    fn append_zeros(&mut self, n: usize) {
140        self.values.push_n(T::default(), n);
141        self.nulls.append_n_non_nulls(n);
142    }
143
144    fn append_nulls(&mut self, n: usize) {
145        self.values.push_n(T::default(), n);
146        self.nulls.append_n_nulls(n);
147    }
148
149    fn extend_from_array(&mut self, array: &dyn Array) -> VortexResult<()> {
150        let array = array.to_primitive()?;
151        if array.ptype() != T::PTYPE {
152            vortex_bail!("Cannot extend from array with different ptype");
153        }
154
155        self.values.extend_from_slice(array.as_slice::<T>());
156
157        self.extend_with_validity_mask(array.validity_mask()?);
158
159        Ok(())
160    }
161
162    fn ensure_capacity(&mut self, capacity: usize) {
163        if capacity > self.values.capacity() {
164            self.values.reserve(capacity - self.values.len());
165            self.nulls.ensure_capacity(capacity);
166        }
167    }
168
169    fn set_validity(&mut self, validity: Mask) {
170        self.nulls = LazyNullBufferBuilder::new(validity.len());
171        self.nulls.append_validity_mask(validity);
172    }
173
174    fn finish(&mut self) -> ArrayRef {
175        self.finish_into_primitive().into_array()
176    }
177}
178
179pub struct UninitRange<'a, T> {
180    offset: usize,
181    len: usize,
182    builder: &'a mut PrimitiveBuilder<T>,
183}
184
185impl<T> Deref for UninitRange<'_, T> {
186    type Target = [MaybeUninit<T>];
187
188    fn deref(&self) -> &[MaybeUninit<T>] {
189        let start = self.builder.values.as_ptr();
190        unsafe {
191            // SAFETY: start + len is checked on construction to be in range.
192            let dst = std::slice::from_raw_parts(start, self.len);
193
194            // SAFETY: &[T] and &[MaybeUninit<T>] have the same layout
195            let dst: &[MaybeUninit<T>] = std::mem::transmute(dst);
196
197            dst
198        }
199    }
200}
201
202impl<T> DerefMut for UninitRange<'_, T> {
203    fn deref_mut(&mut self) -> &mut [MaybeUninit<T>] {
204        &mut self.builder.values.spare_capacity_mut()[..self.len]
205    }
206}
207
208impl<T> UninitRange<'_, T> {
209    /// Set a validity bit at the given index. The index is relative to the start of this range
210    /// of the builder.
211    pub fn set_bit(&mut self, index: usize, v: bool) {
212        self.builder.nulls.set_bit(self.offset + index, v);
213    }
214
215    /// Set values from an initialized range.
216    pub fn copy_from_init(&mut self, offset: usize, len: usize, src: &[T])
217    where
218        T: Copy,
219    {
220        // SAFETY: &[T] and &[MaybeUninit<T>] have the same layout
221        let uninit_src: &[MaybeUninit<T>] = unsafe { std::mem::transmute(src) };
222
223        let dst = &mut self[offset..][..len];
224        dst.copy_from_slice(uninit_src);
225    }
226
227    /// Finish building this range, marking it as initialized and advancing the length of the
228    /// underlying values buffer.
229    pub fn finish(self) {
230        // SAFETY: constructor enforces that offset + len does not exceed the capacity of the array.
231        unsafe { self.builder.values.set_len(self.offset + self.len) };
232    }
233}