vortex_array/builders/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4// TODO(connor): Make the methods on all builders consistent (`_opt` methods).
5
6//! Builders for Vortex arrays.
7//!
8//! Every logical type in Vortex has a canonical (uncompressed) in-memory encoding. This module
9//! provides pre-allocated builders to construct new canonical arrays.
10//!
11//! ## Example:
12//!
13//! ```
14//! use vortex_array::builders::{builder_with_capacity, ArrayBuilder};
15//! use vortex_dtype::{DType, Nullability};
16//!
17//! // Create a new builder for string data.
18//! let mut builder = builder_with_capacity(&DType::Utf8(Nullability::NonNullable), 4);
19//!
20//! builder.append_scalar(&"a".into()).unwrap();
21//! builder.append_scalar(&"b".into()).unwrap();
22//! builder.append_scalar(&"c".into()).unwrap();
23//! builder.append_scalar(&"d".into()).unwrap();
24//!
25//! let strings = builder.finish();
26//!
27//! assert_eq!(strings.scalar_at(0), "a".into());
28//! assert_eq!(strings.scalar_at(1), "b".into());
29//! assert_eq!(strings.scalar_at(2), "c".into());
30//! assert_eq!(strings.scalar_at(3), "d".into());
31//! ```
32
33use std::any::Any;
34
35use vortex_dtype::{DType, match_each_native_ptype};
36use vortex_error::{VortexResult, vortex_bail, vortex_err, vortex_panic};
37use vortex_mask::Mask;
38use vortex_scalar::{
39    BinaryScalar, BoolScalar, DecimalValue, ExtScalar, ListScalar, PrimitiveScalar, Scalar,
40    StructScalar, Utf8Scalar, match_each_decimal_value, match_each_decimal_value_type,
41};
42
43use crate::arrays::smallest_storage_type;
44use crate::canonical::Canonical;
45use crate::{Array, ArrayRef};
46
47mod lazy_null_builder;
48use lazy_null_builder::LazyNullBufferBuilder;
49
50mod bool;
51mod decimal;
52mod extension;
53mod fixed_size_list;
54mod list;
55mod null;
56mod primitive;
57mod struct_;
58mod varbinview;
59
60pub use bool::*;
61pub use decimal::*;
62pub use extension::*;
63pub use fixed_size_list::*;
64pub use list::*;
65pub use null::*;
66pub use primitive::*;
67pub use struct_::*;
68pub use varbinview::*;
69
70#[cfg(test)]
71mod tests;
72
/// The default capacity for builders.
///
/// This matches the default capacity used by Arrow's array builders.
76pub const DEFAULT_BUILDER_CAPACITY: usize = 1024;
77
78pub trait ArrayBuilder: Send {
79    fn as_any(&self) -> &dyn Any;
80
81    fn as_any_mut(&mut self) -> &mut dyn Any;
82
83    fn dtype(&self) -> &DType;
84
85    fn len(&self) -> usize;
86
87    fn is_empty(&self) -> bool {
88        self.len() == 0
89    }
90
91    /// Append a "zero" value to the array.
92    ///
93    /// Zero values are generally determined by [`Scalar::default_value`].
94    fn append_zero(&mut self) {
95        self.append_zeros(1)
96    }
97
98    /// Appends n "zero" values to the array.
99    ///
100    /// Zero values are generally determined by [`Scalar::default_value`].
101    fn append_zeros(&mut self, n: usize);
102
103    /// Append a "null" value to the array.
104    ///
105    /// Implementors should panic if this method is called on a non-nullable [`ArrayBuilder`].
106    fn append_null(&mut self) {
107        self.append_nulls(1)
108    }
109
110    /// The inner part of `append_nulls`.
111    ///
112    /// # Safety
113    ///
114    /// The array builder must be nullable.
115    unsafe fn append_nulls_unchecked(&mut self, n: usize);
116
117    /// Appends n "null" values to the array.
118    ///
119    /// Implementors should panic if this method is called on a non-nullable [`ArrayBuilder`].
120    fn append_nulls(&mut self, n: usize) {
121        assert!(
122            self.dtype().is_nullable(),
123            "tried to append {n} nulls to a non-nullable array builder"
124        );
125
126        // SAFETY: We check above that the array builder is nullable.
127        unsafe {
128            self.append_nulls_unchecked(n);
129        }
130    }
131
132    /// Appends a default value to the array.
133    fn append_default(&mut self) {
134        self.append_defaults(1)
135    }
136
137    /// Appends n default values to the array.
138    ///
139    /// If the array builder is nullable, then this has the behavior of `self.append_nulls(n)`.
140    /// If the array builder is non-nullable, then it has the behavior of `self.append_zeros(n)`.
141    fn append_defaults(&mut self, n: usize) {
142        if self.dtype().is_nullable() {
143            self.append_nulls(n);
144        } else {
145            self.append_zeros(n);
146        }
147    }
148
149    /// A generic function to append a scalar to the builder.
150    fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()> {
151        if scalar.dtype() != self.dtype() {
152            vortex_bail!(
153                "Builder has dtype {:?}, scalar has {:?}",
154                self.dtype(),
155                scalar.dtype()
156            )
157        }
158
159        match scalar.dtype() {
160            DType::Null => self
161                .as_any_mut()
162                .downcast_mut::<NullBuilder>()
163                .ok_or_else(|| vortex_err!("Cannot append null scalar to non-null builder"))?
164                .append_null(),
165            DType::Bool(_) => self
166                .as_any_mut()
167                .downcast_mut::<BoolBuilder>()
168                .ok_or_else(|| vortex_err!("Cannot append bool scalar to non-bool builder"))?
169                .append_option(BoolScalar::try_from(scalar)?.value()),
170            DType::Primitive(ptype, ..) => {
171                match_each_native_ptype!(ptype, |P| {
172                    self.as_any_mut()
173                        .downcast_mut::<PrimitiveBuilder<P>>()
174                        .ok_or_else(|| {
175                            vortex_err!("Cannot append primitive scalar to non-primitive builder")
176                        })?
177                        .append_option(PrimitiveScalar::try_from(scalar)?.typed_value::<P>())
178                })
179            }
180            DType::Decimal(..) => {
181                let builder = self
182                    .as_any_mut()
183                    .downcast_mut::<DecimalBuilder>()
184                    .ok_or_else(|| {
185                        vortex_err!("Cannot append decimal scalar to non-decimal builder")
186                    })?;
187                match scalar.as_decimal().decimal_value() {
188                    None => builder.append_null(),
189                    Some(v) => match_each_decimal_value!(v, |dec_val| {
190                        builder.append_value(dec_val);
191                    }),
192                }
193            }
194            DType::Utf8(_) => self
195                .as_any_mut()
196                .downcast_mut::<VarBinViewBuilder>()
197                .ok_or_else(|| vortex_err!("Cannot append utf8 scalar to non-utf8 builder"))?
198                .append_option(Utf8Scalar::try_from(scalar)?.value()),
199            DType::Binary(_) => self
200                .as_any_mut()
201                .downcast_mut::<VarBinViewBuilder>()
202                .ok_or_else(|| vortex_err!("Cannot append binary scalar to non-binary builder"))?
203                .append_option(BinaryScalar::try_from(scalar)?.value()),
204            DType::Struct(..) => self
205                .as_any_mut()
206                .downcast_mut::<StructBuilder>()
207                .ok_or_else(|| vortex_err!("Cannot append struct scalar to non-struct builder"))?
208                .append_value(StructScalar::try_from(scalar)?)?,
209            DType::List(..) => self
210                .as_any_mut()
211                .downcast_mut::<ListBuilder<u64>>()
212                .ok_or_else(|| vortex_err!("Cannot append list scalar to non-list builder"))?
213                .append_value(ListScalar::try_from(scalar)?)?,
214            DType::FixedSizeList(..) => self
215                .as_any_mut()
216                .downcast_mut::<FixedSizeListBuilder>()
217                .ok_or_else(|| vortex_err!("Cannot append list scalar to non-list builder"))?
218                .append_value(ListScalar::try_from(scalar)?)?,
219            DType::Extension(..) => self
220                .as_any_mut()
221                .downcast_mut::<ExtensionBuilder>()
222                .ok_or_else(|| {
223                    vortex_err!("Cannot append extension scalar to non-extension builder")
224                })?
225                .append_value(ExtScalar::try_from(scalar)?)?,
226        }
227        Ok(())
228    }
229
230    /// The inner part of `extend_from_array`.
231    ///
232    /// # Safety
233    ///
234    /// The array that must have an equal [`DType`] to the array builder's `DType` (with nullability
235    /// superset semantics).
236    unsafe fn extend_from_array_unchecked(&mut self, array: &dyn Array);
237
238    /// Extends the array with the provided array, canonicalizing if necessary.
239    ///
240    /// Implementors must validate that the passed in [`Array`] has the correct [`DType`].
241    fn extend_from_array(&mut self, array: &dyn Array) {
242        if !self.dtype().eq_with_nullability_superset(array.dtype()) {
243            vortex_panic!(
244                "tried to extend a builder with `DType` {} with an array with `DType {}",
245                self.dtype(),
246                array.dtype()
247            );
248        }
249
250        // SAFETY: We checked that the array had a valid `DType` above.
251        unsafe { self.extend_from_array_unchecked(array) }
252    }
253
254    /// Ensure that the builder can hold at least `capacity` number of items
255    fn ensure_capacity(&mut self, capacity: usize);
256
257    /// Override builders validity with the one provided.
258    ///
259    /// Note that this will have no effect on the final array if the array builder is non-nullable.
260    fn set_validity(&mut self, validity: Mask);
261
262    /// Constructs an Array from the builder components.
263    ///
264    /// # Panics
265    ///
266    /// This function may panic if the builder's methods are called with invalid arguments. If only
267    /// the methods on this interface are used, the builder should not panic. However, specific
268    /// builders have interfaces that may be misused. For example, if the number of values in a
269    /// [PrimitiveBuilder]'s [vortex_buffer::BufferMut] does not match the number of validity bits,
270    /// the PrimitiveBuilder's [Self::finish] will panic.
271    fn finish(&mut self) -> ArrayRef;
272
273    /// Constructs a canonical array directly from the builder.
274    ///
275    /// This method provides a default implementation that creates an [`ArrayRef`] via `finish` and
276    /// then converts it to canonical form. Specific builders can override this with optimized
277    /// implementations that avoid the intermediate [`Array`] creation.
278    fn finish_into_canonical(&mut self) -> Canonical {
279        self.finish().to_canonical()
280    }
281}
282
283/// Construct a new canonical builder for the given [`DType`].
284///
285///
286/// # Example
287///
288/// ```
289/// use vortex_array::builders::{builder_with_capacity, ArrayBuilder};
290/// use vortex_dtype::{DType, Nullability};
291///
292/// // Create a new builder for string data.
293/// let mut builder = builder_with_capacity(&DType::Utf8(Nullability::NonNullable), 4);
294///
295/// builder.append_scalar(&"a".into()).unwrap();
296/// builder.append_scalar(&"b".into()).unwrap();
297/// builder.append_scalar(&"c".into()).unwrap();
298/// builder.append_scalar(&"d".into()).unwrap();
299///
300/// let strings = builder.finish();
301///
302/// assert_eq!(strings.scalar_at(0), "a".into());
303/// assert_eq!(strings.scalar_at(1), "b".into());
304/// assert_eq!(strings.scalar_at(2), "c".into());
305/// assert_eq!(strings.scalar_at(3), "d".into());
306/// ```
307pub fn builder_with_capacity(dtype: &DType, capacity: usize) -> Box<dyn ArrayBuilder> {
308    match dtype {
309        DType::Null => Box::new(NullBuilder::new()),
310        DType::Bool(n) => Box::new(BoolBuilder::with_capacity(*n, capacity)),
311        DType::Primitive(ptype, n) => {
312            match_each_native_ptype!(ptype, |P| {
313                Box::new(PrimitiveBuilder::<P>::with_capacity(*n, capacity))
314            })
315        }
316        DType::Decimal(decimal_type, n) => {
317            match_each_decimal_value_type!(smallest_storage_type(decimal_type), |D| {
318                Box::new(DecimalBuilder::with_capacity::<D>(
319                    capacity,
320                    *decimal_type,
321                    *n,
322                ))
323            })
324        }
325        DType::Utf8(n) => Box::new(VarBinViewBuilder::with_capacity(DType::Utf8(*n), capacity)),
326        DType::Binary(n) => Box::new(VarBinViewBuilder::with_capacity(
327            DType::Binary(*n),
328            capacity,
329        )),
330        DType::Struct(struct_dtype, n) => Box::new(StructBuilder::with_capacity(
331            struct_dtype.clone(),
332            *n,
333            capacity,
334        )),
335        DType::List(dtype, n) => Box::new(ListBuilder::<u64>::with_capacity(
336            dtype.clone(),
337            *n,
338            capacity,
339        )),
340        DType::FixedSizeList(elem_dtype, list_size, null) => Box::new(
341            FixedSizeListBuilder::with_capacity(elem_dtype.clone(), *list_size, *null, capacity),
342        ),
343        DType::Extension(ext_dtype) => {
344            Box::new(ExtensionBuilder::with_capacity(ext_dtype.clone(), capacity))
345        }
346    }
347}