vortex_array/builders/
mod.rs

// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

//! Builders for Vortex arrays.
//!
//! Every logical type in Vortex has a canonical (uncompressed) in-memory encoding. This module
//! provides pre-allocated builders to construct new canonical arrays.
//!
//! ## Example
//!
//! ```
//! use vortex_array::builders::{builder_with_capacity, ArrayBuilder};
//! use vortex_dtype::{DType, Nullability};
//!
//! // Create a new builder for string data.
//! let mut builder = builder_with_capacity(&DType::Utf8(Nullability::NonNullable), 4);
//!
//! builder.append_scalar(&"a".into()).unwrap();
//! builder.append_scalar(&"b".into()).unwrap();
//! builder.append_scalar(&"c".into()).unwrap();
//! builder.append_scalar(&"d".into()).unwrap();
//!
//! let strings = builder.finish();
//!
//! assert_eq!(strings.scalar_at(0), "a".into());
//! assert_eq!(strings.scalar_at(1), "b".into());
//! assert_eq!(strings.scalar_at(2), "c".into());
//! assert_eq!(strings.scalar_at(3), "d".into());
//! ```
30
31use std::any::Any;
32
33use vortex_dtype::{DType, match_each_native_ptype};
34use vortex_error::{VortexResult, vortex_panic};
35use vortex_mask::Mask;
36use vortex_scalar::{Scalar, match_each_decimal_value_type};
37
38use crate::arrays::smallest_decimal_value_type;
39use crate::canonical::Canonical;
40use crate::{Array, ArrayRef};
41
42mod lazy_null_builder;
43use lazy_null_builder::LazyNullBufferBuilder;
44
45mod bool;
46mod decimal;
47mod extension;
48mod fixed_size_list;
49mod list;
50mod listview;
51mod null;
52mod primitive;
53mod struct_;
54mod varbinview;
55
56pub use bool::*;
57pub use decimal::*;
58pub use extension::*;
59pub use fixed_size_list::*;
60pub use list::*;
61pub use listview::*;
62pub use null::*;
63pub use primitive::*;
64pub use struct_::*;
65pub use varbinview::*;
66
67#[cfg(test)]
68mod tests;
69
/// Capacity, in number of elements, that builders use by default.
///
/// The value matches the default capacity used for Arrow arrays.
pub const DEFAULT_BUILDER_CAPACITY: usize = 1024;
74
75pub trait ArrayBuilder: Send {
76    fn as_any(&self) -> &dyn Any;
77
78    fn as_any_mut(&mut self) -> &mut dyn Any;
79
80    fn dtype(&self) -> &DType;
81
82    fn len(&self) -> usize;
83
84    fn is_empty(&self) -> bool {
85        self.len() == 0
86    }
87
88    /// Append a "zero" value to the array.
89    ///
90    /// Zero values are generally determined by [`Scalar::default_value`].
91    fn append_zero(&mut self) {
92        self.append_zeros(1)
93    }
94
95    /// Appends n "zero" values to the array.
96    ///
97    /// Zero values are generally determined by [`Scalar::default_value`].
98    fn append_zeros(&mut self, n: usize);
99
100    /// Append a "null" value to the array.
101    ///
102    /// Implementors should panic if this method is called on a non-nullable [`ArrayBuilder`].
103    fn append_null(&mut self) {
104        self.append_nulls(1)
105    }
106
107    /// The inner part of `append_nulls`.
108    ///
109    /// # Safety
110    ///
111    /// The array builder must be nullable.
112    unsafe fn append_nulls_unchecked(&mut self, n: usize);
113
114    /// Appends n "null" values to the array.
115    ///
116    /// Implementors should panic if this method is called on a non-nullable [`ArrayBuilder`].
117    fn append_nulls(&mut self, n: usize) {
118        assert!(
119            self.dtype().is_nullable(),
120            "tried to append {n} nulls to a non-nullable array builder"
121        );
122
123        // SAFETY: We check above that the array builder is nullable.
124        unsafe {
125            self.append_nulls_unchecked(n);
126        }
127    }
128
129    /// Appends a default value to the array.
130    fn append_default(&mut self) {
131        self.append_defaults(1)
132    }
133
134    /// Appends n default values to the array.
135    ///
136    /// If the array builder is nullable, then this has the behavior of `self.append_nulls(n)`.
137    /// If the array builder is non-nullable, then it has the behavior of `self.append_zeros(n)`.
138    fn append_defaults(&mut self, n: usize) {
139        if self.dtype().is_nullable() {
140            self.append_nulls(n);
141        } else {
142            self.append_zeros(n);
143        }
144    }
145
146    /// A generic function to append a scalar to the builder.
147    fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()>;
148
149    /// The inner part of `extend_from_array`.
150    ///
151    /// # Safety
152    ///
153    /// The array that must have an equal [`DType`] to the array builder's `DType` (with nullability
154    /// superset semantics).
155    unsafe fn extend_from_array_unchecked(&mut self, array: &dyn Array);
156
157    /// Extends the array with the provided array, canonicalizing if necessary.
158    ///
159    /// Implementors must validate that the passed in [`Array`] has the correct [`DType`].
160    fn extend_from_array(&mut self, array: &dyn Array) {
161        if !self.dtype().eq_with_nullability_superset(array.dtype()) {
162            vortex_panic!(
163                "tried to extend a builder with `DType` {} with an array with `DType {}",
164                self.dtype(),
165                array.dtype()
166            );
167        }
168
169        // SAFETY: We checked that the array had a valid `DType` above.
170        unsafe { self.extend_from_array_unchecked(array) }
171    }
172
173    /// Allocate space for extra `additional` items
174    fn reserve_exact(&mut self, additional: usize);
175
176    /// Override builders validity with the one provided.
177    ///
178    /// Note that this will have no effect on the final array if the array builder is non-nullable.
179    fn set_validity(&mut self, validity: Mask) {
180        if !self.dtype().is_nullable() {
181            return;
182        }
183        assert_eq!(self.len(), validity.len());
184        unsafe { self.set_validity_unchecked(validity) }
185    }
186
187    /// override validity with the one provided, without checking lengths
188    ///
189    /// # Safety
190    ///
191    /// Given validity must have an equal length to [`self.len()`].
192    unsafe fn set_validity_unchecked(&mut self, validity: Mask);
193
194    /// Constructs an Array from the builder components.
195    ///
196    /// # Panics
197    ///
198    /// This function may panic if the builder's methods are called with invalid arguments. If only
199    /// the methods on this interface are used, the builder should not panic. However, specific
200    /// builders have interfaces that may be misused. For example, if the number of values in a
201    /// [PrimitiveBuilder]'s [vortex_buffer::BufferMut] does not match the number of validity bits,
202    /// the PrimitiveBuilder's [Self::finish] will panic.
203    fn finish(&mut self) -> ArrayRef;
204
205    /// Constructs a canonical array directly from the builder.
206    ///
207    /// This method provides a default implementation that creates an [`ArrayRef`] via `finish` and
208    /// then converts it to canonical form. Specific builders can override this with optimized
209    /// implementations that avoid the intermediate [`Array`] creation.
210    fn finish_into_canonical(&mut self) -> Canonical {
211        self.finish().to_canonical()
212    }
213}
214
215/// Construct a new canonical builder for the given [`DType`].
216///
217///
218/// # Example
219///
220/// ```
221/// use vortex_array::builders::{builder_with_capacity, ArrayBuilder};
222/// use vortex_dtype::{DType, Nullability};
223///
224/// // Create a new builder for string data.
225/// let mut builder = builder_with_capacity(&DType::Utf8(Nullability::NonNullable), 4);
226///
227/// builder.append_scalar(&"a".into()).unwrap();
228/// builder.append_scalar(&"b".into()).unwrap();
229/// builder.append_scalar(&"c".into()).unwrap();
230/// builder.append_scalar(&"d".into()).unwrap();
231///
232/// let strings = builder.finish();
233///
234/// assert_eq!(strings.scalar_at(0), "a".into());
235/// assert_eq!(strings.scalar_at(1), "b".into());
236/// assert_eq!(strings.scalar_at(2), "c".into());
237/// assert_eq!(strings.scalar_at(3), "d".into());
238/// ```
239pub fn builder_with_capacity(dtype: &DType, capacity: usize) -> Box<dyn ArrayBuilder> {
240    match dtype {
241        DType::Null => Box::new(NullBuilder::new()),
242        DType::Bool(n) => Box::new(BoolBuilder::with_capacity(*n, capacity)),
243        DType::Primitive(ptype, n) => {
244            match_each_native_ptype!(ptype, |P| {
245                Box::new(PrimitiveBuilder::<P>::with_capacity(*n, capacity))
246            })
247        }
248        DType::Decimal(decimal_type, n) => {
249            match_each_decimal_value_type!(smallest_decimal_value_type(decimal_type), |D| {
250                Box::new(DecimalBuilder::with_capacity::<D>(
251                    capacity,
252                    *decimal_type,
253                    *n,
254                ))
255            })
256        }
257        DType::Utf8(n) => Box::new(VarBinViewBuilder::with_capacity(DType::Utf8(*n), capacity)),
258        DType::Binary(n) => Box::new(VarBinViewBuilder::with_capacity(
259            DType::Binary(*n),
260            capacity,
261        )),
262        DType::Struct(struct_dtype, n) => Box::new(StructBuilder::with_capacity(
263            struct_dtype.clone(),
264            *n,
265            capacity,
266        )),
267        DType::List(dtype, n) => Box::new(ListViewBuilder::<u64, u64>::with_capacity(
268            dtype.clone(),
269            *n,
270            2 * capacity, // Arbitrarily choose 2 times the `offsets` capacity here.
271            capacity,
272        )),
273        DType::FixedSizeList(elem_dtype, list_size, null) => Box::new(
274            FixedSizeListBuilder::with_capacity(elem_dtype.clone(), *list_size, *null, capacity),
275        ),
276        DType::Extension(ext_dtype) => {
277            Box::new(ExtensionBuilder::with_capacity(ext_dtype.clone(), capacity))
278        }
279    }
280}