vortex_array/builders/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Builders for Vortex arrays.
5//!
6//! Every logical type in Vortex has a canonical (uncompressed) in-memory encoding. This module
7//! provides pre-allocated builders to construct new canonical arrays.
8//!
9//! ## Example:
10//!
11//! ```
12//! use vortex_array::builders::{builder_with_capacity, ArrayBuilder};
13//! use vortex_dtype::{DType, Nullability};
14//!
15//! // Create a new builder for string data.
16//! let mut builder = builder_with_capacity(&DType::Utf8(Nullability::NonNullable), 4);
17//!
18//! builder.append_scalar(&"a".into()).unwrap();
19//! builder.append_scalar(&"b".into()).unwrap();
20//! builder.append_scalar(&"c".into()).unwrap();
21//! builder.append_scalar(&"d".into()).unwrap();
22//!
23//! let strings = builder.finish();
24//!
25//! assert_eq!(strings.scalar_at(0), "a".into());
26//! assert_eq!(strings.scalar_at(1), "b".into());
27//! assert_eq!(strings.scalar_at(2), "c".into());
28//! assert_eq!(strings.scalar_at(3), "d".into());
29//! ```
30
31use std::any::Any;
32
33use vortex_dtype::DType;
34use vortex_dtype::match_each_decimal_value_type;
35use vortex_dtype::match_each_native_ptype;
36use vortex_error::VortexResult;
37use vortex_error::vortex_panic;
38use vortex_mask::Mask;
39use vortex_scalar::Scalar;
40
41use crate::Array;
42use crate::ArrayRef;
43use crate::canonical::Canonical;
44
45mod lazy_null_builder;
46pub(crate) use lazy_null_builder::LazyBitBufferBuilder;
47
48mod bool;
49mod decimal;
50pub mod dict;
51mod extension;
52mod fixed_size_list;
53mod list;
54mod listview;
55mod null;
56mod primitive;
57mod struct_;
58mod varbinview;
59
60pub use bool::*;
61pub use decimal::*;
62pub use extension::*;
63pub use fixed_size_list::*;
64pub use list::*;
65pub use listview::*;
66pub use null::*;
67pub use primitive::*;
68pub use struct_::*;
69pub use varbinview::*;
70
71#[cfg(test)]
72mod tests;
73
/// Number of elements a builder reserves space for when no explicit capacity is requested.
///
/// The value matches the default capacity used for Arrow arrays.
pub const DEFAULT_BUILDER_CAPACITY: usize = 1024;
78
79pub trait ArrayBuilder: Send {
80    fn as_any(&self) -> &dyn Any;
81
82    fn as_any_mut(&mut self) -> &mut dyn Any;
83
84    fn dtype(&self) -> &DType;
85
86    fn len(&self) -> usize;
87
88    fn is_empty(&self) -> bool {
89        self.len() == 0
90    }
91
92    /// Append a "zero" value to the array.
93    ///
94    /// Zero values are generally determined by [`Scalar::default_value`].
95    fn append_zero(&mut self) {
96        self.append_zeros(1)
97    }
98
99    /// Appends n "zero" values to the array.
100    ///
101    /// Zero values are generally determined by [`Scalar::default_value`].
102    fn append_zeros(&mut self, n: usize);
103
104    /// Append a "null" value to the array.
105    ///
106    /// Implementors should panic if this method is called on a non-nullable [`ArrayBuilder`].
107    fn append_null(&mut self) {
108        self.append_nulls(1)
109    }
110
111    /// The inner part of `append_nulls`.
112    ///
113    /// # Safety
114    ///
115    /// The array builder must be nullable.
116    unsafe fn append_nulls_unchecked(&mut self, n: usize);
117
118    /// Appends n "null" values to the array.
119    ///
120    /// Implementors should panic if this method is called on a non-nullable [`ArrayBuilder`].
121    fn append_nulls(&mut self, n: usize) {
122        assert!(
123            self.dtype().is_nullable(),
124            "tried to append {n} nulls to a non-nullable array builder"
125        );
126
127        // SAFETY: We check above that the array builder is nullable.
128        unsafe {
129            self.append_nulls_unchecked(n);
130        }
131    }
132
133    /// Appends a default value to the array.
134    fn append_default(&mut self) {
135        self.append_defaults(1)
136    }
137
138    /// Appends n default values to the array.
139    ///
140    /// If the array builder is nullable, then this has the behavior of `self.append_nulls(n)`.
141    /// If the array builder is non-nullable, then it has the behavior of `self.append_zeros(n)`.
142    fn append_defaults(&mut self, n: usize) {
143        if self.dtype().is_nullable() {
144            self.append_nulls(n);
145        } else {
146            self.append_zeros(n);
147        }
148    }
149
150    /// A generic function to append a scalar to the builder.
151    fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()>;
152
153    /// The inner part of `extend_from_array`.
154    ///
155    /// # Safety
156    ///
157    /// The array that must have an equal [`DType`] to the array builder's `DType` (with nullability
158    /// superset semantics).
159    unsafe fn extend_from_array_unchecked(&mut self, array: &dyn Array);
160
161    /// Extends the array with the provided array, canonicalizing if necessary.
162    ///
163    /// Implementors must validate that the passed in [`Array`] has the correct [`DType`].
164    fn extend_from_array(&mut self, array: &dyn Array) {
165        if !self.dtype().eq_with_nullability_superset(array.dtype()) {
166            vortex_panic!(
167                "tried to extend a builder with `DType` {} with an array with `DType {}",
168                self.dtype(),
169                array.dtype()
170            );
171        }
172
173        // SAFETY: We checked that the array had a valid `DType` above.
174        unsafe { self.extend_from_array_unchecked(array) }
175    }
176
177    /// Allocate space for extra `additional` items
178    fn reserve_exact(&mut self, additional: usize);
179
180    /// Override builders validity with the one provided.
181    ///
182    /// Note that this will have no effect on the final array if the array builder is non-nullable.
183    fn set_validity(&mut self, validity: Mask) {
184        if !self.dtype().is_nullable() {
185            return;
186        }
187        assert_eq!(self.len(), validity.len());
188        unsafe { self.set_validity_unchecked(validity) }
189    }
190
191    /// override validity with the one provided, without checking lengths
192    ///
193    /// # Safety
194    ///
195    /// Given validity must have an equal length to [`self.len()`].
196    unsafe fn set_validity_unchecked(&mut self, validity: Mask);
197
198    /// Constructs an Array from the builder components.
199    ///
200    /// # Panics
201    ///
202    /// This function may panic if the builder's methods are called with invalid arguments. If only
203    /// the methods on this interface are used, the builder should not panic. However, specific
204    /// builders have interfaces that may be misused. For example, if the number of values in a
205    /// [PrimitiveBuilder]'s [vortex_buffer::BufferMut] does not match the number of validity bits,
206    /// the PrimitiveBuilder's [Self::finish] will panic.
207    fn finish(&mut self) -> ArrayRef;
208
209    /// Constructs a canonical array directly from the builder.
210    ///
211    /// This method provides a default implementation that creates an [`ArrayRef`] via `finish` and
212    /// then converts it to canonical form. Specific builders can override this with optimized
213    /// implementations that avoid the intermediate [`Array`] creation.
214    fn finish_into_canonical(&mut self) -> Canonical {
215        self.finish().to_canonical()
216    }
217}
218
219/// Construct a new canonical builder for the given [`DType`].
220///
221///
222/// # Example
223///
224/// ```
225/// use vortex_array::builders::{builder_with_capacity, ArrayBuilder};
226/// use vortex_dtype::{DType, Nullability};
227///
228/// // Create a new builder for string data.
229/// let mut builder = builder_with_capacity(&DType::Utf8(Nullability::NonNullable), 4);
230///
231/// builder.append_scalar(&"a".into()).unwrap();
232/// builder.append_scalar(&"b".into()).unwrap();
233/// builder.append_scalar(&"c".into()).unwrap();
234/// builder.append_scalar(&"d".into()).unwrap();
235///
236/// let strings = builder.finish();
237///
238/// assert_eq!(strings.scalar_at(0), "a".into());
239/// assert_eq!(strings.scalar_at(1), "b".into());
240/// assert_eq!(strings.scalar_at(2), "c".into());
241/// assert_eq!(strings.scalar_at(3), "d".into());
242/// ```
243pub fn builder_with_capacity(dtype: &DType, capacity: usize) -> Box<dyn ArrayBuilder> {
244    match dtype {
245        DType::Null => Box::new(NullBuilder::new()),
246        DType::Bool(n) => Box::new(BoolBuilder::with_capacity(*n, capacity)),
247        DType::Primitive(ptype, n) => {
248            match_each_native_ptype!(ptype, |P| {
249                Box::new(PrimitiveBuilder::<P>::with_capacity(*n, capacity))
250            })
251        }
252        DType::Decimal(decimal_type, n) => {
253            match_each_decimal_value_type!(
254                DecimalType::smallest_decimal_value_type(decimal_type),
255                |D| {
256                    Box::new(DecimalBuilder::with_capacity::<D>(
257                        capacity,
258                        *decimal_type,
259                        *n,
260                    ))
261                }
262            )
263        }
264        DType::Utf8(n) => Box::new(VarBinViewBuilder::with_capacity(DType::Utf8(*n), capacity)),
265        DType::Binary(n) => Box::new(VarBinViewBuilder::with_capacity(
266            DType::Binary(*n),
267            capacity,
268        )),
269        DType::Struct(struct_dtype, n) => Box::new(StructBuilder::with_capacity(
270            struct_dtype.clone(),
271            *n,
272            capacity,
273        )),
274        DType::List(dtype, n) => Box::new(ListViewBuilder::<u64, u64>::with_capacity(
275            dtype.clone(),
276            *n,
277            2 * capacity, // Arbitrarily choose 2 times the `offsets` capacity here.
278            capacity,
279        )),
280        DType::FixedSizeList(elem_dtype, list_size, null) => Box::new(
281            FixedSizeListBuilder::with_capacity(elem_dtype.clone(), *list_size, *null, capacity),
282        ),
283        DType::Extension(ext_dtype) => {
284            Box::new(ExtensionBuilder::with_capacity(ext_dtype.clone(), capacity))
285        }
286    }
287}