Skip to main content

vortex_array/builders/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Builders for Vortex arrays.
5//!
6//! Every logical type in Vortex has a canonical (uncompressed) in-memory encoding. This module
7//! provides pre-allocated builders to construct new canonical arrays.
8//!
9//! ## Example:
10//!
11//! ```
12//! use vortex_array::builders::{builder_with_capacity, ArrayBuilder};
13//! use vortex_array::dtype::{DType, Nullability};
14//! use vortex_array::{LEGACY_SESSION, VortexSessionExecute};
15//!
16//! // Create a new builder for string data.
17//! let mut builder = builder_with_capacity(&DType::Utf8(Nullability::NonNullable), 4);
18//!
19//! builder.append_scalar(&"a".into()).unwrap();
20//! builder.append_scalar(&"b".into()).unwrap();
21//! builder.append_scalar(&"c".into()).unwrap();
22//! builder.append_scalar(&"d".into()).unwrap();
23//!
24//! let strings = builder.finish();
25//! let mut ctx = LEGACY_SESSION.create_execution_ctx();
26//!
27//! assert_eq!(strings.execute_scalar(0, &mut ctx).unwrap(), "a".into());
28//! assert_eq!(strings.execute_scalar(1, &mut ctx).unwrap(), "b".into());
29//! assert_eq!(strings.execute_scalar(2, &mut ctx).unwrap(), "c".into());
30//! assert_eq!(strings.execute_scalar(3, &mut ctx).unwrap(), "d".into());
31//! ```
32
33use std::any::Any;
34use std::sync::Arc;
35
36use vortex_error::VortexResult;
37use vortex_error::vortex_panic;
38use vortex_mask::Mask;
39
40use crate::ArrayRef;
41use crate::canonical::Canonical;
42use crate::dtype::DType;
43use crate::match_each_decimal_value_type;
44use crate::match_each_native_ptype;
45use crate::memory::HostAllocatorRef;
46use crate::scalar::Scalar;
47
48mod lazy_null_builder;
49pub(crate) use lazy_null_builder::LazyBitBufferBuilder;
50
51mod bool;
52mod decimal;
53pub mod dict;
54mod extension;
55mod fixed_size_list;
56mod list;
57mod listview;
58mod null;
59mod primitive;
60mod struct_;
61mod varbinview;
62
63pub use bool::*;
64pub use decimal::*;
65pub use extension::*;
66pub use fixed_size_list::*;
67pub use list::*;
68pub use listview::*;
69pub use null::*;
70pub use primitive::*;
71pub use struct_::*;
72pub use varbinview::*;
73
74#[cfg(test)]
75mod tests;
76
/// Default capacity for builders: 1024 (2^10).
///
/// The value mirrors the default capacity used for Arrow arrays.
pub const DEFAULT_BUILDER_CAPACITY: usize = 1 << 10;
81
82pub trait ArrayBuilder: Send {
83    fn as_any(&self) -> &dyn Any;
84
85    fn as_any_mut(&mut self) -> &mut dyn Any;
86
87    fn dtype(&self) -> &DType;
88
89    fn len(&self) -> usize;
90
91    fn is_empty(&self) -> bool {
92        self.len() == 0
93    }
94
95    /// Append a "zero" value to the array.
96    ///
97    /// Zero values are generally determined by [`Scalar::default_value`].
98    fn append_zero(&mut self) {
99        self.append_zeros(1)
100    }
101
102    /// Appends n "zero" values to the array.
103    ///
104    /// Zero values are generally determined by [`Scalar::default_value`].
105    fn append_zeros(&mut self, n: usize);
106
107    /// Append a "null" value to the array.
108    ///
109    /// Implementors should panic if this method is called on a non-nullable [`ArrayBuilder`].
110    fn append_null(&mut self) {
111        self.append_nulls(1)
112    }
113
114    /// The inner part of `append_nulls`.
115    ///
116    /// # Safety
117    ///
118    /// The array builder must be nullable.
119    unsafe fn append_nulls_unchecked(&mut self, n: usize);
120
121    /// Appends n "null" values to the array.
122    ///
123    /// Implementors should panic if this method is called on a non-nullable [`ArrayBuilder`].
124    fn append_nulls(&mut self, n: usize) {
125        assert!(
126            self.dtype().is_nullable(),
127            "tried to append {n} nulls to a non-nullable array builder"
128        );
129
130        // SAFETY: We check above that the array builder is nullable.
131        unsafe {
132            self.append_nulls_unchecked(n);
133        }
134    }
135
136    /// Appends a default value to the array.
137    fn append_default(&mut self) {
138        self.append_defaults(1)
139    }
140
141    /// Appends n default values to the array.
142    ///
143    /// If the array builder is nullable, then this has the behavior of `self.append_nulls(n)`.
144    /// If the array builder is non-nullable, then it has the behavior of `self.append_zeros(n)`.
145    fn append_defaults(&mut self, n: usize) {
146        if self.dtype().is_nullable() {
147            self.append_nulls(n);
148        } else {
149            self.append_zeros(n);
150        }
151    }
152
153    /// A generic function to append a scalar to the builder.
154    fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()>;
155
156    /// The inner part of `extend_from_array`.
157    ///
158    /// # Safety
159    ///
160    /// The array that must have an equal [`DType`] to the array builder's `DType` (with nullability
161    /// superset semantics).
162    unsafe fn extend_from_array_unchecked(&mut self, array: &ArrayRef);
163
164    /// Extends the array with the provided array, canonicalizing if necessary.
165    ///
166    /// Implementors must validate that the passed in [`ArrayRef`] has the correct [`DType`].
167    fn extend_from_array(&mut self, array: &ArrayRef) {
168        if !self.dtype().eq_with_nullability_superset(array.dtype()) {
169            vortex_panic!(
170                "tried to extend a builder with `DType` {} with an array with `DType {}",
171                self.dtype(),
172                array.dtype()
173            );
174        }
175
176        // SAFETY: We checked that the array had a valid `DType` above.
177        unsafe { self.extend_from_array_unchecked(array) }
178    }
179
180    /// Allocate space for extra `additional` items
181    fn reserve_exact(&mut self, additional: usize);
182
183    /// Override builders validity with the one provided.
184    ///
185    /// Note that this will have no effect on the final array if the array builder is non-nullable.
186    fn set_validity(&mut self, validity: Mask) {
187        if !self.dtype().is_nullable() {
188            return;
189        }
190        assert_eq!(self.len(), validity.len());
191        unsafe { self.set_validity_unchecked(validity) }
192    }
193
194    /// override validity with the one provided, without checking lengths
195    ///
196    /// # Safety
197    ///
198    /// Given validity must have an equal length to [`self.len()`](Self::len).
199    unsafe fn set_validity_unchecked(&mut self, validity: Mask);
200
201    /// Constructs an Array from the builder components.
202    ///
203    /// # Panics
204    ///
205    /// This function may panic if the builder's methods are called with invalid arguments. If only
206    /// the methods on this interface are used, the builder should not panic. However, specific
207    /// builders have interfaces that may be misused. For example, if the number of values in a
208    /// [PrimitiveBuilder]'s [vortex_buffer::BufferMut] does not match the number of validity bits,
209    /// the PrimitiveBuilder's [Self::finish] will panic.
210    fn finish(&mut self) -> ArrayRef;
211
212    /// Constructs a canonical array directly from the builder.
213    ///
214    /// This method provides a default implementation that creates an [`ArrayRef`] via `finish` and
215    /// then converts it to canonical form. Specific builders can override this with optimized
216    /// implementations that avoid the intermediate [`ArrayRef`] creation.
217    fn finish_into_canonical(&mut self) -> Canonical;
218}
219
220/// Construct a new canonical builder for the given [`DType`].
221///
222///
223/// # Example
224///
225/// ```
226/// use vortex_array::builders::{builder_with_capacity, ArrayBuilder};
227/// use vortex_array::dtype::{DType, Nullability};
228/// use vortex_array::{LEGACY_SESSION, VortexSessionExecute};
229///
230/// // Create a new builder for string data.
231/// let mut builder = builder_with_capacity(&DType::Utf8(Nullability::NonNullable), 4);
232///
233/// builder.append_scalar(&"a".into()).unwrap();
234/// builder.append_scalar(&"b".into()).unwrap();
235/// builder.append_scalar(&"c".into()).unwrap();
236/// builder.append_scalar(&"d".into()).unwrap();
237///
238/// let strings = builder.finish();
239/// let mut ctx = LEGACY_SESSION.create_execution_ctx();
240///
241/// assert_eq!(strings.execute_scalar(0, &mut ctx).unwrap(), "a".into());
242/// assert_eq!(strings.execute_scalar(1, &mut ctx).unwrap(), "b".into());
243/// assert_eq!(strings.execute_scalar(2, &mut ctx).unwrap(), "c".into());
244/// assert_eq!(strings.execute_scalar(3, &mut ctx).unwrap(), "d".into());
245/// ```
246pub fn builder_with_capacity(dtype: &DType, capacity: usize) -> Box<dyn ArrayBuilder> {
247    match dtype {
248        DType::Null => Box::new(NullBuilder::new()),
249        DType::Bool(n) => Box::new(BoolBuilder::with_capacity(*n, capacity)),
250        DType::Primitive(ptype, n) => {
251            match_each_native_ptype!(ptype, |P| {
252                Box::new(PrimitiveBuilder::<P>::with_capacity(*n, capacity))
253            })
254        }
255        DType::Decimal(decimal_type, n) => {
256            match_each_decimal_value_type!(
257                DecimalType::smallest_decimal_value_type(decimal_type),
258                |D| {
259                    Box::new(DecimalBuilder::with_capacity::<D>(
260                        capacity,
261                        *decimal_type,
262                        *n,
263                    ))
264                }
265            )
266        }
267        DType::Utf8(n) => Box::new(VarBinViewBuilder::with_capacity(DType::Utf8(*n), capacity)),
268        DType::Binary(n) => Box::new(VarBinViewBuilder::with_capacity(
269            DType::Binary(*n),
270            capacity,
271        )),
272        DType::Struct(struct_dtype, n) => Box::new(StructBuilder::with_capacity(
273            struct_dtype.clone(),
274            *n,
275            capacity,
276        )),
277        DType::List(dtype, n) => Box::new(ListViewBuilder::<u64, u64>::with_capacity(
278            Arc::clone(dtype),
279            *n,
280            2 * capacity, // Arbitrarily choose 2 times the `offsets` capacity here.
281            capacity,
282        )),
283        DType::FixedSizeList(elem_dtype, list_size, null) => {
284            Box::new(FixedSizeListBuilder::with_capacity(
285                Arc::clone(elem_dtype),
286                *list_size,
287                *null,
288                capacity,
289            ))
290        }
291        DType::Extension(ext_dtype) => {
292            Box::new(ExtensionBuilder::with_capacity(ext_dtype.clone(), capacity))
293        }
294        DType::Variant(_) => {
295            unimplemented!()
296        }
297    }
298}
299
300/// Construct a new canonical builder for the given [`DType`] using a host
301/// [`crate::memory::HostAllocator`].
302pub fn builder_with_capacity_in(
303    allocator: HostAllocatorRef,
304    dtype: &DType,
305    capacity: usize,
306) -> Box<dyn ArrayBuilder> {
307    let _allocator = allocator;
308    builder_with_capacity(dtype, capacity)
309}