vortex_array/builders/mod.rs
1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Builders for Vortex arrays.
5//!
6//! Every logical type in Vortex has a canonical (uncompressed) in-memory encoding. This module
7//! provides pre-allocated builders to construct new canonical arrays.
8//!
9//! ## Example:
10//!
11//! ```
12//! use vortex_array::builders::{builder_with_capacity, ArrayBuilder};
13//! use vortex_dtype::{DType, Nullability};
14//!
15//! // Create a new builder for string data.
16//! let mut builder = builder_with_capacity(&DType::Utf8(Nullability::NonNullable), 4);
17//!
18//! builder.append_scalar(&"a".into()).unwrap();
19//! builder.append_scalar(&"b".into()).unwrap();
20//! builder.append_scalar(&"c".into()).unwrap();
21//! builder.append_scalar(&"d".into()).unwrap();
22//!
23//! let strings = builder.finish();
24//!
25//! assert_eq!(strings.scalar_at(0), "a".into());
26//! assert_eq!(strings.scalar_at(1), "b".into());
27//! assert_eq!(strings.scalar_at(2), "c".into());
28//! assert_eq!(strings.scalar_at(3), "d".into());
29//! ```
30
31use std::any::Any;
32
33use vortex_dtype::{DType, match_each_native_ptype};
34use vortex_error::{VortexResult, vortex_panic};
35use vortex_mask::Mask;
36use vortex_scalar::{Scalar, match_each_decimal_value_type};
37
38use crate::arrays::smallest_storage_type;
39use crate::canonical::Canonical;
40use crate::{Array, ArrayRef};
41
42mod lazy_null_builder;
43use lazy_null_builder::LazyNullBufferBuilder;
44
45mod bool;
46mod decimal;
47mod extension;
48mod fixed_size_list;
49mod list;
50mod null;
51mod primitive;
52mod struct_;
53mod varbinview;
54
55pub use bool::*;
56pub use decimal::*;
57pub use extension::*;
58pub use fixed_size_list::*;
59pub use list::*;
60pub use null::*;
61pub use primitive::*;
62pub use struct_::*;
63pub use varbinview::*;
64
65#[cfg(test)]
66mod tests;
67
/// The default capacity for builders, expressed as a number of items.
///
/// This is equal to the default capacity for Arrow Arrays.
pub const DEFAULT_BUILDER_CAPACITY: usize = 1024;
72
73pub trait ArrayBuilder: Send {
74 fn as_any(&self) -> &dyn Any;
75
76 fn as_any_mut(&mut self) -> &mut dyn Any;
77
78 fn dtype(&self) -> &DType;
79
80 fn len(&self) -> usize;
81
82 fn is_empty(&self) -> bool {
83 self.len() == 0
84 }
85
86 /// Append a "zero" value to the array.
87 ///
88 /// Zero values are generally determined by [`Scalar::default_value`].
89 fn append_zero(&mut self) {
90 self.append_zeros(1)
91 }
92
93 /// Appends n "zero" values to the array.
94 ///
95 /// Zero values are generally determined by [`Scalar::default_value`].
96 fn append_zeros(&mut self, n: usize);
97
98 /// Append a "null" value to the array.
99 ///
100 /// Implementors should panic if this method is called on a non-nullable [`ArrayBuilder`].
101 fn append_null(&mut self) {
102 self.append_nulls(1)
103 }
104
105 /// The inner part of `append_nulls`.
106 ///
107 /// # Safety
108 ///
109 /// The array builder must be nullable.
110 unsafe fn append_nulls_unchecked(&mut self, n: usize);
111
112 /// Appends n "null" values to the array.
113 ///
114 /// Implementors should panic if this method is called on a non-nullable [`ArrayBuilder`].
115 fn append_nulls(&mut self, n: usize) {
116 assert!(
117 self.dtype().is_nullable(),
118 "tried to append {n} nulls to a non-nullable array builder"
119 );
120
121 // SAFETY: We check above that the array builder is nullable.
122 unsafe {
123 self.append_nulls_unchecked(n);
124 }
125 }
126
127 /// Appends a default value to the array.
128 fn append_default(&mut self) {
129 self.append_defaults(1)
130 }
131
132 /// Appends n default values to the array.
133 ///
134 /// If the array builder is nullable, then this has the behavior of `self.append_nulls(n)`.
135 /// If the array builder is non-nullable, then it has the behavior of `self.append_zeros(n)`.
136 fn append_defaults(&mut self, n: usize) {
137 if self.dtype().is_nullable() {
138 self.append_nulls(n);
139 } else {
140 self.append_zeros(n);
141 }
142 }
143
144 /// A generic function to append a scalar to the builder.
145 fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()>;
146
147 /// The inner part of `extend_from_array`.
148 ///
149 /// # Safety
150 ///
151 /// The array that must have an equal [`DType`] to the array builder's `DType` (with nullability
152 /// superset semantics).
153 unsafe fn extend_from_array_unchecked(&mut self, array: &dyn Array);
154
155 /// Extends the array with the provided array, canonicalizing if necessary.
156 ///
157 /// Implementors must validate that the passed in [`Array`] has the correct [`DType`].
158 fn extend_from_array(&mut self, array: &dyn Array) {
159 if !self.dtype().eq_with_nullability_superset(array.dtype()) {
160 vortex_panic!(
161 "tried to extend a builder with `DType` {} with an array with `DType {}",
162 self.dtype(),
163 array.dtype()
164 );
165 }
166
167 // SAFETY: We checked that the array had a valid `DType` above.
168 unsafe { self.extend_from_array_unchecked(array) }
169 }
170
171 /// Ensure that the builder can hold at least `capacity` number of items
172 fn ensure_capacity(&mut self, capacity: usize);
173
174 /// Override builders validity with the one provided.
175 ///
176 /// Note that this will have no effect on the final array if the array builder is non-nullable.
177 fn set_validity(&mut self, validity: Mask);
178
179 /// Constructs an Array from the builder components.
180 ///
181 /// # Panics
182 ///
183 /// This function may panic if the builder's methods are called with invalid arguments. If only
184 /// the methods on this interface are used, the builder should not panic. However, specific
185 /// builders have interfaces that may be misused. For example, if the number of values in a
186 /// [PrimitiveBuilder]'s [vortex_buffer::BufferMut] does not match the number of validity bits,
187 /// the PrimitiveBuilder's [Self::finish] will panic.
188 fn finish(&mut self) -> ArrayRef;
189
190 /// Constructs a canonical array directly from the builder.
191 ///
192 /// This method provides a default implementation that creates an [`ArrayRef`] via `finish` and
193 /// then converts it to canonical form. Specific builders can override this with optimized
194 /// implementations that avoid the intermediate [`Array`] creation.
195 fn finish_into_canonical(&mut self) -> Canonical {
196 self.finish().to_canonical()
197 }
198}
199
200/// Construct a new canonical builder for the given [`DType`].
201///
202///
203/// # Example
204///
205/// ```
206/// use vortex_array::builders::{builder_with_capacity, ArrayBuilder};
207/// use vortex_dtype::{DType, Nullability};
208///
209/// // Create a new builder for string data.
210/// let mut builder = builder_with_capacity(&DType::Utf8(Nullability::NonNullable), 4);
211///
212/// builder.append_scalar(&"a".into()).unwrap();
213/// builder.append_scalar(&"b".into()).unwrap();
214/// builder.append_scalar(&"c".into()).unwrap();
215/// builder.append_scalar(&"d".into()).unwrap();
216///
217/// let strings = builder.finish();
218///
219/// assert_eq!(strings.scalar_at(0), "a".into());
220/// assert_eq!(strings.scalar_at(1), "b".into());
221/// assert_eq!(strings.scalar_at(2), "c".into());
222/// assert_eq!(strings.scalar_at(3), "d".into());
223/// ```
224pub fn builder_with_capacity(dtype: &DType, capacity: usize) -> Box<dyn ArrayBuilder> {
225 match dtype {
226 DType::Null => Box::new(NullBuilder::new()),
227 DType::Bool(n) => Box::new(BoolBuilder::with_capacity(*n, capacity)),
228 DType::Primitive(ptype, n) => {
229 match_each_native_ptype!(ptype, |P| {
230 Box::new(PrimitiveBuilder::<P>::with_capacity(*n, capacity))
231 })
232 }
233 DType::Decimal(decimal_type, n) => {
234 match_each_decimal_value_type!(smallest_storage_type(decimal_type), |D| {
235 Box::new(DecimalBuilder::with_capacity::<D>(
236 capacity,
237 *decimal_type,
238 *n,
239 ))
240 })
241 }
242 DType::Utf8(n) => Box::new(VarBinViewBuilder::with_capacity(DType::Utf8(*n), capacity)),
243 DType::Binary(n) => Box::new(VarBinViewBuilder::with_capacity(
244 DType::Binary(*n),
245 capacity,
246 )),
247 DType::Struct(struct_dtype, n) => Box::new(StructBuilder::with_capacity(
248 struct_dtype.clone(),
249 *n,
250 capacity,
251 )),
252 DType::List(dtype, n) => Box::new(ListBuilder::<u64>::with_capacity(
253 dtype.clone(),
254 *n,
255 capacity,
256 )),
257 DType::FixedSizeList(elem_dtype, list_size, null) => Box::new(
258 FixedSizeListBuilder::with_capacity(elem_dtype.clone(), *list_size, *null, capacity),
259 ),
260 DType::Extension(ext_dtype) => {
261 Box::new(ExtensionBuilder::with_capacity(ext_dtype.clone(), capacity))
262 }
263 }
264}