vortex_array/builders/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Builders for Vortex arrays.
5//!
6//! Every logical type in Vortex has a canonical (uncompressed) in-memory encoding. This module
7//! provides pre-allocated builders to construct new canonical arrays.
8//!
9//! ## Example:
10//!
11//! ```
12//! use vortex_array::builders::{builder_with_capacity, ArrayBuilderExt};
13//! use vortex_dtype::{DType, Nullability};
14//!
15//! // Create a new builder for string data.
16//! let mut builder = builder_with_capacity(&DType::Utf8(Nullability::NonNullable), 4);
17//!
18//! builder.append_scalar(&"a".into()).unwrap();
19//! builder.append_scalar(&"b".into()).unwrap();
20//! builder.append_scalar(&"c".into()).unwrap();
21//! builder.append_scalar(&"d".into()).unwrap();
22//!
23//! let strings = builder.finish();
24//!
25//! assert_eq!(strings.scalar_at(0), "a".into());
26//! assert_eq!(strings.scalar_at(1), "b".into());
27//! assert_eq!(strings.scalar_at(2), "c".into());
28//! assert_eq!(strings.scalar_at(3), "d".into());
29//! ```
30
31mod bool;
32mod decimal;
33mod extension;
34mod fixed_size_list;
35mod lazy_validity_builder;
36mod list;
37mod null;
38mod primitive;
39mod struct_;
40mod varbinview;
41
42use std::any::Any;
43
44pub use bool::*;
45pub use decimal::*;
46pub use extension::*;
47pub use fixed_size_list::*;
48pub use list::*;
49pub use null::*;
50pub use primitive::*;
51pub use struct_::*;
52pub use varbinview::*;
53use vortex_dtype::{DType, match_each_native_ptype};
54use vortex_error::{VortexResult, vortex_bail, vortex_err};
55use vortex_mask::Mask;
56use vortex_scalar::{
57    BinaryScalar, BoolScalar, DecimalValue, ExtScalar, ListScalar, PrimitiveScalar, Scalar,
58    ScalarValue, StructScalar, Utf8Scalar, match_each_decimal_value, match_each_decimal_value_type,
59};
60
61use crate::arrays::smallest_storage_type;
62use crate::{Array, ArrayRef};
63
64pub trait ArrayBuilder: Send {
65    fn as_any(&self) -> &dyn Any;
66
67    fn as_any_mut(&mut self) -> &mut dyn Any;
68
69    fn dtype(&self) -> &DType;
70
71    fn len(&self) -> usize;
72
73    fn is_empty(&self) -> bool {
74        self.len() == 0
75    }
76
77    // TODO(connor): We should probably merge these 4 methods to `append_defaults`.
78
79    /// Append a "zero" value to the array.
80    fn append_zero(&mut self) {
81        self.append_zeros(1)
82    }
83
84    /// Appends n "zero" values to the array.
85    fn append_zeros(&mut self, n: usize);
86
87    /// Append a "null" value to the array.
88    fn append_null(&mut self) {
89        self.append_nulls(1)
90    }
91
92    /// Appends n "null" values to the array.
93    fn append_nulls(&mut self, n: usize);
94
95    // TODO(connor): Document the fact that the passed in `array` is validated to have the correct
96    // dtype via the VTable.
97    /// Extends the array with the provided array, canonicalizing if necessary.
98    fn extend_from_array(&mut self, array: &dyn Array) -> VortexResult<()>;
99
100    /// Ensure that the builder can hold at least `capacity` number of items
101    fn ensure_capacity(&mut self, capacity: usize);
102
103    /// Override builders validity with the one provided
104    fn set_validity(&mut self, validity: Mask);
105
106    /// Constructs an Array from the builder components.
107    ///
108    /// # Panics
109    ///
110    /// This function may panic if the builder's methods are called with invalid arguments. If only
111    /// the methods on this interface are used, the builder should not panic. However, specific
112    /// builders have interfaces that may be misused. For example, if the number of values in a
113    /// [PrimitiveBuilder]'s [vortex_buffer::BufferMut] does not match the number of validity bits,
114    /// the PrimitiveBuilder's [Self::finish] will panic.
115    fn finish(&mut self) -> ArrayRef;
116}
117
118/// Construct a new canonical builder for the given [`DType`].
119///
120///
121/// # Example
122///
123/// ```
124/// use vortex_array::builders::{builder_with_capacity, ArrayBuilderExt};
125/// use vortex_dtype::{DType, Nullability};
126///
127/// // Create a new builder for string data.
128/// let mut builder = builder_with_capacity(&DType::Utf8(Nullability::NonNullable), 4);
129///
130/// builder.append_scalar(&"a".into()).unwrap();
131/// builder.append_scalar(&"b".into()).unwrap();
132/// builder.append_scalar(&"c".into()).unwrap();
133/// builder.append_scalar(&"d".into()).unwrap();
134///
135/// let strings = builder.finish();
136///
137/// assert_eq!(strings.scalar_at(0), "a".into());
138/// assert_eq!(strings.scalar_at(1), "b".into());
139/// assert_eq!(strings.scalar_at(2), "c".into());
140/// assert_eq!(strings.scalar_at(3), "d".into());
141/// ```
142pub fn builder_with_capacity(dtype: &DType, capacity: usize) -> Box<dyn ArrayBuilder> {
143    match dtype {
144        DType::Null => Box::new(NullBuilder::new()),
145        DType::Bool(n) => Box::new(BoolBuilder::with_capacity(*n, capacity)),
146        DType::Primitive(ptype, n) => {
147            match_each_native_ptype!(ptype, |P| {
148                Box::new(PrimitiveBuilder::<P>::with_capacity(*n, capacity))
149            })
150        }
151        DType::Decimal(decimal_type, n) => {
152            match_each_decimal_value_type!(smallest_storage_type(decimal_type), |D| {
153                Box::new(DecimalBuilder::with_capacity::<D>(
154                    capacity,
155                    *decimal_type,
156                    *n,
157                ))
158            })
159        }
160        DType::Utf8(n) => Box::new(VarBinViewBuilder::with_capacity(DType::Utf8(*n), capacity)),
161        DType::Binary(n) => Box::new(VarBinViewBuilder::with_capacity(
162            DType::Binary(*n),
163            capacity,
164        )),
165        DType::Struct(struct_dtype, n) => Box::new(StructBuilder::with_capacity(
166            struct_dtype.clone(),
167            *n,
168            capacity,
169        )),
170        DType::List(dtype, n) => Box::new(ListBuilder::<u64>::with_capacity(
171            dtype.clone(),
172            *n,
173            capacity,
174        )),
175        DType::FixedSizeList(..) => {
176            unimplemented!("TODO(connor)[FixedSizeList]")
177        }
178        DType::Extension(ext_dtype) => {
179            Box::new(ExtensionBuilder::with_capacity(ext_dtype.clone(), capacity))
180        }
181    }
182}
183
184pub trait ArrayBuilderExt: ArrayBuilder {
185    /// A generic function to append a scalar value to the builder.
186    fn append_scalar_value(&mut self, value: ScalarValue) -> VortexResult<()> {
187        if value.is_null() {
188            self.append_null();
189            Ok(())
190        } else {
191            self.append_scalar(&Scalar::new(self.dtype().clone(), value))
192        }
193    }
194
195    /// A generic function to append a scalar to the builder.
196    fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()> {
197        if scalar.dtype() != self.dtype() {
198            vortex_bail!(
199                "Builder has dtype {:?}, scalar has {:?}",
200                self.dtype(),
201                scalar.dtype()
202            )
203        }
204        match scalar.dtype() {
205            DType::Null => self
206                .as_any_mut()
207                .downcast_mut::<NullBuilder>()
208                .ok_or_else(|| vortex_err!("Cannot append null scalar to non-null builder"))?
209                .append_null(),
210            DType::Bool(_) => self
211                .as_any_mut()
212                .downcast_mut::<BoolBuilder>()
213                .ok_or_else(|| vortex_err!("Cannot append bool scalar to non-bool builder"))?
214                .append_option(BoolScalar::try_from(scalar)?.value()),
215            DType::Primitive(ptype, ..) => {
216                match_each_native_ptype!(ptype, |P| {
217                    self.as_any_mut()
218                        .downcast_mut::<PrimitiveBuilder<P>>()
219                        .ok_or_else(|| {
220                            vortex_err!("Cannot append primitive scalar to non-primitive builder")
221                        })?
222                        .append_option(PrimitiveScalar::try_from(scalar)?.typed_value::<P>())
223                })
224            }
225            DType::Decimal(..) => {
226                let builder = self
227                    .as_any_mut()
228                    .downcast_mut::<DecimalBuilder>()
229                    .ok_or_else(|| {
230                        vortex_err!("Cannot append decimal scalar to non-decimal builder")
231                    })?;
232                match scalar.as_decimal().decimal_value() {
233                    None => builder.append_null(),
234                    Some(v) => match_each_decimal_value!(v, |dec_val| {
235                        builder.append_value(dec_val);
236                    }),
237                }
238            }
239            DType::Utf8(_) => self
240                .as_any_mut()
241                .downcast_mut::<VarBinViewBuilder>()
242                .ok_or_else(|| vortex_err!("Cannot append utf8 scalar to non-utf8 builder"))?
243                .append_option(Utf8Scalar::try_from(scalar)?.value()),
244            DType::Binary(_) => self
245                .as_any_mut()
246                .downcast_mut::<VarBinViewBuilder>()
247                .ok_or_else(|| vortex_err!("Cannot append binary scalar to non-binary builder"))?
248                .append_option(BinaryScalar::try_from(scalar)?.value()),
249            DType::Struct(..) => self
250                .as_any_mut()
251                .downcast_mut::<StructBuilder>()
252                .ok_or_else(|| vortex_err!("Cannot append struct scalar to non-struct builder"))?
253                .append_value(StructScalar::try_from(scalar)?)?,
254            DType::List(..) => self
255                .as_any_mut()
256                .downcast_mut::<ListBuilder<u64>>()
257                .ok_or_else(|| vortex_err!("Cannot append list scalar to non-list builder"))?
258                .append_value(ListScalar::try_from(scalar)?)?,
259            DType::FixedSizeList(..) => {
260                unimplemented!("TODO(connor)[FixedSizeList]")
261            }
262            DType::Extension(..) => self
263                .as_any_mut()
264                .downcast_mut::<ExtensionBuilder>()
265                .ok_or_else(|| {
266                    vortex_err!("Cannot append extension scalar to non-extension builder")
267                })?
268                .append_value(ExtScalar::try_from(scalar)?)?,
269        }
270        Ok(())
271    }
272}
273
274impl<T: ?Sized + ArrayBuilder> ArrayBuilderExt for T {}