vortex_array/builders/
mod.rs

1//! Builders for Vortex arrays.
2//!
3//! Every logical type in Vortex has a canonical (uncompressed) in-memory encoding. This module
4//! provides pre-allocated builders to construct new canonical arrays.
5//!
6//! ## Example:
7//!
8//! ```
9//! use vortex_array::builders::{builder_with_capacity, ArrayBuilderExt};
10//! use vortex_dtype::{DType, Nullability};
11//!
12//! // Create a new builder for string data.
13//! let mut builder = builder_with_capacity(&DType::Utf8(Nullability::NonNullable), 4);
14//!
15//! builder.append_scalar(&"a".into()).unwrap();
16//! builder.append_scalar(&"b".into()).unwrap();
17//! builder.append_scalar(&"c".into()).unwrap();
18//! builder.append_scalar(&"d".into()).unwrap();
19//!
20//! let strings = builder.finish();
21//!
22//! assert_eq!(strings.scalar_at(0).unwrap(), "a".into());
23//! assert_eq!(strings.scalar_at(1).unwrap(), "b".into());
24//! assert_eq!(strings.scalar_at(2).unwrap(), "c".into());
25//! assert_eq!(strings.scalar_at(3).unwrap(), "d".into());
26//! ```
27
28mod bool;
29mod decimal;
30mod extension;
31mod lazy_validity_builder;
32mod list;
33mod null;
34mod primitive;
35mod struct_;
36mod varbinview;
37
38use std::any::Any;
39
40pub use bool::*;
41pub use decimal::*;
42pub use extension::*;
43pub use list::*;
44pub use null::*;
45pub use primitive::*;
46pub use struct_::*;
47pub use varbinview::*;
48use vortex_dtype::{DType, match_each_native_ptype};
49use vortex_error::{VortexResult, vortex_bail, vortex_err};
50use vortex_mask::Mask;
51use vortex_scalar::{
52    BinaryScalar, BoolScalar, DecimalValue, ExtScalar, ListScalar, PrimitiveScalar, Scalar,
53    ScalarValue, StructScalar, Utf8Scalar, match_each_decimal_value, match_each_decimal_value_type,
54};
55
56use crate::arrays::smallest_storage_type;
57use crate::{Array, ArrayRef};
58
59pub trait ArrayBuilder: Send {
60    fn as_any(&self) -> &dyn Any;
61
62    fn as_any_mut(&mut self) -> &mut dyn Any;
63
64    fn dtype(&self) -> &DType;
65
66    fn len(&self) -> usize;
67
68    fn is_empty(&self) -> bool {
69        self.len() == 0
70    }
71
72    /// Append a "zero" value to the array.
73    fn append_zero(&mut self) {
74        self.append_zeros(1)
75    }
76
77    /// Appends n "zero" values to the array.
78    fn append_zeros(&mut self, n: usize);
79
80    /// Append a "null" value to the array.
81    fn append_null(&mut self) {
82        self.append_nulls(1)
83    }
84
85    /// Appends n "null" values to the array.
86    fn append_nulls(&mut self, n: usize);
87
88    /// Extends the array with the provided array, canonicalizing if necessary.
89    fn extend_from_array(&mut self, array: &dyn Array) -> VortexResult<()>;
90
91    /// Ensure that the builder can hold at least `capacity` number of items
92    fn ensure_capacity(&mut self, capacity: usize);
93
94    /// Override builders validity with the one provided
95    fn set_validity(&mut self, validity: Mask);
96
97    /// Constructs an Array from the builder components.
98    ///
99    /// # Panics
100    ///
101    /// This function may panic if the builder's methods are called with invalid arguments. If only
102    /// the methods on this interface are used, the builder should not panic. However, specific
103    /// builders have interfaces that may be misused. For example, if the number of values in a
104    /// [PrimitiveBuilder]'s [vortex_buffer::BufferMut] does not match the number of validity bits,
105    /// the PrimitiveBuilder's [Self::finish] will panic.
106    fn finish(&mut self) -> ArrayRef;
107}
108
109/// Construct a new canonical builder for the given [`DType`].
110///
111///
112/// # Example
113///
114/// ```
115/// use vortex_array::builders::{builder_with_capacity, ArrayBuilderExt};
116/// use vortex_dtype::{DType, Nullability};
117///
118/// // Create a new builder for string data.
119/// let mut builder = builder_with_capacity(&DType::Utf8(Nullability::NonNullable), 4);
120///
121/// builder.append_scalar(&"a".into()).unwrap();
122/// builder.append_scalar(&"b".into()).unwrap();
123/// builder.append_scalar(&"c".into()).unwrap();
124/// builder.append_scalar(&"d".into()).unwrap();
125///
126/// let strings = builder.finish();
127///
128/// assert_eq!(strings.scalar_at(0).unwrap(), "a".into());
129/// assert_eq!(strings.scalar_at(1).unwrap(), "b".into());
130/// assert_eq!(strings.scalar_at(2).unwrap(), "c".into());
131/// assert_eq!(strings.scalar_at(3).unwrap(), "d".into());
132/// ```
133pub fn builder_with_capacity(dtype: &DType, capacity: usize) -> Box<dyn ArrayBuilder> {
134    match dtype {
135        DType::Null => Box::new(NullBuilder::new()),
136        DType::Bool(n) => Box::new(BoolBuilder::with_capacity(*n, capacity)),
137        DType::Primitive(ptype, n) => {
138            match_each_native_ptype!(ptype, |P| {
139                Box::new(PrimitiveBuilder::<P>::with_capacity(*n, capacity))
140            })
141        }
142        DType::Decimal(decimal_type, n) => {
143            match_each_decimal_value_type!(smallest_storage_type(decimal_type), |D| {
144                Box::new(DecimalBuilder::with_capacity::<D>(
145                    capacity,
146                    *decimal_type,
147                    *n,
148                ))
149            })
150        }
151        DType::Utf8(n) => Box::new(VarBinViewBuilder::with_capacity(DType::Utf8(*n), capacity)),
152        DType::Binary(n) => Box::new(VarBinViewBuilder::with_capacity(
153            DType::Binary(*n),
154            capacity,
155        )),
156        DType::Struct(struct_dtype, n) => Box::new(StructBuilder::with_capacity(
157            struct_dtype.clone(),
158            *n,
159            capacity,
160        )),
161        DType::List(dtype, n) => Box::new(ListBuilder::<u64>::with_capacity(
162            dtype.clone(),
163            *n,
164            capacity,
165        )),
166        DType::Extension(ext_dtype) => {
167            Box::new(ExtensionBuilder::with_capacity(ext_dtype.clone(), capacity))
168        }
169    }
170}
171
172pub trait ArrayBuilderExt: ArrayBuilder {
173    /// A generic function to append a scalar value to the builder.
174    fn append_scalar_value(&mut self, value: ScalarValue) -> VortexResult<()> {
175        if value.is_null() {
176            self.append_null();
177            Ok(())
178        } else {
179            self.append_scalar(&Scalar::new(self.dtype().clone(), value))
180        }
181    }
182
183    /// A generic function to append a scalar to the builder.
184    fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()> {
185        if scalar.dtype() != self.dtype() {
186            vortex_bail!(
187                "Builder has dtype {:?}, scalar has {:?}",
188                self.dtype(),
189                scalar.dtype()
190            )
191        }
192        match scalar.dtype() {
193            DType::Null => self
194                .as_any_mut()
195                .downcast_mut::<NullBuilder>()
196                .ok_or_else(|| vortex_err!("Cannot append null scalar to non-null builder"))?
197                .append_null(),
198            DType::Bool(_) => self
199                .as_any_mut()
200                .downcast_mut::<BoolBuilder>()
201                .ok_or_else(|| vortex_err!("Cannot append bool scalar to non-bool builder"))?
202                .append_option(BoolScalar::try_from(scalar)?.value()),
203            DType::Primitive(ptype, ..) => {
204                match_each_native_ptype!(ptype, |P| {
205                    self.as_any_mut()
206                        .downcast_mut::<PrimitiveBuilder<P>>()
207                        .ok_or_else(|| {
208                            vortex_err!("Cannot append primitive scalar to non-primitive builder")
209                        })?
210                        .append_option(PrimitiveScalar::try_from(scalar)?.typed_value::<P>())
211                })
212            }
213            DType::Decimal(..) => {
214                let builder = self
215                    .as_any_mut()
216                    .downcast_mut::<DecimalBuilder>()
217                    .ok_or_else(|| {
218                        vortex_err!("Cannot append decimal scalar to non-decimal builder")
219                    })?;
220                match scalar.as_decimal().decimal_value() {
221                    None => builder.append_null(),
222                    Some(v) => match_each_decimal_value!(v, |dec_val| {
223                        builder.append_value(*dec_val);
224                    }),
225                }
226            }
227            DType::Utf8(_) => self
228                .as_any_mut()
229                .downcast_mut::<VarBinViewBuilder>()
230                .ok_or_else(|| vortex_err!("Cannot append utf8 scalar to non-utf8 builder"))?
231                .append_option(Utf8Scalar::try_from(scalar)?.value()),
232            DType::Binary(_) => self
233                .as_any_mut()
234                .downcast_mut::<VarBinViewBuilder>()
235                .ok_or_else(|| vortex_err!("Cannot append binary scalar to non-binary builder"))?
236                .append_option(BinaryScalar::try_from(scalar)?.value()),
237            DType::Struct(..) => self
238                .as_any_mut()
239                .downcast_mut::<StructBuilder>()
240                .ok_or_else(|| vortex_err!("Cannot append struct scalar to non-struct builder"))?
241                .append_value(StructScalar::try_from(scalar)?)?,
242            DType::List(..) => self
243                .as_any_mut()
244                .downcast_mut::<ListBuilder<u64>>()
245                .ok_or_else(|| vortex_err!("Cannot append list scalar to non-list builder"))?
246                .append_value(ListScalar::try_from(scalar)?)?,
247            DType::Extension(..) => self
248                .as_any_mut()
249                .downcast_mut::<ExtensionBuilder>()
250                .ok_or_else(|| {
251                    vortex_err!("Cannot append extension scalar to non-extension builder")
252                })?
253                .append_value(ExtScalar::try_from(scalar)?)?,
254        }
255        Ok(())
256    }
257}
258
259impl<T: ?Sized + ArrayBuilder> ArrayBuilderExt for T {}