vortex_array/builders/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Builders for Vortex arrays.
5//!
6//! Every logical type in Vortex has a canonical (uncompressed) in-memory encoding. This module
7//! provides pre-allocated builders to construct new canonical arrays.
8//!
9//! ## Example:
10//!
11//! ```
12//! use vortex_array::builders::{builder_with_capacity, ArrayBuilderExt};
13//! use vortex_dtype::{DType, Nullability};
14//!
15//! // Create a new builder for string data.
16//! let mut builder = builder_with_capacity(&DType::Utf8(Nullability::NonNullable), 4);
17//!
18//! builder.append_scalar(&"a".into()).unwrap();
19//! builder.append_scalar(&"b".into()).unwrap();
20//! builder.append_scalar(&"c".into()).unwrap();
21//! builder.append_scalar(&"d".into()).unwrap();
22//!
23//! let strings = builder.finish();
24//!
25//! assert_eq!(strings.scalar_at(0), "a".into());
26//! assert_eq!(strings.scalar_at(1), "b".into());
27//! assert_eq!(strings.scalar_at(2), "c".into());
28//! assert_eq!(strings.scalar_at(3), "d".into());
29//! ```
30
31mod bool;
32mod decimal;
33mod extension;
34mod lazy_validity_builder;
35mod list;
36mod null;
37mod primitive;
38mod struct_;
39mod varbinview;
40
41use std::any::Any;
42
43pub use bool::*;
44pub use decimal::*;
45pub use extension::*;
46pub use list::*;
47pub use null::*;
48pub use primitive::*;
49pub use struct_::*;
50pub use varbinview::*;
51use vortex_dtype::{DType, match_each_native_ptype};
52use vortex_error::{VortexResult, vortex_bail, vortex_err};
53use vortex_mask::Mask;
54use vortex_scalar::{
55    BinaryScalar, BoolScalar, DecimalValue, ExtScalar, ListScalar, PrimitiveScalar, Scalar,
56    ScalarValue, StructScalar, Utf8Scalar, match_each_decimal_value, match_each_decimal_value_type,
57};
58
59use crate::arrays::smallest_storage_type;
60use crate::{Array, ArrayRef};
61
62pub trait ArrayBuilder: Send {
63    fn as_any(&self) -> &dyn Any;
64
65    fn as_any_mut(&mut self) -> &mut dyn Any;
66
67    fn dtype(&self) -> &DType;
68
69    fn len(&self) -> usize;
70
71    fn is_empty(&self) -> bool {
72        self.len() == 0
73    }
74
75    /// Append a "zero" value to the array.
76    fn append_zero(&mut self) {
77        self.append_zeros(1)
78    }
79
80    /// Appends n "zero" values to the array.
81    fn append_zeros(&mut self, n: usize);
82
83    /// Append a "null" value to the array.
84    fn append_null(&mut self) {
85        self.append_nulls(1)
86    }
87
88    /// Appends n "null" values to the array.
89    fn append_nulls(&mut self, n: usize);
90
91    /// Extends the array with the provided array, canonicalizing if necessary.
92    fn extend_from_array(&mut self, array: &dyn Array) -> VortexResult<()>;
93
94    /// Ensure that the builder can hold at least `capacity` number of items
95    fn ensure_capacity(&mut self, capacity: usize);
96
97    /// Override builders validity with the one provided
98    fn set_validity(&mut self, validity: Mask);
99
100    /// Constructs an Array from the builder components.
101    ///
102    /// # Panics
103    ///
104    /// This function may panic if the builder's methods are called with invalid arguments. If only
105    /// the methods on this interface are used, the builder should not panic. However, specific
106    /// builders have interfaces that may be misused. For example, if the number of values in a
107    /// [PrimitiveBuilder]'s [vortex_buffer::BufferMut] does not match the number of validity bits,
108    /// the PrimitiveBuilder's [Self::finish] will panic.
109    fn finish(&mut self) -> ArrayRef;
110}
111
112/// Construct a new canonical builder for the given [`DType`].
113///
114///
115/// # Example
116///
117/// ```
118/// use vortex_array::builders::{builder_with_capacity, ArrayBuilderExt};
119/// use vortex_dtype::{DType, Nullability};
120///
121/// // Create a new builder for string data.
122/// let mut builder = builder_with_capacity(&DType::Utf8(Nullability::NonNullable), 4);
123///
124/// builder.append_scalar(&"a".into()).unwrap();
125/// builder.append_scalar(&"b".into()).unwrap();
126/// builder.append_scalar(&"c".into()).unwrap();
127/// builder.append_scalar(&"d".into()).unwrap();
128///
129/// let strings = builder.finish();
130///
131/// assert_eq!(strings.scalar_at(0), "a".into());
132/// assert_eq!(strings.scalar_at(1), "b".into());
133/// assert_eq!(strings.scalar_at(2), "c".into());
134/// assert_eq!(strings.scalar_at(3), "d".into());
135/// ```
136pub fn builder_with_capacity(dtype: &DType, capacity: usize) -> Box<dyn ArrayBuilder> {
137    match dtype {
138        DType::Null => Box::new(NullBuilder::new()),
139        DType::Bool(n) => Box::new(BoolBuilder::with_capacity(*n, capacity)),
140        DType::Primitive(ptype, n) => {
141            match_each_native_ptype!(ptype, |P| {
142                Box::new(PrimitiveBuilder::<P>::with_capacity(*n, capacity))
143            })
144        }
145        DType::Decimal(decimal_type, n) => {
146            match_each_decimal_value_type!(smallest_storage_type(decimal_type), |D| {
147                Box::new(DecimalBuilder::with_capacity::<D>(
148                    capacity,
149                    *decimal_type,
150                    *n,
151                ))
152            })
153        }
154        DType::Utf8(n) => Box::new(VarBinViewBuilder::with_capacity(DType::Utf8(*n), capacity)),
155        DType::Binary(n) => Box::new(VarBinViewBuilder::with_capacity(
156            DType::Binary(*n),
157            capacity,
158        )),
159        DType::Struct(struct_dtype, n) => Box::new(StructBuilder::with_capacity(
160            struct_dtype.clone(),
161            *n,
162            capacity,
163        )),
164        DType::List(dtype, n) => Box::new(ListBuilder::<u64>::with_capacity(
165            dtype.clone(),
166            *n,
167            capacity,
168        )),
169        DType::FixedSizeList(..) => {
170            unimplemented!("TODO(connor)[FixedSizeList]")
171        }
172        DType::Extension(ext_dtype) => {
173            Box::new(ExtensionBuilder::with_capacity(ext_dtype.clone(), capacity))
174        }
175    }
176}
177
178pub trait ArrayBuilderExt: ArrayBuilder {
179    /// A generic function to append a scalar value to the builder.
180    fn append_scalar_value(&mut self, value: ScalarValue) -> VortexResult<()> {
181        if value.is_null() {
182            self.append_null();
183            Ok(())
184        } else {
185            self.append_scalar(&Scalar::new(self.dtype().clone(), value))
186        }
187    }
188
189    /// A generic function to append a scalar to the builder.
190    fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()> {
191        if scalar.dtype() != self.dtype() {
192            vortex_bail!(
193                "Builder has dtype {:?}, scalar has {:?}",
194                self.dtype(),
195                scalar.dtype()
196            )
197        }
198        match scalar.dtype() {
199            DType::Null => self
200                .as_any_mut()
201                .downcast_mut::<NullBuilder>()
202                .ok_or_else(|| vortex_err!("Cannot append null scalar to non-null builder"))?
203                .append_null(),
204            DType::Bool(_) => self
205                .as_any_mut()
206                .downcast_mut::<BoolBuilder>()
207                .ok_or_else(|| vortex_err!("Cannot append bool scalar to non-bool builder"))?
208                .append_option(BoolScalar::try_from(scalar)?.value()),
209            DType::Primitive(ptype, ..) => {
210                match_each_native_ptype!(ptype, |P| {
211                    self.as_any_mut()
212                        .downcast_mut::<PrimitiveBuilder<P>>()
213                        .ok_or_else(|| {
214                            vortex_err!("Cannot append primitive scalar to non-primitive builder")
215                        })?
216                        .append_option(PrimitiveScalar::try_from(scalar)?.typed_value::<P>())
217                })
218            }
219            DType::Decimal(..) => {
220                let builder = self
221                    .as_any_mut()
222                    .downcast_mut::<DecimalBuilder>()
223                    .ok_or_else(|| {
224                        vortex_err!("Cannot append decimal scalar to non-decimal builder")
225                    })?;
226                match scalar.as_decimal().decimal_value() {
227                    None => builder.append_null(),
228                    Some(v) => match_each_decimal_value!(v, |dec_val| {
229                        builder.append_value(dec_val);
230                    }),
231                }
232            }
233            DType::Utf8(_) => self
234                .as_any_mut()
235                .downcast_mut::<VarBinViewBuilder>()
236                .ok_or_else(|| vortex_err!("Cannot append utf8 scalar to non-utf8 builder"))?
237                .append_option(Utf8Scalar::try_from(scalar)?.value()),
238            DType::Binary(_) => self
239                .as_any_mut()
240                .downcast_mut::<VarBinViewBuilder>()
241                .ok_or_else(|| vortex_err!("Cannot append binary scalar to non-binary builder"))?
242                .append_option(BinaryScalar::try_from(scalar)?.value()),
243            DType::Struct(..) => self
244                .as_any_mut()
245                .downcast_mut::<StructBuilder>()
246                .ok_or_else(|| vortex_err!("Cannot append struct scalar to non-struct builder"))?
247                .append_value(StructScalar::try_from(scalar)?)?,
248            DType::List(..) => self
249                .as_any_mut()
250                .downcast_mut::<ListBuilder<u64>>()
251                .ok_or_else(|| vortex_err!("Cannot append list scalar to non-list builder"))?
252                .append_value(ListScalar::try_from(scalar)?)?,
253            DType::FixedSizeList(..) => {
254                unimplemented!("TODO(connor)[FixedSizeList]")
255            }
256            DType::Extension(..) => self
257                .as_any_mut()
258                .downcast_mut::<ExtensionBuilder>()
259                .ok_or_else(|| {
260                    vortex_err!("Cannot append extension scalar to non-extension builder")
261                })?
262                .append_value(ExtScalar::try_from(scalar)?)?,
263        }
264        Ok(())
265    }
266}
267
268impl<T: ?Sized + ArrayBuilder> ArrayBuilderExt for T {}