vortex_array/builders/
mod.rs

1//! Builders for Vortex arrays.
2//!
3//! Every logical type in Vortex has a canonical (uncompressed) in-memory encoding. This module
4//! provides pre-allocated builders to construct new canonical arrays.
5//!
6//! ## Example:
7//!
8//! ```
9//! use vortex_array::builders::{builder_with_capacity, ArrayBuilderExt};
10//! use vortex_array::compute::scalar_at;
11//! use vortex_dtype::{DType, Nullability};
12//!
13//! // Create a new builder for string data.
14//! let mut builder = builder_with_capacity(&DType::Utf8(Nullability::NonNullable), 4);
15//!
16//! builder.append_scalar(&"a".into()).unwrap();
17//! builder.append_scalar(&"b".into()).unwrap();
18//! builder.append_scalar(&"c".into()).unwrap();
19//! builder.append_scalar(&"d".into()).unwrap();
20//!
21//! let strings = builder.finish();
22//!
23//! assert_eq!(scalar_at(&strings, 0).unwrap(), "a".into());
24//! assert_eq!(scalar_at(&strings, 1).unwrap(), "b".into());
25//! assert_eq!(scalar_at(&strings, 2).unwrap(), "c".into());
26//! assert_eq!(scalar_at(&strings, 3).unwrap(), "d".into());
27//! ```
28
29mod bool;
30mod extension;
31mod lazy_validity_builder;
32mod list;
33mod null;
34mod primitive;
35mod struct_;
36mod varbinview;
37
38use std::any::Any;
39
40pub use bool::*;
41pub use extension::*;
42pub use list::*;
43pub use null::*;
44pub use primitive::*;
45pub use varbinview::*;
46use vortex_dtype::{DType, match_each_native_ptype};
47use vortex_error::{VortexResult, vortex_bail, vortex_err};
48use vortex_mask::Mask;
49use vortex_scalar::{
50    BinaryScalar, BoolScalar, ExtScalar, ListScalar, PrimitiveScalar, Scalar, ScalarValue,
51    StructScalar, Utf8Scalar,
52};
53
54use crate::builders::struct_::StructBuilder;
55use crate::{Array, ArrayRef};
56
/// Common interface implemented by the canonical array builders in this module.
///
/// A builder accumulates values of a single [`DType`] and hands them back as an [`ArrayRef`]
/// through [`Self::finish`]. The `Send` supertrait allows a builder to be moved to another
/// thread.
pub trait ArrayBuilder: Send {
    /// Returns the builder as `&dyn Any` so that it can be downcast to its concrete type.
    fn as_any(&self) -> &dyn Any;

    /// Returns the builder as `&mut dyn Any` so that it can be downcast to its concrete type.
    fn as_any_mut(&mut self) -> &mut dyn Any;

    /// The [`DType`] of this builder.
    fn dtype(&self) -> &DType;

    /// The length reported by the builder.
    // NOTE(review): presumably the number of values appended so far; confirm against implementors.
    fn len(&self) -> usize;

    /// Returns `true` when [`Self::len`] is zero.
    fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Appends a single "zero" value to the array.
    ///
    /// The default implementation delegates to [`Self::append_zeros`] with `n = 1`.
    fn append_zero(&mut self) {
        self.append_zeros(1)
    }

    /// Appends `n` "zero" values to the array.
    fn append_zeros(&mut self, n: usize);

    /// Appends a single "null" value to the array.
    ///
    /// The default implementation delegates to [`Self::append_nulls`] with `n = 1`.
    fn append_null(&mut self) {
        self.append_nulls(1)
    }

    /// Appends `n` "null" values to the array.
    fn append_nulls(&mut self, n: usize);

    /// Extends the array with the provided array, canonicalizing if necessary.
    fn extend_from_array(&mut self, array: &dyn Array) -> VortexResult<()>;

    /// Ensures that the builder can hold at least `capacity` items.
    fn ensure_capacity(&mut self, capacity: usize);

    /// Overrides the builder's validity with the one provided.
    fn set_validity(&mut self, validity: Mask);

    /// Constructs an Array from the builder components.
    ///
    /// # Panics
    ///
    /// This function may panic if the builder's methods are called with invalid arguments. If only
    /// the methods on this interface are used, the builder should not panic. However, specific
    /// builders have interfaces that may be misused. For example, if the number of values in a
    /// [PrimitiveBuilder]'s [vortex_buffer::BufferMut] does not match the number of validity bits,
    /// the PrimitiveBuilder's [Self::finish] will panic.
    fn finish(&mut self) -> ArrayRef;
}
106
107/// Construct a new canonical builder for the given [`DType`].
108///
109///
110/// # Example
111///
112/// ```
113/// use vortex_array::builders::{builder_with_capacity, ArrayBuilderExt};
114/// use vortex_array::compute::scalar_at;
115/// use vortex_dtype::{DType, Nullability};
116///
117/// // Create a new builder for string data.
118/// let mut builder = builder_with_capacity(&DType::Utf8(Nullability::NonNullable), 4);
119///
120/// builder.append_scalar(&"a".into()).unwrap();
121/// builder.append_scalar(&"b".into()).unwrap();
122/// builder.append_scalar(&"c".into()).unwrap();
123/// builder.append_scalar(&"d".into()).unwrap();
124///
125/// let strings = builder.finish();
126///
127/// assert_eq!(scalar_at(&strings, 0).unwrap(), "a".into());
128/// assert_eq!(scalar_at(&strings, 1).unwrap(), "b".into());
129/// assert_eq!(scalar_at(&strings, 2).unwrap(), "c".into());
130/// assert_eq!(scalar_at(&strings, 3).unwrap(), "d".into());
131/// ```
132pub fn builder_with_capacity(dtype: &DType, capacity: usize) -> Box<dyn ArrayBuilder> {
133    match dtype {
134        DType::Null => Box::new(NullBuilder::new()),
135        DType::Bool(n) => Box::new(BoolBuilder::with_capacity(*n, capacity)),
136        DType::Primitive(ptype, n) => {
137            match_each_native_ptype!(ptype, |$P| {
138                Box::new(PrimitiveBuilder::<$P>::with_capacity(*n, capacity))
139            })
140        }
141        DType::Utf8(n) => Box::new(VarBinViewBuilder::with_capacity(DType::Utf8(*n), capacity)),
142        DType::Binary(n) => Box::new(VarBinViewBuilder::with_capacity(
143            DType::Binary(*n),
144            capacity,
145        )),
146        DType::Struct(struct_dtype, n) => Box::new(StructBuilder::with_capacity(
147            struct_dtype.clone(),
148            *n,
149            capacity,
150        )),
151        DType::List(dtype, n) => Box::new(ListBuilder::<u64>::with_capacity(
152            dtype.clone(),
153            *n,
154            capacity,
155        )),
156        DType::Extension(ext_dtype) => {
157            Box::new(ExtensionBuilder::with_capacity(ext_dtype.clone(), capacity))
158        }
159    }
160}
161
162pub trait ArrayBuilderExt: ArrayBuilder {
163    /// A generic function to append a scalar value to the builder.
164    fn append_scalar_value(&mut self, value: ScalarValue) -> VortexResult<()> {
165        if value.is_null() {
166            self.append_null();
167            Ok(())
168        } else {
169            self.append_scalar(&Scalar::new(self.dtype().clone(), value))
170        }
171    }
172
173    /// A generic function to append a scalar to the builder.
174    fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()> {
175        if scalar.dtype() != self.dtype() {
176            vortex_bail!(
177                "Builder has dtype {:?}, scalar has {:?}",
178                self.dtype(),
179                scalar.dtype()
180            )
181        }
182        match scalar.dtype() {
183            DType::Null => self
184                .as_any_mut()
185                .downcast_mut::<NullBuilder>()
186                .ok_or_else(|| vortex_err!("Cannot append null scalar to non-null builder"))?
187                .append_null(),
188            DType::Bool(_) => self
189                .as_any_mut()
190                .downcast_mut::<BoolBuilder>()
191                .ok_or_else(|| vortex_err!("Cannot append bool scalar to non-bool builder"))?
192                .append_option(BoolScalar::try_from(scalar)?.value()),
193            DType::Primitive(ptype, ..) => {
194                match_each_native_ptype!(ptype, |$P| {
195                    self
196                    .as_any_mut()
197                    .downcast_mut::<PrimitiveBuilder<$P>>()
198                    .ok_or_else(|| {
199                        vortex_err!("Cannot append primitive scalar to non-primitive builder")
200                    })?
201                    .append_option(PrimitiveScalar::try_from(scalar)?.typed_value::<$P>())
202                })
203            }
204            DType::Utf8(_) => self
205                .as_any_mut()
206                .downcast_mut::<VarBinViewBuilder>()
207                .ok_or_else(|| vortex_err!("Cannot append utf8 scalar to non-utf8 builder"))?
208                .append_option(Utf8Scalar::try_from(scalar)?.value()),
209            DType::Binary(_) => self
210                .as_any_mut()
211                .downcast_mut::<VarBinViewBuilder>()
212                .ok_or_else(|| vortex_err!("Cannot append binary scalar to non-binary builder"))?
213                .append_option(BinaryScalar::try_from(scalar)?.value()),
214            DType::Struct(..) => self
215                .as_any_mut()
216                .downcast_mut::<StructBuilder>()
217                .ok_or_else(|| vortex_err!("Cannot append struct scalar to non-struct builder"))?
218                .append_value(StructScalar::try_from(scalar)?)?,
219            DType::List(..) => self
220                .as_any_mut()
221                .downcast_mut::<ListBuilder<u64>>()
222                .ok_or_else(|| vortex_err!("Cannot append list scalar to non-list builder"))?
223                .append_value(ListScalar::try_from(scalar)?)?,
224            DType::Extension(..) => self
225                .as_any_mut()
226                .downcast_mut::<ExtensionBuilder>()
227                .ok_or_else(|| {
228                    vortex_err!("Cannot append extension scalar to non-extension builder")
229                })?
230                .append_value(ExtScalar::try_from(scalar)?)?,
231        }
232        Ok(())
233    }
234}
235
236impl<T: ?Sized + ArrayBuilder> ArrayBuilderExt for T {}