Skip to main content

vortex_array/builders/
extension.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5
6use vortex_dtype::DType;
7use vortex_dtype::extension::ExtDTypeRef;
8use vortex_error::VortexResult;
9use vortex_error::vortex_ensure;
10use vortex_mask::Mask;
11
12use crate::Array;
13use crate::ArrayRef;
14use crate::IntoArray;
15use crate::arrays::ExtensionArray;
16use crate::builders::ArrayBuilder;
17use crate::builders::DEFAULT_BUILDER_CAPACITY;
18use crate::builders::builder_with_capacity;
19use crate::canonical::Canonical;
20use crate::canonical::ToCanonical;
21use crate::scalar::ExtScalar;
22use crate::scalar::Scalar;
23
24/// The builder for building a [`ExtensionArray`].
25pub struct ExtensionBuilder {
26    dtype: DType,
27    storage: Box<dyn ArrayBuilder>,
28}
29
30impl ExtensionBuilder {
31    /// Creates a new `ExtensionBuilder` with a capacity of [`DEFAULT_BUILDER_CAPACITY`].
32    pub fn new(ext_dtype: ExtDTypeRef) -> Self {
33        Self::with_capacity(ext_dtype, DEFAULT_BUILDER_CAPACITY)
34    }
35
36    /// Creates a new `ExtensionBuilder` with the given `capacity`.
37    pub fn with_capacity(ext_dtype: ExtDTypeRef, capacity: usize) -> Self {
38        Self {
39            storage: builder_with_capacity(ext_dtype.storage_dtype(), capacity),
40            dtype: DType::Extension(ext_dtype),
41        }
42    }
43
44    /// Appends an extension `value` to the builder.
45    pub fn append_value(&mut self, value: ExtScalar) -> VortexResult<()> {
46        self.storage.append_scalar(&value.to_storage_scalar())
47    }
48
49    /// Finishes the builder directly into a [`ExtensionArray`].
50    pub fn finish_into_extension(&mut self) -> ExtensionArray {
51        let storage = self.storage.finish();
52        ExtensionArray::new(self.ext_dtype(), storage)
53    }
54
55    /// The [`ExtDType`] of this builder.
56    fn ext_dtype(&self) -> ExtDTypeRef {
57        if let DType::Extension(ext_dtype) = &self.dtype {
58            ext_dtype.clone()
59        } else {
60            unreachable!()
61        }
62    }
63}
64
65impl ArrayBuilder for ExtensionBuilder {
66    fn as_any(&self) -> &dyn Any {
67        self
68    }
69
70    fn as_any_mut(&mut self) -> &mut dyn Any {
71        self
72    }
73
74    fn dtype(&self) -> &DType {
75        &self.dtype
76    }
77
78    fn len(&self) -> usize {
79        self.storage.len()
80    }
81
82    fn append_zeros(&mut self, n: usize) {
83        self.storage.append_zeros(n)
84    }
85
86    unsafe fn append_nulls_unchecked(&mut self, n: usize) {
87        self.storage.append_nulls(n)
88    }
89
90    fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()> {
91        vortex_ensure!(
92            scalar.dtype() == self.dtype(),
93            "ExtensionBuilder expected scalar with dtype {}, got {}",
94            self.dtype(),
95            scalar.dtype()
96        );
97
98        self.append_value(scalar.as_extension())
99    }
100
101    unsafe fn extend_from_array_unchecked(&mut self, array: &dyn Array) {
102        let ext_array = array.to_extension();
103        self.storage.extend_from_array(ext_array.storage())
104    }
105
106    fn reserve_exact(&mut self, capacity: usize) {
107        self.storage.reserve_exact(capacity)
108    }
109
110    unsafe fn set_validity_unchecked(&mut self, validity: Mask) {
111        unsafe { self.storage.set_validity_unchecked(validity) };
112    }
113
114    fn finish(&mut self) -> ArrayRef {
115        self.finish_into_extension().into_array()
116    }
117
118    fn finish_into_canonical(&mut self) -> Canonical {
119        Canonical::Extension(self.finish_into_extension())
120    }
121}
122
#[cfg(test)]
mod tests {
    use vortex_dtype::Nullability;
    use vortex_dtype::datetime::Date;
    use vortex_dtype::datetime::TimeUnit;

    use super::*;
    use crate::arrays::PrimitiveArray;
    use crate::assert_arrays_eq;
    use crate::builders::ArrayBuilder;
    use crate::scalar::Scalar;

    /// Appends two valid date scalars and one null through `append_scalar`, checks the finished
    /// array, then checks that a scalar of a non-extension dtype is rejected.
    #[test]
    fn test_append_scalar() {
        let ext_dtype = Date::new(TimeUnit::Days, Nullability::Nullable).erased();

        // Storage scalars in append order: two valid values followed by a null.
        let storage_scalars = [
            Scalar::from(Some(42i32)),
            Scalar::from(Some(84i32)),
            Scalar::null(DType::Primitive(
                vortex_dtype::PType::I32,
                Nullability::Nullable,
            )),
        ];

        let mut builder = ExtensionBuilder::new(ext_dtype.clone());
        for storage in storage_scalars {
            let ext_scalar = Scalar::extension::<Date>(TimeUnit::Days, storage);
            builder.append_scalar(&ext_scalar).unwrap();
        }

        let array = builder.finish_into_extension();
        let expected_storage =
            PrimitiveArray::from_option_iter([Some(42i32), Some(84), None]).into_array();
        let expected = ExtensionArray::new(ext_dtype.clone(), expected_storage);

        assert_arrays_eq!(&array, &expected);
        assert_eq!(array.len(), 3);

        // A boolean scalar does not have the builder's extension dtype and must be rejected.
        let mut rejecting_builder = ExtensionBuilder::new(ext_dtype);
        let wrong_scalar = Scalar::from(true);
        assert!(rejecting_builder.append_scalar(&wrong_scalar).is_err());
    }
}
173}