vortex_array/builders/
struct_.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5
6use itertools::Itertools;
7use vortex_dtype::DType;
8use vortex_dtype::Nullability;
9use vortex_dtype::StructFields;
10use vortex_error::VortexExpect;
11use vortex_error::VortexResult;
12use vortex_error::vortex_bail;
13use vortex_error::vortex_ensure;
14use vortex_error::vortex_panic;
15use vortex_mask::Mask;
16use vortex_scalar::Scalar;
17use vortex_scalar::StructScalar;
18
19use crate::Array;
20use crate::ArrayRef;
21use crate::IntoArray;
22use crate::arrays::StructArray;
23use crate::builders::ArrayBuilder;
24use crate::builders::DEFAULT_BUILDER_CAPACITY;
25use crate::builders::LazyBitBufferBuilder;
26use crate::builders::builder_with_capacity;
27use crate::canonical::Canonical;
28use crate::canonical::ToCanonical;
29
30/// The builder for building a [`StructArray`].
31pub struct StructBuilder {
32    dtype: DType,
33    builders: Vec<Box<dyn ArrayBuilder>>,
34    nulls: LazyBitBufferBuilder,
35}
36
37impl StructBuilder {
38    /// Creates a new `StructBuilder` with a capacity of [`DEFAULT_BUILDER_CAPACITY`].
39    pub fn new(struct_dtype: StructFields, nullability: Nullability) -> Self {
40        Self::with_capacity(struct_dtype, nullability, DEFAULT_BUILDER_CAPACITY)
41    }
42
43    /// Creates a new `StructBuilder` with the given `capacity`.
44    pub fn with_capacity(
45        struct_dtype: StructFields,
46        nullability: Nullability,
47        capacity: usize,
48    ) -> Self {
49        let builders = struct_dtype
50            .fields()
51            .map(|dt| builder_with_capacity(&dt, capacity))
52            .collect();
53
54        Self {
55            builders,
56            nulls: LazyBitBufferBuilder::new(capacity),
57            dtype: DType::Struct(struct_dtype, nullability),
58        }
59    }
60
61    /// Appends a struct `value` to the builder.
62    pub fn append_value(&mut self, struct_scalar: StructScalar) -> VortexResult<()> {
63        if !self.dtype.is_nullable() && struct_scalar.is_null() {
64            vortex_bail!("Tried to append a null `StructScalar` to a non-nullable struct builder",);
65        }
66
67        if struct_scalar.struct_fields() != self.struct_fields() {
68            vortex_bail!(
69                "Tried to append a `StructScalar` with fields {} to a \
70                    struct builder with fields {}",
71                struct_scalar.struct_fields(),
72                self.struct_fields()
73            );
74        }
75
76        if let Some(fields) = struct_scalar.fields() {
77            for (builder, field) in self.builders.iter_mut().zip_eq(fields) {
78                builder.append_scalar(&field)?;
79            }
80            self.nulls.append_non_null();
81        } else {
82            self.append_null()
83        }
84
85        Ok(())
86    }
87
88    /// Finishes the builder directly into a [`StructArray`].
89    pub fn finish_into_struct(&mut self) -> StructArray {
90        let len = self.len();
91        let fields = self
92            .builders
93            .iter_mut()
94            .map(|builder| builder.finish())
95            .collect::<Vec<_>>();
96
97        if fields.len() > 1 {
98            let expected_length = fields[0].len();
99            for (index, field) in fields[1..].iter().enumerate() {
100                assert_eq!(
101                    field.len(),
102                    expected_length,
103                    "Field {index} does not have expected length {expected_length}"
104                );
105            }
106        }
107
108        let validity = self.nulls.finish_with_nullability(self.dtype.nullability());
109
110        StructArray::try_new_with_dtype(fields, self.struct_fields().clone(), len, validity)
111            .vortex_expect("Fields must all have same length.")
112    }
113
114    /// The [`StructFields`] of this struct builder.
115    pub fn struct_fields(&self) -> &StructFields {
116        let DType::Struct(struct_fields, _) = &self.dtype else {
117            vortex_panic!("`StructBuilder` somehow had dtype {}", self.dtype);
118        };
119
120        struct_fields
121    }
122}
123
124impl ArrayBuilder for StructBuilder {
125    fn as_any(&self) -> &dyn Any {
126        self
127    }
128
129    fn as_any_mut(&mut self) -> &mut dyn Any {
130        self
131    }
132
133    fn dtype(&self) -> &DType {
134        &self.dtype
135    }
136
137    fn len(&self) -> usize {
138        self.nulls.len()
139    }
140
141    fn append_zeros(&mut self, n: usize) {
142        self.builders
143            .iter_mut()
144            .for_each(|builder| builder.append_zeros(n));
145        self.nulls.append_n_non_nulls(n);
146    }
147
148    unsafe fn append_nulls_unchecked(&mut self, n: usize) {
149        self.builders
150            .iter_mut()
151            // We push zero values into our children when appending a null in case the children are
152            // themselves non-nullable.
153            .for_each(|builder| builder.append_defaults(n));
154        self.nulls.append_null();
155    }
156
157    fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()> {
158        vortex_ensure!(
159            scalar.dtype() == self.dtype(),
160            "StructBuilder expected scalar with dtype {:?}, got {:?}",
161            self.dtype(),
162            scalar.dtype()
163        );
164
165        let struct_scalar = StructScalar::try_from(scalar)?;
166        self.append_value(struct_scalar)
167    }
168
169    unsafe fn extend_from_array_unchecked(&mut self, array: &dyn Array) {
170        let array = array.to_struct();
171
172        for (a, builder) in array
173            .fields()
174            .iter()
175            .cloned()
176            .zip_eq(self.builders.iter_mut())
177        {
178            a.append_to_builder(builder.as_mut());
179        }
180
181        self.nulls.append_validity_mask(array.validity_mask());
182    }
183
184    fn reserve_exact(&mut self, capacity: usize) {
185        self.builders.iter_mut().for_each(|builder| {
186            builder.reserve_exact(capacity);
187        });
188        self.nulls.reserve_exact(capacity);
189    }
190
191    unsafe fn set_validity_unchecked(&mut self, validity: Mask) {
192        self.nulls = LazyBitBufferBuilder::new(validity.len());
193        self.nulls.append_validity_mask(validity);
194    }
195
196    fn finish(&mut self) -> ArrayRef {
197        self.finish_into_struct().into_array()
198    }
199
200    fn finish_into_canonical(&mut self) -> Canonical {
201        Canonical::Struct(self.finish_into_struct())
202    }
203}
204
#[cfg(test)]
mod tests {
    use vortex_dtype::DType;
    use vortex_dtype::Nullability;
    use vortex_dtype::PType::I32;
    use vortex_dtype::StructFields;
    use vortex_scalar::Scalar;

    use crate::IntoArray;
    use crate::arrays::PrimitiveArray;
    use crate::arrays::StructArray;
    use crate::arrays::VarBinArray;
    use crate::assert_arrays_eq;
    use crate::builders::ArrayBuilder;
    use crate::builders::struct_::StructBuilder;
    use crate::validity::Validity;

    /// Appends one `{a: 1, b: 2}` row to a zero-capacity builder with the given struct-level
    /// `nullability`, then checks the finished array's length and dtype.
    fn assert_single_row_is_built(nullability: Nullability) {
        let fields = StructFields::new(["a", "b"].into(), vec![I32.into(), I32.into()]);
        let dtype = DType::Struct(fields.clone(), nullability);
        let mut builder = StructBuilder::with_capacity(fields, nullability, 0);

        let row = Scalar::struct_(dtype.clone(), vec![1.into(), 2.into()]);
        builder.append_value(row.as_struct()).unwrap();

        let finished = builder.finish();
        assert_eq!(finished.len(), 1);
        assert_eq!(finished.dtype(), &dtype);
    }

    #[test]
    fn test_struct_builder() {
        assert_single_row_is_built(Nullability::NonNullable);
    }

    #[test]
    fn test_append_nullable_struct() {
        assert_single_row_is_built(Nullability::Nullable);
    }

    #[test]
    fn test_append_scalar() {
        let struct_fields = StructFields::from_iter([
            ("a", DType::Primitive(I32, Nullability::Nullable)),
            ("b", DType::Utf8(Nullability::Nullable)),
        ]);
        let dtype = DType::Struct(struct_fields.clone(), Nullability::Nullable);

        let mut builder = StructBuilder::new(struct_fields.clone(), Nullability::Nullable);

        // Two valid rows, appended through the type-erased `append_scalar` entry point.
        for (number, text) in [(42i32, "hello"), (84, "world")] {
            let row = Scalar::struct_(
                dtype.clone(),
                vec![
                    Scalar::primitive(number, Nullability::Nullable),
                    Scalar::utf8(text, Nullability::Nullable),
                ],
            );
            builder.append_scalar(&row).unwrap();
        }

        // A null struct row.
        builder.append_scalar(&Scalar::null(dtype.clone())).unwrap();

        let array = builder.finish_into_struct();

        // The third row is masked out by the struct validity, so the child values used for it
        // in the expected array are placeholders.
        let expected_a =
            PrimitiveArray::from_option_iter([Some(42i32), Some(84), Some(123)]).into_array();
        let expected_b = <VarBinArray as FromIterator<_>>::from_iter([
            Some("hello"),
            Some("world"),
            Some("x"),
        ])
        .into_array();
        let expected = StructArray::try_from_iter_with_validity(
            [("a", expected_a), ("b", expected_b)],
            Validity::from_iter([true, true, false]),
        )
        .unwrap();
        assert_arrays_eq!(&array, &expected);

        // A scalar of a non-struct dtype must be rejected.
        let mut builder = StructBuilder::new(struct_fields, Nullability::NonNullable);
        let wrong_scalar = Scalar::from(42i32);
        assert!(builder.append_scalar(&wrong_scalar).is_err());
    }
}
327}