vortex_array/builders/
struct_.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5
6use itertools::Itertools;
7use vortex_dtype::{DType, Nullability, StructFields};
8use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_panic};
9use vortex_mask::Mask;
10use vortex_scalar::StructScalar;
11
12use crate::arrays::StructArray;
13use crate::builders::{
14    ArrayBuilder, DEFAULT_BUILDER_CAPACITY, LazyNullBufferBuilder, builder_with_capacity,
15};
16use crate::canonical::{Canonical, ToCanonical};
17use crate::{Array, ArrayRef, IntoArray};
18
19/// The builder for building a [`StructArray`].
20pub struct StructBuilder {
21    dtype: DType,
22    builders: Vec<Box<dyn ArrayBuilder>>,
23    nulls: LazyNullBufferBuilder,
24}
25
26impl StructBuilder {
27    /// Creates a new `StructBuilder` with a capacity of [`DEFAULT_BUILDER_CAPACITY`].
28    pub fn new(struct_dtype: StructFields, nullability: Nullability) -> Self {
29        Self::with_capacity(struct_dtype, nullability, DEFAULT_BUILDER_CAPACITY)
30    }
31
32    /// Creates a new `StructBuilder` with the given `capacity`.
33    pub fn with_capacity(
34        struct_dtype: StructFields,
35        nullability: Nullability,
36        capacity: usize,
37    ) -> Self {
38        let builders = struct_dtype
39            .fields()
40            .map(|dt| builder_with_capacity(&dt, capacity))
41            .collect();
42
43        Self {
44            builders,
45            nulls: LazyNullBufferBuilder::new(capacity),
46            dtype: DType::Struct(struct_dtype, nullability),
47        }
48    }
49
50    /// Appends a struct `value` to the builder.
51    pub fn append_value(&mut self, struct_scalar: StructScalar) -> VortexResult<()> {
52        if !self.dtype.is_nullable() && struct_scalar.is_null() {
53            vortex_bail!("Tried to append a null `StructScalar` to a non-nullable struct builder",);
54        }
55
56        if struct_scalar.struct_fields() != self.struct_fields() {
57            vortex_bail!(
58                "Tried to append a `StructScalar` with fields {} to a \
59                    struct builder with fields {}",
60                struct_scalar.struct_fields(),
61                self.struct_fields()
62            );
63        }
64
65        if let Some(fields) = struct_scalar.fields() {
66            for (builder, field) in self.builders.iter_mut().zip_eq(fields) {
67                builder.append_scalar(&field)?;
68            }
69            self.nulls.append_non_null();
70        } else {
71            self.append_null()
72        }
73
74        Ok(())
75    }
76
77    /// Appends an optional struct value to the builder.
78    ///
79    /// If the value is `Some`, it appends the struct. If the value is `None`, it appends a null.
80    ///
81    /// # Panics
82    ///
83    /// This method will panic if the input is `None` and the builder is non-nullable.
84    pub fn append_option(&mut self, value: Option<StructScalar>) -> VortexResult<()> {
85        match value {
86            Some(value) => self.append_value(value),
87            None => {
88                self.append_null();
89                Ok(())
90            }
91        }
92    }
93
94    /// Finishes the builder directly into a [`StructArray`].
95    pub fn finish_into_struct(&mut self) -> StructArray {
96        let len = self.len();
97        let fields = self
98            .builders
99            .iter_mut()
100            .map(|builder| builder.finish())
101            .collect::<Vec<_>>();
102
103        if fields.len() > 1 {
104            let expected_length = fields[0].len();
105            for (index, field) in fields[1..].iter().enumerate() {
106                assert_eq!(
107                    field.len(),
108                    expected_length,
109                    "Field {index} does not have expected length {expected_length}"
110                );
111            }
112        }
113
114        let validity = self.nulls.finish_with_nullability(self.dtype.nullability());
115
116        StructArray::try_new_with_dtype(fields, self.struct_fields().clone(), len, validity)
117            .vortex_expect("Fields must all have same length.")
118    }
119
120    /// The [`StructFields`] of this struct builder.
121    pub fn struct_fields(&self) -> &StructFields {
122        let DType::Struct(struct_fields, _) = &self.dtype else {
123            vortex_panic!("`StructBuilder` somehow had dtype {}", self.dtype);
124        };
125
126        struct_fields
127    }
128}
129
130impl ArrayBuilder for StructBuilder {
131    fn as_any(&self) -> &dyn Any {
132        self
133    }
134
135    fn as_any_mut(&mut self) -> &mut dyn Any {
136        self
137    }
138
139    fn dtype(&self) -> &DType {
140        &self.dtype
141    }
142
143    fn len(&self) -> usize {
144        self.nulls.len()
145    }
146
147    fn append_zeros(&mut self, n: usize) {
148        self.builders
149            .iter_mut()
150            .for_each(|builder| builder.append_zeros(n));
151        self.nulls.append_n_non_nulls(n);
152    }
153
154    unsafe fn append_nulls_unchecked(&mut self, n: usize) {
155        self.builders
156            .iter_mut()
157            // We push zero values into our children when appending a null in case the children are
158            // themselves non-nullable.
159            .for_each(|builder| builder.append_defaults(n));
160        self.nulls.append_null();
161    }
162
163    unsafe fn extend_from_array_unchecked(&mut self, array: &dyn Array) {
164        let array = array.to_struct();
165
166        for (a, builder) in (0..array.struct_fields().nfields())
167            .map(|i| &array.fields()[i])
168            .zip_eq(self.builders.iter_mut())
169        {
170            a.append_to_builder(builder.as_mut());
171        }
172
173        self.nulls.append_validity_mask(array.validity_mask());
174    }
175
176    fn ensure_capacity(&mut self, capacity: usize) {
177        self.builders.iter_mut().for_each(|builder| {
178            builder.ensure_capacity(capacity);
179        });
180        self.nulls.ensure_capacity(capacity);
181    }
182
183    fn set_validity(&mut self, validity: Mask) {
184        self.nulls = LazyNullBufferBuilder::new(validity.len());
185        self.nulls.append_validity_mask(validity);
186    }
187
188    fn finish(&mut self) -> ArrayRef {
189        self.finish_into_struct().into_array()
190    }
191
192    fn finish_into_canonical(&mut self) -> Canonical {
193        Canonical::Struct(self.finish_into_struct())
194    }
195}
196
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use vortex_dtype::PType::I32;
    use vortex_dtype::{DType, Nullability, StructFields};
    use vortex_scalar::Scalar;

    use crate::builders::ArrayBuilder;
    use crate::builders::struct_::StructBuilder;

    /// Struct fields `{a: i32, b: i32}` shared by the tests below.
    fn two_i32_fields() -> StructFields {
        StructFields::new(
            vec![Arc::from("a"), Arc::from("b")].into(),
            vec![I32.into(), I32.into()],
        )
    }

    /// Appending one value to a non-nullable builder yields a one-row array of the same dtype.
    #[test]
    fn test_struct_builder() {
        let fields = two_i32_fields();
        let dtype = DType::Struct(fields.clone(), Nullability::NonNullable);
        let mut builder = StructBuilder::with_capacity(fields, Nullability::NonNullable, 0);

        let row = Scalar::struct_(dtype.clone(), vec![1.into(), 2.into()]);
        builder.append_value(row.as_struct()).unwrap();

        let finished = builder.finish();
        assert_eq!(finished.len(), 1);
        assert_eq!(finished.dtype(), &dtype);
    }

    /// Appending one value to a nullable builder yields a one-row array of the same dtype.
    #[test]
    fn test_append_nullable_struct() {
        let fields = two_i32_fields();
        let dtype = DType::Struct(fields.clone(), Nullability::Nullable);
        let mut builder = StructBuilder::with_capacity(fields, Nullability::Nullable, 0);

        let row = Scalar::struct_(dtype.clone(), vec![1.into(), 2.into()]);
        builder.append_value(row.as_struct()).unwrap();

        let finished = builder.finish();
        assert_eq!(finished.len(), 1);
        assert_eq!(finished.dtype(), &dtype);
    }
}