1use std::any::Any;
5
6use itertools::Itertools;
7use vortex_dtype::{DType, Nullability, StructFields};
8use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_ensure, vortex_panic};
9use vortex_mask::Mask;
10use vortex_scalar::{Scalar, StructScalar};
11
12use crate::arrays::StructArray;
13use crate::builders::{
14 ArrayBuilder, DEFAULT_BUILDER_CAPACITY, LazyBitBufferBuilder, builder_with_capacity,
15};
16use crate::canonical::{Canonical, ToCanonical};
17use crate::{Array, ArrayRef, IntoArray};
18
19pub struct StructBuilder {
21 dtype: DType,
22 builders: Vec<Box<dyn ArrayBuilder>>,
23 nulls: LazyBitBufferBuilder,
24}
25
26impl StructBuilder {
27 pub fn new(struct_dtype: StructFields, nullability: Nullability) -> Self {
29 Self::with_capacity(struct_dtype, nullability, DEFAULT_BUILDER_CAPACITY)
30 }
31
32 pub fn with_capacity(
34 struct_dtype: StructFields,
35 nullability: Nullability,
36 capacity: usize,
37 ) -> Self {
38 let builders = struct_dtype
39 .fields()
40 .map(|dt| builder_with_capacity(&dt, capacity))
41 .collect();
42
43 Self {
44 builders,
45 nulls: LazyBitBufferBuilder::new(capacity),
46 dtype: DType::Struct(struct_dtype, nullability),
47 }
48 }
49
50 pub fn append_value(&mut self, struct_scalar: StructScalar) -> VortexResult<()> {
52 if !self.dtype.is_nullable() && struct_scalar.is_null() {
53 vortex_bail!("Tried to append a null `StructScalar` to a non-nullable struct builder",);
54 }
55
56 if struct_scalar.struct_fields() != self.struct_fields() {
57 vortex_bail!(
58 "Tried to append a `StructScalar` with fields {} to a \
59 struct builder with fields {}",
60 struct_scalar.struct_fields(),
61 self.struct_fields()
62 );
63 }
64
65 if let Some(fields) = struct_scalar.fields() {
66 for (builder, field) in self.builders.iter_mut().zip_eq(fields) {
67 builder.append_scalar(&field)?;
68 }
69 self.nulls.append_non_null();
70 } else {
71 self.append_null()
72 }
73
74 Ok(())
75 }
76
77 pub fn finish_into_struct(&mut self) -> StructArray {
79 let len = self.len();
80 let fields = self
81 .builders
82 .iter_mut()
83 .map(|builder| builder.finish())
84 .collect::<Vec<_>>();
85
86 if fields.len() > 1 {
87 let expected_length = fields[0].len();
88 for (index, field) in fields[1..].iter().enumerate() {
89 assert_eq!(
90 field.len(),
91 expected_length,
92 "Field {index} does not have expected length {expected_length}"
93 );
94 }
95 }
96
97 let validity = self.nulls.finish_with_nullability(self.dtype.nullability());
98
99 StructArray::try_new_with_dtype(fields, self.struct_fields().clone(), len, validity)
100 .vortex_expect("Fields must all have same length.")
101 }
102
103 pub fn struct_fields(&self) -> &StructFields {
105 let DType::Struct(struct_fields, _) = &self.dtype else {
106 vortex_panic!("`StructBuilder` somehow had dtype {}", self.dtype);
107 };
108
109 struct_fields
110 }
111}
112
113impl ArrayBuilder for StructBuilder {
114 fn as_any(&self) -> &dyn Any {
115 self
116 }
117
118 fn as_any_mut(&mut self) -> &mut dyn Any {
119 self
120 }
121
122 fn dtype(&self) -> &DType {
123 &self.dtype
124 }
125
126 fn len(&self) -> usize {
127 self.nulls.len()
128 }
129
130 fn append_zeros(&mut self, n: usize) {
131 self.builders
132 .iter_mut()
133 .for_each(|builder| builder.append_zeros(n));
134 self.nulls.append_n_non_nulls(n);
135 }
136
137 unsafe fn append_nulls_unchecked(&mut self, n: usize) {
138 self.builders
139 .iter_mut()
140 .for_each(|builder| builder.append_defaults(n));
143 self.nulls.append_null();
144 }
145
146 fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()> {
147 vortex_ensure!(
148 scalar.dtype() == self.dtype(),
149 "StructBuilder expected scalar with dtype {:?}, got {:?}",
150 self.dtype(),
151 scalar.dtype()
152 );
153
154 let struct_scalar = StructScalar::try_from(scalar)?;
155 self.append_value(struct_scalar)
156 }
157
158 unsafe fn extend_from_array_unchecked(&mut self, array: &dyn Array) {
159 let array = array.to_struct();
160
161 for (a, builder) in array
162 .fields()
163 .iter()
164 .cloned()
165 .zip_eq(self.builders.iter_mut())
166 {
167 a.append_to_builder(builder.as_mut());
168 }
169
170 self.nulls.append_validity_mask(array.validity_mask());
171 }
172
173 fn reserve_exact(&mut self, capacity: usize) {
174 self.builders.iter_mut().for_each(|builder| {
175 builder.reserve_exact(capacity);
176 });
177 self.nulls.reserve_exact(capacity);
178 }
179
180 unsafe fn set_validity_unchecked(&mut self, validity: Mask) {
181 self.nulls = LazyBitBufferBuilder::new(validity.len());
182 self.nulls.append_validity_mask(validity);
183 }
184
185 fn finish(&mut self) -> ArrayRef {
186 self.finish_into_struct().into_array()
187 }
188
189 fn finish_into_canonical(&mut self) -> Canonical {
190 Canonical::Struct(self.finish_into_struct())
191 }
192}
193
#[cfg(test)]
mod tests {
    use vortex_dtype::PType::I32;
    use vortex_dtype::{DType, Nullability, StructFields};
    use vortex_scalar::Scalar;

    use crate::arrays::{PrimitiveArray, StructArray, VarBinArray};
    use crate::builders::ArrayBuilder;
    use crate::builders::struct_::StructBuilder;
    use crate::validity::Validity;
    use crate::{IntoArray, assert_arrays_eq};

    /// A non-nullable builder accepts one struct value and reports length 1 and its dtype.
    #[test]
    fn test_struct_builder() {
        let fields = StructFields::new(["a", "b"].into(), vec![I32.into(), I32.into()]);
        let dtype = DType::Struct(fields.clone(), Nullability::NonNullable);
        let mut builder = StructBuilder::with_capacity(fields, Nullability::NonNullable, 0);

        let row = Scalar::struct_(dtype.clone(), vec![1.into(), 2.into()]);
        builder.append_value(row.as_struct()).unwrap();

        let built = builder.finish();
        assert_eq!(built.len(), 1);
        assert_eq!(built.dtype(), &dtype);
    }

    /// A nullable builder accepts one valid struct value and reports length 1 and its dtype.
    #[test]
    fn test_append_nullable_struct() {
        let fields = StructFields::new(["a", "b"].into(), vec![I32.into(), I32.into()]);
        let dtype = DType::Struct(fields.clone(), Nullability::Nullable);
        let mut builder = StructBuilder::with_capacity(fields, Nullability::Nullable, 0);

        let row = Scalar::struct_(dtype.clone(), vec![1.into(), 2.into()]);
        builder.append_value(row.as_struct()).unwrap();

        let built = builder.finish();
        assert_eq!(built.len(), 1);
        assert_eq!(built.dtype(), &dtype);
    }

    /// `append_scalar` accepts matching struct scalars (including a null one) and rejects a
    /// scalar of a different dtype.
    #[test]
    fn test_append_scalar() {
        let dtype = DType::Struct(
            StructFields::from_iter([
                ("a", DType::Primitive(I32, Nullability::Nullable)),
                ("b", DType::Utf8(Nullability::Nullable)),
            ]),
            Nullability::Nullable,
        );

        let DType::Struct(struct_fields, _) = &dtype else {
            panic!("Expected struct dtype")
        };
        let mut builder = StructBuilder::new(struct_fields.clone(), Nullability::Nullable);

        // Two valid rows.
        for (number, text) in [(42i32, "hello"), (84i32, "world")] {
            let row = Scalar::struct_(
                dtype.clone(),
                vec![
                    Scalar::primitive(number, Nullability::Nullable),
                    Scalar::utf8(text, Nullability::Nullable),
                ],
            );
            builder.append_scalar(&row).unwrap();
        }

        // One null row.
        let null_row = Scalar::null(dtype.clone());
        builder.append_scalar(&null_row).unwrap();

        let actual = builder.finish_into_struct();

        // The last row is marked invalid by the validity below.
        let expected_a =
            PrimitiveArray::from_option_iter([Some(42i32), Some(84), Some(123)]).into_array();
        let expected_b =
            <VarBinArray as FromIterator<_>>::from_iter([Some("hello"), Some("world"), Some("x")])
                .into_array();
        let expected = StructArray::try_from_iter_with_validity(
            [("a", expected_a), ("b", expected_b)],
            Validity::from_iter([true, true, false]),
        )
        .unwrap();
        assert_arrays_eq!(&actual, &expected);

        // A scalar of a non-struct dtype must be rejected.
        let mut strict_builder =
            StructBuilder::new(struct_fields.clone(), Nullability::NonNullable);
        let mismatched = Scalar::from(42i32);
        assert!(strict_builder.append_scalar(&mismatched).is_err());
    }
}