polars_arrow/array/
builder.rs

1use polars_utils::IdxSize;
2
3use crate::array::binary::BinaryArrayBuilder;
4use crate::array::binview::BinaryViewArrayGenericBuilder;
5use crate::array::boolean::BooleanArrayBuilder;
6use crate::array::fixed_size_binary::FixedSizeBinaryArrayBuilder;
7use crate::array::fixed_size_list::FixedSizeListArrayBuilder;
8use crate::array::list::ListArrayBuilder;
9use crate::array::null::NullArrayBuilder;
10use crate::array::struct_::StructArrayBuilder;
11use crate::array::{Array, PrimitiveArrayBuilder};
12use crate::datatypes::{ArrowDataType, PhysicalType};
13use crate::with_match_primitive_type_full;
14
/// Buffer-sharing policy passed to builder operations (appends, gathers,
/// etc.) for array kinds that are able to share buffers with their input
/// arrays.
#[derive(Debug, Clone, Copy)]
pub enum ShareStrategy {
    /// Do not share buffers with the input array.
    Never,
    /// Share buffers with the input array.
    Always,
}
22
23pub trait StaticArrayBuilder: Send {
24    type Array: Array;
25
26    fn dtype(&self) -> &ArrowDataType;
27    fn reserve(&mut self, additional: usize);
28
29    /// Consume this builder returning the built array.
30    fn freeze(self) -> Self::Array;
31
32    /// Return the built array and reset to an empty state.
33    fn freeze_reset(&mut self) -> Self::Array;
34
35    /// Returns the length of this builder (so far).
36    fn len(&self) -> usize;
37
38    /// Extend this builder with the given number of null elements.
39    fn extend_nulls(&mut self, length: usize);
40
41    /// Extends this builder with the contents of the given array. May panic if
42    /// other does not match the dtype of this array.
43    fn extend(&mut self, other: &Self::Array, share: ShareStrategy) {
44        self.subslice_extend(other, 0, other.len(), share);
45    }
46
47    /// Extends this builder with the contents of the given array subslice. May
48    /// panic if other does not match the dtype of this array.
49    fn subslice_extend(
50        &mut self,
51        other: &Self::Array,
52        start: usize,
53        length: usize,
54        share: ShareStrategy,
55    );
56
57    /// The same as subslice_extend, but repeats the extension `repeats` times.
58    fn subslice_extend_repeated(
59        &mut self,
60        other: &Self::Array,
61        start: usize,
62        length: usize,
63        repeats: usize,
64        share: ShareStrategy,
65    ) {
66        self.reserve(length * repeats);
67        for _ in 0..repeats {
68            self.subslice_extend(other, start, length, share)
69        }
70    }
71
72    /// The same as subslice_extend, but repeats each element `repeats` times.
73    fn subslice_extend_each_repeated(
74        &mut self,
75        other: &Self::Array,
76        start: usize,
77        length: usize,
78        repeats: usize,
79        share: ShareStrategy,
80    );
81
82    /// Extends this builder with the contents of the given array at the given
83    /// indices. That is, `other[idxs[i]]` is appended to this array in order,
84    /// for each i=0..idxs.len(). May panic if other does not match the
85    /// dtype of this array.
86    ///
87    /// # Safety
88    /// The indices must be in-bounds.
89    unsafe fn gather_extend(&mut self, other: &Self::Array, idxs: &[IdxSize], share: ShareStrategy);
90
91    /// Extends this builder with the contents of the given array at the given
92    /// indices. That is, `other[idxs[i]]` is appended to this array in order,
93    /// for each i=0..idxs.len(). May panic if other does not match the
94    /// dtype of this array. Out-of-bounds indices are mapped to nulls.
95    fn opt_gather_extend(&mut self, other: &Self::Array, idxs: &[IdxSize], share: ShareStrategy);
96}
97
98impl<T: StaticArrayBuilder> ArrayBuilder for T {
99    #[inline(always)]
100    fn dtype(&self) -> &ArrowDataType {
101        StaticArrayBuilder::dtype(self)
102    }
103
104    #[inline(always)]
105    fn reserve(&mut self, additional: usize) {
106        StaticArrayBuilder::reserve(self, additional)
107    }
108
109    #[inline(always)]
110    fn freeze(self) -> Box<dyn Array> {
111        Box::new(StaticArrayBuilder::freeze(self))
112    }
113
114    #[inline(always)]
115    fn freeze_reset(&mut self) -> Box<dyn Array> {
116        Box::new(StaticArrayBuilder::freeze_reset(self))
117    }
118
119    #[inline(always)]
120    fn len(&self) -> usize {
121        StaticArrayBuilder::len(self)
122    }
123
124    #[inline(always)]
125    fn extend_nulls(&mut self, length: usize) {
126        StaticArrayBuilder::extend_nulls(self, length);
127    }
128
129    #[inline(always)]
130    fn subslice_extend(
131        &mut self,
132        other: &dyn Array,
133        start: usize,
134        length: usize,
135        share: ShareStrategy,
136    ) {
137        let other: &T::Array = other.as_any().downcast_ref().unwrap();
138        StaticArrayBuilder::subslice_extend(self, other, start, length, share);
139    }
140
141    #[inline(always)]
142    fn subslice_extend_repeated(
143        &mut self,
144        other: &dyn Array,
145        start: usize,
146        length: usize,
147        repeats: usize,
148        share: ShareStrategy,
149    ) {
150        let other: &T::Array = other.as_any().downcast_ref().unwrap();
151        StaticArrayBuilder::subslice_extend_repeated(self, other, start, length, repeats, share);
152    }
153
154    #[inline(always)]
155    fn subslice_extend_each_repeated(
156        &mut self,
157        other: &dyn Array,
158        start: usize,
159        length: usize,
160        repeats: usize,
161        share: ShareStrategy,
162    ) {
163        let other: &T::Array = other.as_any().downcast_ref().unwrap();
164        StaticArrayBuilder::subslice_extend_each_repeated(
165            self, other, start, length, repeats, share,
166        );
167    }
168
169    #[inline(always)]
170    unsafe fn gather_extend(&mut self, other: &dyn Array, idxs: &[IdxSize], share: ShareStrategy) {
171        let other: &T::Array = other.as_any().downcast_ref().unwrap();
172        StaticArrayBuilder::gather_extend(self, other, idxs, share);
173    }
174
175    #[inline(always)]
176    fn opt_gather_extend(&mut self, other: &dyn Array, idxs: &[IdxSize], share: ShareStrategy) {
177        let other: &T::Array = other.as_any().downcast_ref().unwrap();
178        StaticArrayBuilder::opt_gather_extend(self, other, idxs, share);
179    }
180}
181
182#[allow(private_bounds)]
183pub trait ArrayBuilder: ArrayBuilderBoxedHelper + Send {
184    fn dtype(&self) -> &ArrowDataType;
185    fn reserve(&mut self, additional: usize);
186
187    /// Consume this builder returning the built array.
188    fn freeze(self) -> Box<dyn Array>;
189
190    /// Return the built array and reset to an empty state.
191    fn freeze_reset(&mut self) -> Box<dyn Array>;
192
193    /// Returns the length of this builder (so far).
194    fn len(&self) -> usize;
195
196    /// Extend this builder with the given number of null elements.
197    fn extend_nulls(&mut self, length: usize);
198
199    /// Extends this builder with the contents of the given array. May panic if
200    /// other does not match the dtype of this array.
201    fn extend(&mut self, other: &dyn Array, share: ShareStrategy) {
202        self.subslice_extend(other, 0, other.len(), share);
203    }
204
205    /// Extends this builder with the contents of the given array subslice. May
206    /// panic if other does not match the dtype of this array.
207    fn subslice_extend(
208        &mut self,
209        other: &dyn Array,
210        start: usize,
211        length: usize,
212        share: ShareStrategy,
213    );
214
215    /// The same as subslice_extend, but repeats the extension `repeats` times.
216    fn subslice_extend_repeated(
217        &mut self,
218        other: &dyn Array,
219        start: usize,
220        length: usize,
221        repeats: usize,
222        share: ShareStrategy,
223    );
224
225    /// The same as subslice_extend, but repeats each element `repeats` times.
226    fn subslice_extend_each_repeated(
227        &mut self,
228        other: &dyn Array,
229        start: usize,
230        length: usize,
231        repeats: usize,
232        share: ShareStrategy,
233    );
234
235    /// Extends this builder with the contents of the given array at the given
236    /// indices. That is, `other[idxs[i]]` is appended to this array in order,
237    /// for each i=0..idxs.len(). May panic if other does not match the
238    /// dtype of this array.
239    ///
240    /// # Safety
241    /// The indices must be in-bounds.
242    unsafe fn gather_extend(&mut self, other: &dyn Array, idxs: &[IdxSize], share: ShareStrategy);
243
244    /// Extends this builder with the contents of the given array at the given
245    /// indices. That is, `other[idxs[i]]` is appended to this array in order,
246    /// for each i=0..idxs.len(). May panic if other does not match the
247    /// dtype of this array. Out-of-bounds indices are mapped to nulls.
248    fn opt_gather_extend(&mut self, other: &dyn Array, idxs: &[IdxSize], share: ShareStrategy);
249}
250
251/// A hack that lets us call the consuming `freeze` method on Box<dyn ArrayBuilder>.
252trait ArrayBuilderBoxedHelper {
253    fn freeze_boxed(self: Box<Self>) -> Box<dyn Array>;
254}
255
256impl<T: ArrayBuilder> ArrayBuilderBoxedHelper for T {
257    fn freeze_boxed(self: Box<Self>) -> Box<dyn Array> {
258        self.freeze()
259    }
260}
261
262impl ArrayBuilder for Box<dyn ArrayBuilder> {
263    #[inline(always)]
264    fn dtype(&self) -> &ArrowDataType {
265        (**self).dtype()
266    }
267
268    #[inline(always)]
269    fn reserve(&mut self, additional: usize) {
270        (**self).reserve(additional)
271    }
272
273    #[inline(always)]
274    fn freeze(self) -> Box<dyn Array> {
275        self.freeze_boxed()
276    }
277
278    #[inline(always)]
279    fn freeze_reset(&mut self) -> Box<dyn Array> {
280        (**self).freeze_reset()
281    }
282
283    #[inline(always)]
284    fn len(&self) -> usize {
285        (**self).len()
286    }
287
288    #[inline(always)]
289    fn extend_nulls(&mut self, length: usize) {
290        (**self).extend_nulls(length);
291    }
292
293    #[inline(always)]
294    fn subslice_extend(
295        &mut self,
296        other: &dyn Array,
297        start: usize,
298        length: usize,
299        share: ShareStrategy,
300    ) {
301        (**self).subslice_extend(other, start, length, share);
302    }
303
304    #[inline(always)]
305    fn subslice_extend_repeated(
306        &mut self,
307        other: &dyn Array,
308        start: usize,
309        length: usize,
310        repeats: usize,
311        share: ShareStrategy,
312    ) {
313        (**self).subslice_extend_repeated(other, start, length, repeats, share);
314    }
315
316    #[inline(always)]
317    fn subslice_extend_each_repeated(
318        &mut self,
319        other: &dyn Array,
320        start: usize,
321        length: usize,
322        repeats: usize,
323        share: ShareStrategy,
324    ) {
325        (**self).subslice_extend_each_repeated(other, start, length, repeats, share);
326    }
327
328    #[inline(always)]
329    unsafe fn gather_extend(&mut self, other: &dyn Array, idxs: &[IdxSize], share: ShareStrategy) {
330        (**self).gather_extend(other, idxs, share);
331    }
332
333    #[inline(always)]
334    fn opt_gather_extend(&mut self, other: &dyn Array, idxs: &[IdxSize], share: ShareStrategy) {
335        (**self).opt_gather_extend(other, idxs, share);
336    }
337}
338
339/// Construct an ArrayBuilder for the given type.
340pub fn make_builder(dtype: &ArrowDataType) -> Box<dyn ArrayBuilder> {
341    use PhysicalType::*;
342    match dtype.to_physical_type() {
343        Null => Box::new(NullArrayBuilder::new(dtype.clone())),
344        Boolean => Box::new(BooleanArrayBuilder::new(dtype.clone())),
345        Primitive(prim_t) => with_match_primitive_type_full!(prim_t, |$T| {
346            Box::new(PrimitiveArrayBuilder::<$T>::new(dtype.clone()))
347        }),
348        LargeBinary => Box::new(BinaryArrayBuilder::<i64>::new(dtype.clone())),
349        FixedSizeBinary => Box::new(FixedSizeBinaryArrayBuilder::new(dtype.clone())),
350        LargeList => {
351            let ArrowDataType::LargeList(inner_dt) = dtype else {
352                unreachable!()
353            };
354            Box::new(ListArrayBuilder::<i64, _>::new(
355                dtype.clone(),
356                make_builder(inner_dt.dtype()),
357            ))
358        },
359        FixedSizeList => {
360            let ArrowDataType::FixedSizeList(inner_dt, _) = dtype else {
361                unreachable!()
362            };
363            Box::new(FixedSizeListArrayBuilder::new(
364                dtype.clone(),
365                make_builder(inner_dt.dtype()),
366            ))
367        },
368        Struct => {
369            let ArrowDataType::Struct(fields) = dtype else {
370                unreachable!()
371            };
372            let builders = fields.iter().map(|f| make_builder(f.dtype())).collect();
373            Box::new(StructArrayBuilder::new(dtype.clone(), builders))
374        },
375        BinaryView => Box::new(BinaryViewArrayGenericBuilder::<[u8]>::new(dtype.clone())),
376        Utf8View => Box::new(BinaryViewArrayGenericBuilder::<str>::new(dtype.clone())),
377
378        List | Binary | Utf8 | LargeUtf8 | Map | Union | Dictionary(_) => {
379            unimplemented!()
380        },
381    }
382}