polars_arrow/array/list/
builder.rs

1use polars_utils::IdxSize;
2
3use super::ListArray;
4use crate::array::builder::{ArrayBuilder, ShareStrategy, StaticArrayBuilder};
5use crate::bitmap::OptBitmapBuilder;
6use crate::datatypes::ArrowDataType;
7use crate::offset::{Offset, Offsets, OffsetsBuffer};
8
9pub struct ListArrayBuilder<O: Offset, B: ArrayBuilder> {
10    dtype: ArrowDataType,
11    offsets: Offsets<O>,
12    inner_builder: B,
13    validity: OptBitmapBuilder,
14}
15
16impl<O: Offset, B: ArrayBuilder> ListArrayBuilder<O, B> {
17    pub fn new(dtype: ArrowDataType, inner_builder: B) -> Self {
18        Self {
19            dtype,
20            inner_builder,
21            offsets: Offsets::new(),
22            validity: OptBitmapBuilder::default(),
23        }
24    }
25}
26
27impl<O: Offset, B: ArrayBuilder> StaticArrayBuilder for ListArrayBuilder<O, B> {
28    type Array = ListArray<O>;
29
30    fn dtype(&self) -> &ArrowDataType {
31        &self.dtype
32    }
33
34    fn reserve(&mut self, additional: usize) {
35        self.offsets.reserve(additional);
36        self.validity.reserve(additional);
37        // No inner reserve, we have no idea how large it needs to be.
38    }
39
40    fn freeze(self) -> ListArray<O> {
41        let offsets = OffsetsBuffer::from(self.offsets);
42        let values = self.inner_builder.freeze();
43        let validity = self.validity.into_opt_validity();
44        ListArray::new(self.dtype, offsets, values, validity)
45    }
46
47    fn freeze_reset(&mut self) -> Self::Array {
48        let offsets = OffsetsBuffer::from(core::mem::take(&mut self.offsets));
49        let values = self.inner_builder.freeze_reset();
50        let validity = core::mem::take(&mut self.validity).into_opt_validity();
51        ListArray::new(self.dtype.clone(), offsets, values, validity)
52    }
53
54    fn len(&self) -> usize {
55        self.offsets.len_proxy()
56    }
57
58    fn extend_nulls(&mut self, length: usize) {
59        self.offsets.extend_constant(length);
60        self.validity.extend_constant(length, false);
61    }
62
63    fn subslice_extend(
64        &mut self,
65        other: &ListArray<O>,
66        start: usize,
67        length: usize,
68        share: ShareStrategy,
69    ) {
70        let start_offset = other.offsets()[start].to_usize();
71        let stop_offset = other.offsets()[start + length].to_usize();
72        self.offsets
73            .try_extend_from_slice(other.offsets(), start, length)
74            .unwrap();
75        self.inner_builder.subslice_extend(
76            &**other.values(),
77            start_offset,
78            stop_offset - start_offset,
79            share,
80        );
81        self.validity
82            .subslice_extend_from_opt_validity(other.validity(), start, length);
83    }
84
85    fn subslice_extend_each_repeated(
86        &mut self,
87        other: &ListArray<O>,
88        start: usize,
89        length: usize,
90        repeats: usize,
91        share: ShareStrategy,
92    ) {
93        let other_offsets = other.offsets();
94        let other_values = &**other.values();
95
96        let start_offset = other.offsets()[start].to_usize();
97        let stop_offset = other.offsets()[start + length].to_usize();
98        self.offsets.reserve(length * repeats);
99        self.inner_builder
100            .reserve((stop_offset - start_offset) * repeats);
101        for offset_idx in start..start + length {
102            let sublist_start = other_offsets[offset_idx].to_usize();
103            let sublist_stop = other_offsets[offset_idx + 1].to_usize();
104            for _ in 0..repeats {
105                self.offsets.try_push(sublist_stop - sublist_start).unwrap();
106            }
107            self.inner_builder.subslice_extend_repeated(
108                other_values,
109                sublist_start,
110                sublist_stop - sublist_start,
111                repeats,
112                share,
113            );
114        }
115        self.validity
116            .subslice_extend_each_repeated_from_opt_validity(
117                other.validity(),
118                start,
119                length,
120                repeats,
121            );
122    }
123
124    unsafe fn gather_extend(
125        &mut self,
126        other: &ListArray<O>,
127        idxs: &[IdxSize],
128        share: ShareStrategy,
129    ) {
130        let other_values = &**other.values();
131        let other_offsets = other.offsets();
132
133        // Pre-compute proper length for reserve.
134        let total_len: usize = idxs
135            .iter()
136            .map(|i| {
137                let start = other_offsets.get_unchecked(*i as usize).to_usize();
138                let stop = other_offsets.get_unchecked(*i as usize + 1).to_usize();
139                stop - start
140            })
141            .sum();
142        self.inner_builder.reserve(total_len);
143
144        // Group consecutive indices into larger copies.
145        let mut group_start = 0;
146        while group_start < idxs.len() {
147            let start_idx = idxs[group_start] as usize;
148            let mut group_len = 1;
149            while group_start + group_len < idxs.len()
150                && idxs[group_start + group_len] as usize == start_idx + group_len
151            {
152                group_len += 1;
153            }
154
155            let start_offset = other_offsets.get_unchecked(start_idx).to_usize();
156            let stop_offset = other_offsets
157                .get_unchecked(start_idx + group_len)
158                .to_usize();
159            self.offsets
160                .try_extend_from_slice(other_offsets, start_idx, group_len)
161                .unwrap();
162            self.inner_builder.subslice_extend(
163                other_values,
164                start_offset,
165                stop_offset - start_offset,
166                share,
167            );
168            group_start += group_len;
169        }
170
171        self.validity
172            .gather_extend_from_opt_validity(other.validity(), idxs);
173    }
174
175    fn opt_gather_extend(&mut self, other: &ListArray<O>, idxs: &[IdxSize], share: ShareStrategy) {
176        let other_values = &**other.values();
177        let other_offsets = other.offsets();
178
179        unsafe {
180            // Pre-compute proper length for reserve.
181            let total_len: usize = idxs
182                .iter()
183                .map(|idx| {
184                    if (*idx as usize) < other.len() {
185                        let start = other_offsets.get_unchecked(*idx as usize).to_usize();
186                        let stop = other_offsets.get_unchecked(*idx as usize + 1).to_usize();
187                        stop - start
188                    } else {
189                        0
190                    }
191                })
192                .sum();
193            self.inner_builder.reserve(total_len);
194
195            // Group consecutive indices into larger copies.
196            let mut group_start = 0;
197            while group_start < idxs.len() {
198                let start_idx = idxs[group_start] as usize;
199                let mut group_len = 1;
200                let in_bounds = start_idx < other.len();
201
202                if in_bounds {
203                    while group_start + group_len < idxs.len()
204                        && idxs[group_start + group_len] as usize == start_idx + group_len
205                        && start_idx + group_len < other.len()
206                    {
207                        group_len += 1;
208                    }
209
210                    let start_offset = other_offsets.get_unchecked(start_idx).to_usize();
211                    let stop_offset = other_offsets
212                        .get_unchecked(start_idx + group_len)
213                        .to_usize();
214                    self.offsets
215                        .try_extend_from_slice(other_offsets, start_idx, group_len)
216                        .unwrap();
217                    self.inner_builder.subslice_extend(
218                        other_values,
219                        start_offset,
220                        stop_offset - start_offset,
221                        share,
222                    );
223                } else {
224                    while group_start + group_len < idxs.len()
225                        && idxs[group_start + group_len] as usize >= other.len()
226                    {
227                        group_len += 1;
228                    }
229                    self.offsets.extend_constant(group_len);
230                }
231                group_start += group_len;
232            }
233
234            self.validity
235                .opt_gather_extend_from_opt_validity(other.validity(), idxs, other.len());
236        }
237    }
238}