polars_arrow/array/list/
builder.rs1use polars_utils::IdxSize;
2
3use super::ListArray;
4use crate::array::builder::{ArrayBuilder, ShareStrategy, StaticArrayBuilder};
5use crate::bitmap::OptBitmapBuilder;
6use crate::datatypes::ArrowDataType;
7use crate::offset::{Offset, Offsets, OffsetsBuffer};
8
9pub struct ListArrayBuilder<O: Offset, B: ArrayBuilder> {
10 dtype: ArrowDataType,
11 offsets: Offsets<O>,
12 inner_builder: B,
13 validity: OptBitmapBuilder,
14}
15
16impl<O: Offset, B: ArrayBuilder> ListArrayBuilder<O, B> {
17 pub fn new(dtype: ArrowDataType, inner_builder: B) -> Self {
18 Self {
19 dtype,
20 inner_builder,
21 offsets: Offsets::new(),
22 validity: OptBitmapBuilder::default(),
23 }
24 }
25}
26
27impl<O: Offset, B: ArrayBuilder> StaticArrayBuilder for ListArrayBuilder<O, B> {
28 type Array = ListArray<O>;
29
30 fn dtype(&self) -> &ArrowDataType {
31 &self.dtype
32 }
33
34 fn reserve(&mut self, additional: usize) {
35 self.offsets.reserve(additional);
36 self.validity.reserve(additional);
37 }
39
40 fn freeze(self) -> ListArray<O> {
41 let offsets = OffsetsBuffer::from(self.offsets);
42 let values = self.inner_builder.freeze();
43 let validity = self.validity.into_opt_validity();
44 ListArray::new(self.dtype, offsets, values, validity)
45 }
46
47 fn freeze_reset(&mut self) -> Self::Array {
48 let offsets = OffsetsBuffer::from(core::mem::take(&mut self.offsets));
49 let values = self.inner_builder.freeze_reset();
50 let validity = core::mem::take(&mut self.validity).into_opt_validity();
51 ListArray::new(self.dtype.clone(), offsets, values, validity)
52 }
53
54 fn len(&self) -> usize {
55 self.offsets.len_proxy()
56 }
57
58 fn extend_nulls(&mut self, length: usize) {
59 self.offsets.extend_constant(length);
60 self.validity.extend_constant(length, false);
61 }
62
63 fn subslice_extend(
64 &mut self,
65 other: &ListArray<O>,
66 start: usize,
67 length: usize,
68 share: ShareStrategy,
69 ) {
70 let start_offset = other.offsets()[start].to_usize();
71 let stop_offset = other.offsets()[start + length].to_usize();
72 self.offsets
73 .try_extend_from_slice(other.offsets(), start, length)
74 .unwrap();
75 self.inner_builder.subslice_extend(
76 &**other.values(),
77 start_offset,
78 stop_offset - start_offset,
79 share,
80 );
81 self.validity
82 .subslice_extend_from_opt_validity(other.validity(), start, length);
83 }
84
85 fn subslice_extend_each_repeated(
86 &mut self,
87 other: &ListArray<O>,
88 start: usize,
89 length: usize,
90 repeats: usize,
91 share: ShareStrategy,
92 ) {
93 let other_offsets = other.offsets();
94 let other_values = &**other.values();
95
96 let start_offset = other.offsets()[start].to_usize();
97 let stop_offset = other.offsets()[start + length].to_usize();
98 self.offsets.reserve(length * repeats);
99 self.inner_builder
100 .reserve((stop_offset - start_offset) * repeats);
101 for offset_idx in start..start + length {
102 let sublist_start = other_offsets[offset_idx].to_usize();
103 let sublist_stop = other_offsets[offset_idx + 1].to_usize();
104 for _ in 0..repeats {
105 self.offsets.try_push(sublist_stop - sublist_start).unwrap();
106 }
107 self.inner_builder.subslice_extend_repeated(
108 other_values,
109 sublist_start,
110 sublist_stop - sublist_start,
111 repeats,
112 share,
113 );
114 }
115 self.validity
116 .subslice_extend_each_repeated_from_opt_validity(
117 other.validity(),
118 start,
119 length,
120 repeats,
121 );
122 }
123
124 unsafe fn gather_extend(
125 &mut self,
126 other: &ListArray<O>,
127 idxs: &[IdxSize],
128 share: ShareStrategy,
129 ) {
130 let other_values = &**other.values();
131 let other_offsets = other.offsets();
132
133 let total_len: usize = idxs
135 .iter()
136 .map(|i| {
137 let start = other_offsets.get_unchecked(*i as usize).to_usize();
138 let stop = other_offsets.get_unchecked(*i as usize + 1).to_usize();
139 stop - start
140 })
141 .sum();
142 self.inner_builder.reserve(total_len);
143
144 let mut group_start = 0;
146 while group_start < idxs.len() {
147 let start_idx = idxs[group_start] as usize;
148 let mut group_len = 1;
149 while group_start + group_len < idxs.len()
150 && idxs[group_start + group_len] as usize == start_idx + group_len
151 {
152 group_len += 1;
153 }
154
155 let start_offset = other_offsets.get_unchecked(start_idx).to_usize();
156 let stop_offset = other_offsets
157 .get_unchecked(start_idx + group_len)
158 .to_usize();
159 self.offsets
160 .try_extend_from_slice(other_offsets, start_idx, group_len)
161 .unwrap();
162 self.inner_builder.subslice_extend(
163 other_values,
164 start_offset,
165 stop_offset - start_offset,
166 share,
167 );
168 group_start += group_len;
169 }
170
171 self.validity
172 .gather_extend_from_opt_validity(other.validity(), idxs);
173 }
174
175 fn opt_gather_extend(&mut self, other: &ListArray<O>, idxs: &[IdxSize], share: ShareStrategy) {
176 let other_values = &**other.values();
177 let other_offsets = other.offsets();
178
179 unsafe {
180 let total_len: usize = idxs
182 .iter()
183 .map(|idx| {
184 if (*idx as usize) < other.len() {
185 let start = other_offsets.get_unchecked(*idx as usize).to_usize();
186 let stop = other_offsets.get_unchecked(*idx as usize + 1).to_usize();
187 stop - start
188 } else {
189 0
190 }
191 })
192 .sum();
193 self.inner_builder.reserve(total_len);
194
195 let mut group_start = 0;
197 while group_start < idxs.len() {
198 let start_idx = idxs[group_start] as usize;
199 let mut group_len = 1;
200 let in_bounds = start_idx < other.len();
201
202 if in_bounds {
203 while group_start + group_len < idxs.len()
204 && idxs[group_start + group_len] as usize == start_idx + group_len
205 && start_idx + group_len < other.len()
206 {
207 group_len += 1;
208 }
209
210 let start_offset = other_offsets.get_unchecked(start_idx).to_usize();
211 let stop_offset = other_offsets
212 .get_unchecked(start_idx + group_len)
213 .to_usize();
214 self.offsets
215 .try_extend_from_slice(other_offsets, start_idx, group_len)
216 .unwrap();
217 self.inner_builder.subslice_extend(
218 other_values,
219 start_offset,
220 stop_offset - start_offset,
221 share,
222 );
223 } else {
224 while group_start + group_len < idxs.len()
225 && idxs[group_start + group_len] as usize >= other.len()
226 {
227 group_len += 1;
228 }
229 self.offsets.extend_constant(group_len);
230 }
231 group_start += group_len;
232 }
233
234 self.validity
235 .opt_gather_extend_from_opt_validity(other.validity(), idxs, other.len());
236 }
237 }
238}