polars_arrow/array/binary/
builder.rs1use polars_utils::IdxSize;
2
3use crate::array::BinaryArray;
4use crate::array::builder::{ShareStrategy, StaticArrayBuilder};
5use crate::bitmap::OptBitmapBuilder;
6use crate::buffer::Buffer;
7use crate::datatypes::ArrowDataType;
8use crate::offset::{Offset, Offsets, OffsetsBuffer};
9
10pub struct BinaryArrayBuilder<O: Offset> {
11 dtype: ArrowDataType,
12 offsets: Offsets<O>,
13 values: Vec<u8>,
14 validity: OptBitmapBuilder,
15}
16
17impl<O: Offset> BinaryArrayBuilder<O> {
18 pub fn new(dtype: ArrowDataType) -> Self {
19 Self {
20 dtype,
21 offsets: Offsets::new(),
22 values: Vec::new(),
23 validity: OptBitmapBuilder::default(),
24 }
25 }
26}
27
28impl<O: Offset> StaticArrayBuilder for BinaryArrayBuilder<O> {
29 type Array = BinaryArray<O>;
30
31 fn dtype(&self) -> &ArrowDataType {
32 &self.dtype
33 }
34
35 fn reserve(&mut self, additional: usize) {
36 self.offsets.reserve(additional);
37 self.validity.reserve(additional);
38 }
40
41 fn freeze(self) -> BinaryArray<O> {
42 let offsets = OffsetsBuffer::from(self.offsets);
43 let values = Buffer::from(self.values);
44 let validity = self.validity.into_opt_validity();
45 BinaryArray::new(self.dtype, offsets, values, validity)
46 }
47
48 fn freeze_reset(&mut self) -> Self::Array {
49 let offsets = OffsetsBuffer::from(core::mem::take(&mut self.offsets));
50 let values = Buffer::from(core::mem::take(&mut self.values));
51 let validity = core::mem::take(&mut self.validity).into_opt_validity();
52 BinaryArray::new(self.dtype.clone(), offsets, values, validity)
53 }
54
55 fn len(&self) -> usize {
56 self.offsets.len_proxy()
57 }
58
59 fn extend_nulls(&mut self, length: usize) {
60 self.offsets.extend_constant(length);
61 self.validity.extend_constant(length, false);
62 }
63
64 fn subslice_extend(
65 &mut self,
66 other: &BinaryArray<O>,
67 start: usize,
68 length: usize,
69 _share: ShareStrategy,
70 ) {
71 let start_offset = other.offsets()[start].to_usize();
72 let stop_offset = other.offsets()[start + length].to_usize();
73 self.offsets
74 .try_extend_from_slice(other.offsets(), start, length)
75 .unwrap();
76 self.values
77 .extend_from_slice(&other.values()[start_offset..stop_offset]);
78 self.validity
79 .subslice_extend_from_opt_validity(other.validity(), start, length);
80 }
81
82 fn subslice_extend_each_repeated(
83 &mut self,
84 other: &BinaryArray<O>,
85 start: usize,
86 length: usize,
87 repeats: usize,
88 _share: ShareStrategy,
89 ) {
90 let other_offsets = other.offsets();
91 let other_values = &**other.values();
92
93 let start_offset = other.offsets()[start].to_usize();
94 let stop_offset = other.offsets()[start + length].to_usize();
95 self.offsets.reserve(length * repeats);
96 self.values.reserve((stop_offset - start_offset) * repeats);
97 for offset_idx in start..start + length {
98 let substring_start = other_offsets[offset_idx].to_usize();
99 let substring_stop = other_offsets[offset_idx + 1].to_usize();
100 for _ in 0..repeats {
101 self.offsets
102 .try_push(substring_stop - substring_start)
103 .unwrap();
104 self.values
105 .extend_from_slice(&other_values[substring_start..substring_stop]);
106 }
107 }
108 self.validity
109 .subslice_extend_each_repeated_from_opt_validity(
110 other.validity(),
111 start,
112 length,
113 repeats,
114 );
115 }
116
117 unsafe fn gather_extend(
118 &mut self,
119 other: &BinaryArray<O>,
120 idxs: &[IdxSize],
121 _share: ShareStrategy,
122 ) {
123 let other_values = &**other.values();
124 let other_offsets = other.offsets();
125
126 let total_len: usize = idxs
128 .iter()
129 .map(|i| {
130 let start_offset = other_offsets.get_unchecked(*i as usize).to_usize();
131 let stop_offset = other_offsets.get_unchecked(*i as usize + 1).to_usize();
132 stop_offset - start_offset
133 })
134 .sum();
135 self.values.reserve(total_len);
136
137 for idx in idxs {
138 let start_offset = other_offsets.get_unchecked(*idx as usize).to_usize();
139 let stop_offset = other_offsets.get_unchecked(*idx as usize + 1).to_usize();
140 self.values
141 .extend_from_slice(other_values.get_unchecked(start_offset..stop_offset));
142 }
143
144 self.validity
145 .gather_extend_from_opt_validity(other.validity(), idxs);
146 }
147
148 fn opt_gather_extend(
149 &mut self,
150 other: &BinaryArray<O>,
151 idxs: &[IdxSize],
152 _share: ShareStrategy,
153 ) {
154 let other_values = &**other.values();
155 let other_offsets = other.offsets();
156
157 unsafe {
158 let total_len: usize = idxs
160 .iter()
161 .map(|idx| {
162 if (*idx as usize) < other.len() {
163 let start_offset = other_offsets.get_unchecked(*idx as usize).to_usize();
164 let stop_offset = other_offsets.get_unchecked(*idx as usize + 1).to_usize();
165 stop_offset - start_offset
166 } else {
167 0
168 }
169 })
170 .sum();
171 self.values.reserve(total_len);
172
173 for idx in idxs {
174 let start_offset = other_offsets.get_unchecked(*idx as usize).to_usize();
175 let stop_offset = other_offsets.get_unchecked(*idx as usize + 1).to_usize();
176 self.values
177 .extend_from_slice(other_values.get_unchecked(start_offset..stop_offset));
178 }
179
180 self.validity
181 .opt_gather_extend_from_opt_validity(other.validity(), idxs, other.len());
182 }
183 }
184}