polars_arrow/array/binary/
builder.rs

1use polars_utils::IdxSize;
2
3use crate::array::BinaryArray;
4use crate::array::builder::{ShareStrategy, StaticArrayBuilder};
5use crate::bitmap::OptBitmapBuilder;
6use crate::buffer::Buffer;
7use crate::datatypes::ArrowDataType;
8use crate::offset::{Offset, Offsets, OffsetsBuffer};
9
10pub struct BinaryArrayBuilder<O: Offset> {
11    dtype: ArrowDataType,
12    offsets: Offsets<O>,
13    values: Vec<u8>,
14    validity: OptBitmapBuilder,
15}
16
17impl<O: Offset> BinaryArrayBuilder<O> {
18    pub fn new(dtype: ArrowDataType) -> Self {
19        Self {
20            dtype,
21            offsets: Offsets::new(),
22            values: Vec::new(),
23            validity: OptBitmapBuilder::default(),
24        }
25    }
26}
27
28impl<O: Offset> StaticArrayBuilder for BinaryArrayBuilder<O> {
29    type Array = BinaryArray<O>;
30
31    fn dtype(&self) -> &ArrowDataType {
32        &self.dtype
33    }
34
35    fn reserve(&mut self, additional: usize) {
36        self.offsets.reserve(additional);
37        self.validity.reserve(additional);
38        // No values reserve, we have no idea how large it needs to be.
39    }
40
41    fn freeze(self) -> BinaryArray<O> {
42        let offsets = OffsetsBuffer::from(self.offsets);
43        let values = Buffer::from(self.values);
44        let validity = self.validity.into_opt_validity();
45        BinaryArray::new(self.dtype, offsets, values, validity)
46    }
47
48    fn freeze_reset(&mut self) -> Self::Array {
49        let offsets = OffsetsBuffer::from(core::mem::take(&mut self.offsets));
50        let values = Buffer::from(core::mem::take(&mut self.values));
51        let validity = core::mem::take(&mut self.validity).into_opt_validity();
52        BinaryArray::new(self.dtype.clone(), offsets, values, validity)
53    }
54
55    fn len(&self) -> usize {
56        self.offsets.len_proxy()
57    }
58
59    fn extend_nulls(&mut self, length: usize) {
60        self.offsets.extend_constant(length);
61        self.validity.extend_constant(length, false);
62    }
63
64    fn subslice_extend(
65        &mut self,
66        other: &BinaryArray<O>,
67        start: usize,
68        length: usize,
69        _share: ShareStrategy,
70    ) {
71        let start_offset = other.offsets()[start].to_usize();
72        let stop_offset = other.offsets()[start + length].to_usize();
73        self.offsets
74            .try_extend_from_slice(other.offsets(), start, length)
75            .unwrap();
76        self.values
77            .extend_from_slice(&other.values()[start_offset..stop_offset]);
78        self.validity
79            .subslice_extend_from_opt_validity(other.validity(), start, length);
80    }
81
82    fn subslice_extend_each_repeated(
83        &mut self,
84        other: &BinaryArray<O>,
85        start: usize,
86        length: usize,
87        repeats: usize,
88        _share: ShareStrategy,
89    ) {
90        let other_offsets = other.offsets();
91        let other_values = &**other.values();
92
93        let start_offset = other.offsets()[start].to_usize();
94        let stop_offset = other.offsets()[start + length].to_usize();
95        self.offsets.reserve(length * repeats);
96        self.values.reserve((stop_offset - start_offset) * repeats);
97        for offset_idx in start..start + length {
98            let substring_start = other_offsets[offset_idx].to_usize();
99            let substring_stop = other_offsets[offset_idx + 1].to_usize();
100            for _ in 0..repeats {
101                self.offsets
102                    .try_push(substring_stop - substring_start)
103                    .unwrap();
104                self.values
105                    .extend_from_slice(&other_values[substring_start..substring_stop]);
106            }
107        }
108        self.validity
109            .subslice_extend_each_repeated_from_opt_validity(
110                other.validity(),
111                start,
112                length,
113                repeats,
114            );
115    }
116
117    unsafe fn gather_extend(
118        &mut self,
119        other: &BinaryArray<O>,
120        idxs: &[IdxSize],
121        _share: ShareStrategy,
122    ) {
123        let other_values = &**other.values();
124        let other_offsets = other.offsets();
125
126        // Pre-compute proper length for reserve.
127        let total_len: usize = idxs
128            .iter()
129            .map(|i| {
130                let start_offset = other_offsets.get_unchecked(*i as usize).to_usize();
131                let stop_offset = other_offsets.get_unchecked(*i as usize + 1).to_usize();
132                stop_offset - start_offset
133            })
134            .sum();
135        self.values.reserve(total_len);
136
137        for idx in idxs {
138            let start_offset = other_offsets.get_unchecked(*idx as usize).to_usize();
139            let stop_offset = other_offsets.get_unchecked(*idx as usize + 1).to_usize();
140            self.values
141                .extend_from_slice(other_values.get_unchecked(start_offset..stop_offset));
142        }
143
144        self.validity
145            .gather_extend_from_opt_validity(other.validity(), idxs);
146    }
147
148    fn opt_gather_extend(
149        &mut self,
150        other: &BinaryArray<O>,
151        idxs: &[IdxSize],
152        _share: ShareStrategy,
153    ) {
154        let other_values = &**other.values();
155        let other_offsets = other.offsets();
156
157        unsafe {
158            // Pre-compute proper length for reserve.
159            let total_len: usize = idxs
160                .iter()
161                .map(|idx| {
162                    if (*idx as usize) < other.len() {
163                        let start_offset = other_offsets.get_unchecked(*idx as usize).to_usize();
164                        let stop_offset = other_offsets.get_unchecked(*idx as usize + 1).to_usize();
165                        stop_offset - start_offset
166                    } else {
167                        0
168                    }
169                })
170                .sum();
171            self.values.reserve(total_len);
172
173            for idx in idxs {
174                let start_offset = other_offsets.get_unchecked(*idx as usize).to_usize();
175                let stop_offset = other_offsets.get_unchecked(*idx as usize + 1).to_usize();
176                self.values
177                    .extend_from_slice(other_values.get_unchecked(start_offset..stop_offset));
178            }
179
180            self.validity
181                .opt_gather_extend_from_opt_validity(other.validity(), idxs, other.len());
182        }
183    }
184}