polars_arrow/array/fixed_size_binary/
mutable.rs

1use std::sync::Arc;
2
3use polars_error::{PolarsResult, polars_bail};
4
5use super::FixedSizeBinaryArray;
6use crate::array::physical_binary::extend_validity;
7use crate::array::{Array, MutableArray, TryExtendFromSelf};
8use crate::bitmap::MutableBitmap;
9use crate::datatypes::ArrowDataType;
10
11/// The Arrow's equivalent to a mutable `Vec<Option<[u8; size]>>`.
12/// Converting a [`MutableFixedSizeBinaryArray`] into a [`FixedSizeBinaryArray`] is `O(1)`.
13/// # Implementation
14/// This struct does not allocate a validity until one is required (i.e. push a null to it).
15#[derive(Debug, Clone)]
16pub struct MutableFixedSizeBinaryArray {
17    dtype: ArrowDataType,
18    size: usize,
19    values: Vec<u8>,
20    validity: Option<MutableBitmap>,
21}
22
23impl From<MutableFixedSizeBinaryArray> for FixedSizeBinaryArray {
24    fn from(other: MutableFixedSizeBinaryArray) -> Self {
25        FixedSizeBinaryArray::new(
26            other.dtype,
27            other.values.into(),
28            other.validity.map(|x| x.into()),
29        )
30    }
31}
32
33impl MutableFixedSizeBinaryArray {
34    /// Creates a new [`MutableFixedSizeBinaryArray`].
35    ///
36    /// # Errors
37    /// This function returns an error iff:
38    /// * The `dtype`'s physical type is not [`crate::datatypes::PhysicalType::FixedSizeBinary`]
39    /// * The length of `values` is not a multiple of `size` in `dtype`
40    /// * the validity's length is not equal to `values.len() / size`.
41    pub fn try_new(
42        dtype: ArrowDataType,
43        values: Vec<u8>,
44        validity: Option<MutableBitmap>,
45    ) -> PolarsResult<Self> {
46        let size = FixedSizeBinaryArray::maybe_get_size(&dtype)?;
47
48        if values.len() % size != 0 {
49            polars_bail!(ComputeError:
50                "values (of len {}) must be a multiple of size ({}) in FixedSizeBinaryArray.",
51                values.len(),
52                size
53            )
54        }
55        let len = values.len() / size;
56
57        if validity
58            .as_ref()
59            .is_some_and(|validity| validity.len() != len)
60        {
61            polars_bail!(ComputeError: "validity mask length must be equal to the number of values divided by size")
62        }
63
64        Ok(Self {
65            size,
66            dtype,
67            values,
68            validity,
69        })
70    }
71
72    /// Creates a new empty [`MutableFixedSizeBinaryArray`].
73    pub fn new(size: usize) -> Self {
74        Self::with_capacity(size, 0)
75    }
76
77    /// Creates a new [`MutableFixedSizeBinaryArray`] with capacity for `capacity` entries.
78    pub fn with_capacity(size: usize, capacity: usize) -> Self {
79        Self::try_new(
80            ArrowDataType::FixedSizeBinary(size),
81            Vec::<u8>::with_capacity(capacity * size),
82            None,
83        )
84        .unwrap()
85    }
86
87    /// Creates a new [`MutableFixedSizeBinaryArray`] from a slice of optional `[u8]`.
88    // Note: this can't be `impl From` because Rust does not allow double `AsRef` on it.
89    pub fn from<const N: usize, P: AsRef<[Option<[u8; N]>]>>(slice: P) -> Self {
90        let values = slice
91            .as_ref()
92            .iter()
93            .copied()
94            .flat_map(|x| x.unwrap_or([0; N]))
95            .collect::<Vec<_>>();
96        let validity = slice
97            .as_ref()
98            .iter()
99            .map(|x| x.is_some())
100            .collect::<MutableBitmap>();
101        Self::try_new(ArrowDataType::FixedSizeBinary(N), values, validity.into()).unwrap()
102    }
103
104    /// tries to push a new entry to [`MutableFixedSizeBinaryArray`].
105    /// # Error
106    /// Errors iff the size of `value` is not equal to its own size.
107    #[inline]
108    pub fn try_push<P: AsRef<[u8]>>(&mut self, value: Option<P>) -> PolarsResult<()> {
109        match value {
110            Some(bytes) => {
111                let bytes = bytes.as_ref();
112                if self.size != bytes.len() {
113                    polars_bail!(ComputeError: "FixedSizeBinaryArray requires every item to be of its length")
114                }
115                self.values.extend_from_slice(bytes);
116
117                if let Some(validity) = &mut self.validity {
118                    validity.push(true)
119                }
120            },
121            None => {
122                self.values.resize(self.values.len() + self.size, 0);
123                match &mut self.validity {
124                    Some(validity) => validity.push(false),
125                    None => self.init_validity(),
126                }
127            },
128        }
129        Ok(())
130    }
131
132    /// pushes a new entry to [`MutableFixedSizeBinaryArray`].
133    /// # Panics
134    /// Panics iff the size of `value` is not equal to its own size.
135    #[inline]
136    pub fn push<P: AsRef<[u8]>>(&mut self, value: Option<P>) {
137        self.try_push(value).unwrap()
138    }
139
140    /// Returns the length of this array
141    #[inline]
142    pub fn len(&self) -> usize {
143        self.values.len() / self.size
144    }
145
146    /// Pop the last entry from [`MutableFixedSizeBinaryArray`].
147    /// This function returns `None` iff this array is empty
148    pub fn pop(&mut self) -> Option<Vec<u8>> {
149        if self.values.len() < self.size {
150            return None;
151        }
152        let value_start = self.values.len() - self.size;
153        let value = self.values.split_off(value_start);
154        self.validity
155            .as_mut()
156            .map(|x| x.pop()?.then(|| ()))
157            .unwrap_or_else(|| Some(()))
158            .map(|_| value)
159    }
160
161    /// Creates a new [`MutableFixedSizeBinaryArray`] from an iterator of values.
162    /// # Errors
163    /// Errors iff the size of any of the `value` is not equal to its own size.
164    pub fn try_from_iter<P: AsRef<[u8]>, I: IntoIterator<Item = Option<P>>>(
165        iter: I,
166        size: usize,
167    ) -> PolarsResult<Self> {
168        let iterator = iter.into_iter();
169        let (lower, _) = iterator.size_hint();
170        let mut primitive = Self::with_capacity(size, lower);
171        for item in iterator {
172            primitive.try_push(item)?
173        }
174        Ok(primitive)
175    }
176
177    /// returns the (fixed) size of the [`MutableFixedSizeBinaryArray`].
178    #[inline]
179    pub fn size(&self) -> usize {
180        self.size
181    }
182
183    /// Returns the capacity of this array
184    pub fn capacity(&self) -> usize {
185        self.values.capacity() / self.size
186    }
187
188    fn init_validity(&mut self) {
189        let mut validity = MutableBitmap::new();
190        validity.extend_constant(self.len(), true);
191        validity.set(self.len() - 1, false);
192        self.validity = Some(validity)
193    }
194
195    /// Returns the element at index `i` as `&[u8]`
196    #[inline]
197    pub fn value(&self, i: usize) -> &[u8] {
198        &self.values[i * self.size..(i + 1) * self.size]
199    }
200
201    /// Returns the element at index `i` as `&[u8]`
202    ///
203    /// # Safety
204    /// Assumes that the `i < self.len`.
205    #[inline]
206    pub unsafe fn value_unchecked(&self, i: usize) -> &[u8] {
207        std::slice::from_raw_parts(self.values.as_ptr().add(i * self.size), self.size)
208    }
209
210    /// Reserves `additional` slots.
211    pub fn reserve(&mut self, additional: usize) {
212        self.values.reserve(additional * self.size);
213        if let Some(x) = self.validity.as_mut() {
214            x.reserve(additional)
215        }
216    }
217
218    /// Shrinks the capacity of the [`MutableFixedSizeBinaryArray`] to fit its current length.
219    pub fn shrink_to_fit(&mut self) {
220        self.values.shrink_to_fit();
221        if let Some(validity) = &mut self.validity {
222            validity.shrink_to_fit()
223        }
224    }
225
226    pub fn freeze(self) -> FixedSizeBinaryArray {
227        FixedSizeBinaryArray::new(
228            ArrowDataType::FixedSizeBinary(self.size),
229            self.values.into(),
230            self.validity.map(|x| x.into()),
231        )
232    }
233}
234
235/// Accessors
236impl MutableFixedSizeBinaryArray {
237    /// Returns its values.
238    pub fn values(&self) -> &Vec<u8> {
239        &self.values
240    }
241
242    /// Returns a mutable slice of values.
243    pub fn values_mut_slice(&mut self) -> &mut [u8] {
244        self.values.as_mut_slice()
245    }
246}
247
248impl MutableArray for MutableFixedSizeBinaryArray {
249    fn len(&self) -> usize {
250        self.values.len() / self.size
251    }
252
253    fn validity(&self) -> Option<&MutableBitmap> {
254        self.validity.as_ref()
255    }
256
257    fn as_box(&mut self) -> Box<dyn Array> {
258        FixedSizeBinaryArray::new(
259            ArrowDataType::FixedSizeBinary(self.size),
260            std::mem::take(&mut self.values).into(),
261            std::mem::take(&mut self.validity).map(|x| x.into()),
262        )
263        .boxed()
264    }
265
266    fn as_arc(&mut self) -> Arc<dyn Array> {
267        FixedSizeBinaryArray::new(
268            ArrowDataType::FixedSizeBinary(self.size),
269            std::mem::take(&mut self.values).into(),
270            std::mem::take(&mut self.validity).map(|x| x.into()),
271        )
272        .arced()
273    }
274
275    fn dtype(&self) -> &ArrowDataType {
276        &self.dtype
277    }
278
279    fn as_any(&self) -> &dyn std::any::Any {
280        self
281    }
282
283    fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
284        self
285    }
286
287    fn push_null(&mut self) {
288        self.push::<&[u8]>(None);
289    }
290
291    fn reserve(&mut self, additional: usize) {
292        self.reserve(additional)
293    }
294
295    fn shrink_to_fit(&mut self) {
296        self.shrink_to_fit()
297    }
298}
299
300impl PartialEq for MutableFixedSizeBinaryArray {
301    fn eq(&self, other: &Self) -> bool {
302        self.iter().eq(other.iter())
303    }
304}
305
306impl TryExtendFromSelf for MutableFixedSizeBinaryArray {
307    fn try_extend_from_self(&mut self, other: &Self) -> PolarsResult<()> {
308        extend_validity(self.len(), &mut self.validity, &other.validity);
309
310        let slice = other.values.as_slice();
311        self.values.extend_from_slice(slice);
312        Ok(())
313    }
314}