polars_arrow/array/fixed_size_binary/
mod.rs

1use super::{Array, Splitable};
2use crate::bitmap::Bitmap;
3use crate::buffer::Buffer;
4use crate::datatypes::ArrowDataType;
5
6mod builder;
7mod ffi;
8pub(super) mod fmt;
9mod iterator;
10pub use builder::*;
11mod mutable;
12pub use mutable::*;
13use polars_error::{PolarsResult, polars_bail, polars_ensure};
14
/// The Arrow's equivalent to an immutable `Vec<Option<[u8; size]>>`.
///
/// Every element occupies exactly `size` bytes of `values`, so the number of
/// elements is `values.len() / size`.
/// Cloning and slicing this struct is `O(1)`.
#[derive(Clone)]
pub struct FixedSizeBinaryArray {
    // Width in bytes of every element. This is redundant with `dtype`, but useful
    // to not have to deconstruct the dtype on each access.
    size: usize,
    // Data type of the array; the constructors require its logical type to be
    // `FixedSizeBinary` with a non-zero size.
    dtype: ArrowDataType,
    // Bytes of all elements stored back to back, `size` bytes per element.
    values: Buffer<u8>,
    // Optional validity bitmap with one bit per element. Slicing drops the bitmap
    // once it has no unset bits left.
    validity: Option<Bitmap>,
}
24
25impl FixedSizeBinaryArray {
26    /// Creates a new [`FixedSizeBinaryArray`].
27    ///
28    /// # Errors
29    /// This function returns an error iff:
30    /// * The `dtype`'s physical type is not [`crate::datatypes::PhysicalType::FixedSizeBinary`]
31    /// * The length of `values` is not a multiple of `size` in `dtype`
32    /// * the validity's length is not equal to `values.len() / size`.
33    pub fn try_new(
34        dtype: ArrowDataType,
35        values: Buffer<u8>,
36        validity: Option<Bitmap>,
37    ) -> PolarsResult<Self> {
38        let size = Self::maybe_get_size(&dtype)?;
39
40        if values.len() % size != 0 {
41            polars_bail!(ComputeError:
42                "values (of len {}) must be a multiple of size ({}) in FixedSizeBinaryArray.",
43                values.len(),
44                size
45            )
46        }
47        let len = values.len() / size;
48
49        if validity
50            .as_ref()
51            .is_some_and(|validity| validity.len() != len)
52        {
53            polars_bail!(ComputeError: "validity mask length must be equal to the number of values divided by size")
54        }
55
56        Ok(Self {
57            size,
58            dtype,
59            values,
60            validity,
61        })
62    }
63
64    /// Creates a new [`FixedSizeBinaryArray`].
65    /// # Panics
66    /// This function panics iff:
67    /// * The `dtype`'s physical type is not [`crate::datatypes::PhysicalType::FixedSizeBinary`]
68    /// * The length of `values` is not a multiple of `size` in `dtype`
69    /// * the validity's length is not equal to `values.len() / size`.
70    pub fn new(dtype: ArrowDataType, values: Buffer<u8>, validity: Option<Bitmap>) -> Self {
71        Self::try_new(dtype, values, validity).unwrap()
72    }
73
74    /// Returns a new empty [`FixedSizeBinaryArray`].
75    pub fn new_empty(dtype: ArrowDataType) -> Self {
76        Self::new(dtype, Buffer::new(), None)
77    }
78
79    /// Returns a new null [`FixedSizeBinaryArray`].
80    pub fn new_null(dtype: ArrowDataType, length: usize) -> Self {
81        let size = Self::maybe_get_size(&dtype).unwrap();
82        Self::new(
83            dtype,
84            vec![0u8; length * size].into(),
85            Some(Bitmap::new_zeroed(length)),
86        )
87    }
88}
89
90// must use
91impl FixedSizeBinaryArray {
92    /// Slices this [`FixedSizeBinaryArray`].
93    /// # Implementation
94    /// This operation is `O(1)`.
95    /// # Panics
96    /// panics iff `offset + length > self.len()`
97    pub fn slice(&mut self, offset: usize, length: usize) {
98        assert!(
99            offset + length <= self.len(),
100            "the offset of the new Buffer cannot exceed the existing length"
101        );
102        unsafe { self.slice_unchecked(offset, length) }
103    }
104
105    /// Slices this [`FixedSizeBinaryArray`].
106    /// # Implementation
107    /// This operation is `O(1)`.
108    ///
109    /// # Safety
110    /// The caller must ensure that `offset + length <= self.len()`.
111    pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
112        self.validity = self
113            .validity
114            .take()
115            .map(|bitmap| bitmap.sliced_unchecked(offset, length))
116            .filter(|bitmap| bitmap.unset_bits() > 0);
117        self.values
118            .slice_unchecked(offset * self.size, length * self.size);
119    }
120
121    impl_sliced!();
122    impl_mut_validity!();
123    impl_into_array!();
124}
125
126// accessors
127impl FixedSizeBinaryArray {
128    /// Returns the length of this array
129    #[inline]
130    pub fn len(&self) -> usize {
131        self.values.len() / self.size
132    }
133
134    /// The optional validity.
135    #[inline]
136    pub fn validity(&self) -> Option<&Bitmap> {
137        self.validity.as_ref()
138    }
139
140    /// Returns the values allocated on this [`FixedSizeBinaryArray`].
141    pub fn values(&self) -> &Buffer<u8> {
142        &self.values
143    }
144
145    /// Returns value at position `i`.
146    /// # Panic
147    /// Panics iff `i >= self.len()`.
148    #[inline]
149    pub fn value(&self, i: usize) -> &[u8] {
150        assert!(i < self.len());
151        unsafe { self.value_unchecked(i) }
152    }
153
154    /// Returns the element at index `i` as &str
155    ///
156    /// # Safety
157    /// Assumes that the `i < self.len`.
158    #[inline]
159    pub unsafe fn value_unchecked(&self, i: usize) -> &[u8] {
160        // soundness: invariant of the function.
161        self.values
162            .get_unchecked(i * self.size..(i + 1) * self.size)
163    }
164
165    /// Returns the element at index `i` or `None` if it is null
166    /// # Panics
167    /// iff `i >= self.len()`
168    #[inline]
169    pub fn get(&self, i: usize) -> Option<&[u8]> {
170        if !self.is_null(i) {
171            // soundness: Array::is_null panics if i >= self.len
172            unsafe { Some(self.value_unchecked(i)) }
173        } else {
174            None
175        }
176    }
177
178    /// Returns a new [`FixedSizeBinaryArray`] with a different logical type.
179    /// This is `O(1)`.
180    /// # Panics
181    /// Panics iff the dtype is not supported for the physical type.
182    #[inline]
183    pub fn to(self, dtype: ArrowDataType) -> Self {
184        match (dtype.to_logical_type(), self.dtype().to_logical_type()) {
185            (ArrowDataType::FixedSizeBinary(size_a), ArrowDataType::FixedSizeBinary(size_b))
186                if size_a == size_b => {},
187            _ => panic!("Wrong DataType"),
188        }
189
190        Self {
191            size: self.size,
192            dtype,
193            values: self.values,
194            validity: self.validity,
195        }
196    }
197
198    /// Returns the size
199    pub fn size(&self) -> usize {
200        self.size
201    }
202}
203
204impl FixedSizeBinaryArray {
205    pub(crate) fn maybe_get_size(dtype: &ArrowDataType) -> PolarsResult<usize> {
206        match dtype.to_logical_type() {
207            ArrowDataType::FixedSizeBinary(size) => {
208                polars_ensure!(*size != 0, ComputeError: "FixedSizeBinaryArray expects a positive size");
209                Ok(*size)
210            },
211            other => {
212                polars_bail!(ComputeError: "FixedSizeBinaryArray expects DataType::FixedSizeBinary. found {other:?}")
213            },
214        }
215    }
216
217    pub fn get_size(dtype: &ArrowDataType) -> usize {
218        Self::maybe_get_size(dtype).unwrap()
219    }
220}
221
222impl Array for FixedSizeBinaryArray {
223    impl_common_array!();
224
225    fn validity(&self) -> Option<&Bitmap> {
226        self.validity.as_ref()
227    }
228
229    #[inline]
230    fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
231        Box::new(self.clone().with_validity(validity))
232    }
233}
234
235impl Splitable for FixedSizeBinaryArray {
236    fn check_bound(&self, offset: usize) -> bool {
237        offset < self.len()
238    }
239
240    unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
241        let (lhs_values, rhs_values) = unsafe { self.values.split_at_unchecked(offset) };
242        let (lhs_validity, rhs_validity) = unsafe { self.validity.split_at_unchecked(offset) };
243
244        let size = self.size;
245
246        (
247            Self {
248                dtype: self.dtype.clone(),
249                values: lhs_values,
250                validity: lhs_validity,
251                size,
252            },
253            Self {
254                dtype: self.dtype.clone(),
255                values: rhs_values,
256                validity: rhs_validity,
257                size,
258            },
259        )
260    }
261}
262
263impl FixedSizeBinaryArray {
264    /// Creates a [`FixedSizeBinaryArray`] from an fallible iterator of optional `[u8]`.
265    pub fn try_from_iter<P: AsRef<[u8]>, I: IntoIterator<Item = Option<P>>>(
266        iter: I,
267        size: usize,
268    ) -> PolarsResult<Self> {
269        MutableFixedSizeBinaryArray::try_from_iter(iter, size).map(|x| x.into())
270    }
271
272    /// Creates a [`FixedSizeBinaryArray`] from an iterator of optional `[u8]`.
273    pub fn from_iter<P: AsRef<[u8]>, I: IntoIterator<Item = Option<P>>>(
274        iter: I,
275        size: usize,
276    ) -> Self {
277        MutableFixedSizeBinaryArray::try_from_iter(iter, size)
278            .unwrap()
279            .into()
280    }
281
282    /// Creates a [`FixedSizeBinaryArray`] from a slice of arrays of bytes
283    pub fn from_slice<const N: usize, P: AsRef<[[u8; N]]>>(a: P) -> Self {
284        let values = a.as_ref().iter().flatten().copied().collect::<Vec<_>>();
285        Self::new(ArrowDataType::FixedSizeBinary(N), values.into(), None)
286    }
287
288    /// Creates a new [`FixedSizeBinaryArray`] from a slice of optional `[u8]`.
289    // Note: this can't be `impl From` because Rust does not allow double `AsRef` on it.
290    pub fn from<const N: usize, P: AsRef<[Option<[u8; N]>]>>(slice: P) -> Self {
291        MutableFixedSizeBinaryArray::from(slice).into()
292    }
293}