polars_arrow/array/fixed_size_binary/
mod.rs

1use super::{Array, Splitable};
2use crate::bitmap::Bitmap;
3use crate::buffer::Buffer;
4use crate::datatypes::ArrowDataType;
5
6mod builder;
7mod ffi;
8pub(super) mod fmt;
9mod iterator;
10pub use builder::*;
11mod mutable;
12pub use mutable::*;
13use polars_error::{PolarsResult, polars_bail, polars_ensure};
14
15/// The Arrow's equivalent to an immutable `Vec<Option<[u8; size]>>`.
16/// Cloning and slicing this struct is `O(1)`.
17#[derive(Clone)]
18pub struct FixedSizeBinaryArray {
19    size: usize, // this is redundant with `dtype`, but useful to not have to deconstruct the dtype.
20    dtype: ArrowDataType,
21    values: Buffer<u8>,
22    validity: Option<Bitmap>,
23}
24
25impl FixedSizeBinaryArray {
26    /// Creates a new [`FixedSizeBinaryArray`].
27    ///
28    /// # Errors
29    /// This function returns an error iff:
30    /// * The `dtype`'s physical type is not [`crate::datatypes::PhysicalType::FixedSizeBinary`]
31    /// * The length of `values` is not a multiple of `size` in `dtype`
32    /// * the validity's length is not equal to `values.len() / size`.
33    pub fn try_new(
34        dtype: ArrowDataType,
35        values: Buffer<u8>,
36        validity: Option<Bitmap>,
37    ) -> PolarsResult<Self> {
38        let size = Self::maybe_get_size(&dtype)?;
39
40        if !values.len().is_multiple_of(size) {
41            polars_bail!(ComputeError:
42                "values (of len {}) must be a multiple of size ({}) in FixedSizeBinaryArray.",
43                values.len(),
44                size
45            )
46        }
47        let len = values.len() / size;
48
49        if validity
50            .as_ref()
51            .is_some_and(|validity| validity.len() != len)
52        {
53            polars_bail!(ComputeError: "validity mask length must be equal to the number of values divided by size")
54        }
55
56        Ok(Self {
57            size,
58            dtype,
59            values,
60            validity,
61        })
62    }
63
64    /// Creates a new [`FixedSizeBinaryArray`].
65    /// # Panics
66    /// This function panics iff:
67    /// * The `dtype`'s physical type is not [`crate::datatypes::PhysicalType::FixedSizeBinary`]
68    /// * The length of `values` is not a multiple of `size` in `dtype`
69    /// * the validity's length is not equal to `values.len() / size`.
70    pub fn new(dtype: ArrowDataType, values: Buffer<u8>, validity: Option<Bitmap>) -> Self {
71        Self::try_new(dtype, values, validity).unwrap()
72    }
73
74    /// Returns a new empty [`FixedSizeBinaryArray`].
75    pub fn new_empty(dtype: ArrowDataType) -> Self {
76        Self::new(dtype, Buffer::new(), None)
77    }
78
79    /// Returns a new null [`FixedSizeBinaryArray`].
80    pub fn new_null(dtype: ArrowDataType, length: usize) -> Self {
81        let size = Self::maybe_get_size(&dtype).unwrap();
82        Self::new(
83            dtype,
84            vec![0u8; length * size].into(),
85            Some(Bitmap::new_zeroed(length)),
86        )
87    }
88
89    pub fn into_inner(self) -> (ArrowDataType, Buffer<u8>, Option<Bitmap>) {
90        (self.dtype, self.values, self.validity)
91    }
92}
93
94// must use
95impl FixedSizeBinaryArray {
96    /// Slices this [`FixedSizeBinaryArray`].
97    /// # Implementation
98    /// This operation is `O(1)`.
99    /// # Panics
100    /// panics iff `offset + length > self.len()`
101    pub fn slice(&mut self, offset: usize, length: usize) {
102        assert!(
103            offset + length <= self.len(),
104            "the offset of the new Buffer cannot exceed the existing length"
105        );
106        unsafe { self.slice_unchecked(offset, length) }
107    }
108
109    /// Slices this [`FixedSizeBinaryArray`].
110    /// # Implementation
111    /// This operation is `O(1)`.
112    ///
113    /// # Safety
114    /// The caller must ensure that `offset + length <= self.len()`.
115    pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
116        self.validity = self
117            .validity
118            .take()
119            .map(|bitmap| bitmap.sliced_unchecked(offset, length))
120            .filter(|bitmap| bitmap.unset_bits() > 0);
121        self.values
122            .slice_unchecked(offset * self.size, length * self.size);
123    }
124
125    impl_sliced!();
126    impl_mut_validity!();
127    impl_into_array!();
128}
129
130// accessors
131impl FixedSizeBinaryArray {
132    /// Returns the length of this array
133    #[inline]
134    pub fn len(&self) -> usize {
135        self.values.len() / self.size
136    }
137
138    /// The optional validity.
139    #[inline]
140    pub fn validity(&self) -> Option<&Bitmap> {
141        self.validity.as_ref()
142    }
143
144    /// Returns the values allocated on this [`FixedSizeBinaryArray`].
145    pub fn values(&self) -> &Buffer<u8> {
146        &self.values
147    }
148
149    /// Returns value at position `i`.
150    /// # Panic
151    /// Panics iff `i >= self.len()`.
152    #[inline]
153    pub fn value(&self, i: usize) -> &[u8] {
154        assert!(i < self.len());
155        unsafe { self.value_unchecked(i) }
156    }
157
158    /// Returns the element at index `i` as &str
159    ///
160    /// # Safety
161    /// Assumes that the `i < self.len`.
162    #[inline]
163    pub unsafe fn value_unchecked(&self, i: usize) -> &[u8] {
164        // soundness: invariant of the function.
165        self.values
166            .get_unchecked(i * self.size..(i + 1) * self.size)
167    }
168
169    /// Returns the element at index `i` or `None` if it is null
170    /// # Panics
171    /// iff `i >= self.len()`
172    #[inline]
173    pub fn get(&self, i: usize) -> Option<&[u8]> {
174        if !self.is_null(i) {
175            // soundness: Array::is_null panics if i >= self.len
176            unsafe { Some(self.value_unchecked(i)) }
177        } else {
178            None
179        }
180    }
181
182    /// Returns a new [`FixedSizeBinaryArray`] with a different logical type.
183    /// This is `O(1)`.
184    /// # Panics
185    /// Panics iff the dtype is not supported for the physical type.
186    #[inline]
187    pub fn to(self, dtype: ArrowDataType) -> Self {
188        match (dtype.to_logical_type(), self.dtype().to_logical_type()) {
189            (ArrowDataType::FixedSizeBinary(size_a), ArrowDataType::FixedSizeBinary(size_b))
190                if size_a == size_b => {},
191            _ => panic!("Wrong DataType"),
192        }
193
194        Self {
195            size: self.size,
196            dtype,
197            values: self.values,
198            validity: self.validity,
199        }
200    }
201
202    /// Returns the size
203    pub fn size(&self) -> usize {
204        self.size
205    }
206}
207
208impl FixedSizeBinaryArray {
209    pub(crate) fn maybe_get_size(dtype: &ArrowDataType) -> PolarsResult<usize> {
210        match dtype.to_logical_type() {
211            ArrowDataType::FixedSizeBinary(size) => {
212                polars_ensure!(*size != 0, ComputeError: "FixedSizeBinaryArray expects a positive size");
213                Ok(*size)
214            },
215            other => {
216                polars_bail!(ComputeError: "FixedSizeBinaryArray expects DataType::FixedSizeBinary. found {other:?}")
217            },
218        }
219    }
220
221    pub fn get_size(dtype: &ArrowDataType) -> usize {
222        Self::maybe_get_size(dtype).unwrap()
223    }
224}
225
226impl Array for FixedSizeBinaryArray {
227    impl_common_array!();
228
229    fn validity(&self) -> Option<&Bitmap> {
230        self.validity.as_ref()
231    }
232
233    #[inline]
234    fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
235        Box::new(self.clone().with_validity(validity))
236    }
237}
238
239impl Splitable for FixedSizeBinaryArray {
240    fn check_bound(&self, offset: usize) -> bool {
241        offset < self.len()
242    }
243
244    unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
245        let (lhs_values, rhs_values) = unsafe { self.values.split_at_unchecked(offset) };
246        let (lhs_validity, rhs_validity) = unsafe { self.validity.split_at_unchecked(offset) };
247
248        let size = self.size;
249
250        (
251            Self {
252                dtype: self.dtype.clone(),
253                values: lhs_values,
254                validity: lhs_validity,
255                size,
256            },
257            Self {
258                dtype: self.dtype.clone(),
259                values: rhs_values,
260                validity: rhs_validity,
261                size,
262            },
263        )
264    }
265}
266
267impl FixedSizeBinaryArray {
268    /// Creates a [`FixedSizeBinaryArray`] from an fallible iterator of optional `[u8]`.
269    pub fn try_from_iter<P: AsRef<[u8]>, I: IntoIterator<Item = Option<P>>>(
270        iter: I,
271        size: usize,
272    ) -> PolarsResult<Self> {
273        MutableFixedSizeBinaryArray::try_from_iter(iter, size).map(|x| x.into())
274    }
275
276    /// Creates a [`FixedSizeBinaryArray`] from an iterator of optional `[u8]`.
277    pub fn from_iter<P: AsRef<[u8]>, I: IntoIterator<Item = Option<P>>>(
278        iter: I,
279        size: usize,
280    ) -> Self {
281        MutableFixedSizeBinaryArray::try_from_iter(iter, size)
282            .unwrap()
283            .into()
284    }
285
286    /// Creates a [`FixedSizeBinaryArray`] from a slice of arrays of bytes
287    pub fn from_slice<const N: usize, P: AsRef<[[u8; N]]>>(a: P) -> Self {
288        let values = a.as_ref().iter().flatten().copied().collect::<Vec<_>>();
289        Self::new(ArrowDataType::FixedSizeBinary(N), values.into(), None)
290    }
291
292    /// Creates a new [`FixedSizeBinaryArray`] from a slice of optional `[u8]`.
293    // Note: this can't be `impl From` because Rust does not allow double `AsRef` on it.
294    pub fn from<const N: usize, P: AsRef<[Option<[u8; N]>]>>(slice: P) -> Self {
295        MutableFixedSizeBinaryArray::from(slice).into()
296    }
297}