Skip to main content

polars_arrow/array/fixed_size_binary/
mod.rs

1use polars_buffer::Buffer;
2
3use super::{Array, Splitable};
4use crate::bitmap::Bitmap;
5use crate::datatypes::ArrowDataType;
6
7mod builder;
8mod ffi;
9pub(super) mod fmt;
10mod iterator;
11pub use builder::*;
12mod mutable;
13pub use mutable::*;
14use polars_error::{PolarsResult, polars_bail, polars_ensure};
15
16/// The Arrow's equivalent to an immutable `Vec<Option<[u8; size]>>`.
17/// Cloning and slicing this struct is `O(1)`.
18#[derive(Clone)]
19pub struct FixedSizeBinaryArray {
20    size: usize, // this is redundant with `dtype`, but useful to not have to deconstruct the dtype.
21    dtype: ArrowDataType,
22    values: Buffer<u8>,
23    validity: Option<Bitmap>,
24}
25
26impl FixedSizeBinaryArray {
27    /// Creates a new [`FixedSizeBinaryArray`].
28    ///
29    /// # Errors
30    /// This function returns an error iff:
31    /// * The `dtype`'s logical type is not in [`FixedSizeBinary`, `Float16`], or
32    /// * The length of `values` is not a multiple of `size` in `dtype`, or
33    /// * the validity's length is not equal to `values.len() / size`.
34    pub fn try_new(
35        dtype: ArrowDataType,
36        values: Buffer<u8>,
37        validity: Option<Bitmap>,
38    ) -> PolarsResult<Self> {
39        let size = Self::maybe_get_size(&dtype)?;
40
41        if !values.len().is_multiple_of(size) {
42            polars_bail!(ComputeError:
43                "values (of len {}) must be a multiple of size ({}) in FixedSizeBinaryArray.",
44                values.len(),
45                size
46            )
47        }
48        let len = values.len() / size;
49
50        if validity
51            .as_ref()
52            .is_some_and(|validity| validity.len() != len)
53        {
54            polars_bail!(ComputeError: "validity mask length must be equal to the number of values divided by size")
55        }
56
57        Ok(Self {
58            size,
59            dtype,
60            values,
61            validity,
62        })
63    }
64
65    /// Creates a new [`FixedSizeBinaryArray`].
66    /// # Panics
67    /// This function panics iff:
68    /// * The `dtype`'s physical type is not [`crate::datatypes::PhysicalType::FixedSizeBinary`]
69    /// * The length of `values` is not a multiple of `size` in `dtype`
70    /// * the validity's length is not equal to `values.len() / size`.
71    pub fn new(dtype: ArrowDataType, values: Buffer<u8>, validity: Option<Bitmap>) -> Self {
72        Self::try_new(dtype, values, validity).unwrap()
73    }
74
75    /// Returns a new empty [`FixedSizeBinaryArray`].
76    pub fn new_empty(dtype: ArrowDataType) -> Self {
77        Self::new(dtype, Buffer::new(), None)
78    }
79
80    /// Returns a new null [`FixedSizeBinaryArray`].
81    pub fn new_null(dtype: ArrowDataType, length: usize) -> Self {
82        let size = Self::maybe_get_size(&dtype).unwrap();
83        Self::new(
84            dtype,
85            vec![0u8; length * size].into(),
86            Some(Bitmap::new_zeroed(length)),
87        )
88    }
89
90    pub fn into_inner(self) -> (ArrowDataType, Buffer<u8>, Option<Bitmap>) {
91        (self.dtype, self.values, self.validity)
92    }
93}
94
95// must use
96impl FixedSizeBinaryArray {
97    /// Slices this [`FixedSizeBinaryArray`].
98    /// # Implementation
99    /// This operation is `O(1)`.
100    /// # Panics
101    /// panics iff `offset + length > self.len()`
102    pub fn slice(&mut self, offset: usize, length: usize) {
103        assert!(
104            offset + length <= self.len(),
105            "the offset of the new Buffer cannot exceed the existing length"
106        );
107        unsafe { self.slice_unchecked(offset, length) }
108    }
109
110    /// Slices this [`FixedSizeBinaryArray`].
111    /// # Implementation
112    /// This operation is `O(1)`.
113    ///
114    /// # Safety
115    /// The caller must ensure that `offset + length <= self.len()`.
116    pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
117        self.validity = self
118            .validity
119            .take()
120            .map(|bitmap| bitmap.sliced_unchecked(offset, length))
121            .filter(|bitmap| bitmap.unset_bits() > 0);
122        let start = offset * self.size;
123        self.values
124            .slice_in_place_unchecked(start..start + length * self.size);
125    }
126
127    impl_sliced!();
128    impl_mut_validity!();
129    impl_into_array!();
130}
131
132// accessors
133impl FixedSizeBinaryArray {
134    /// Returns the length of this array
135    #[inline]
136    pub fn len(&self) -> usize {
137        self.values.len() / self.size
138    }
139
140    /// The optional validity.
141    #[inline]
142    pub fn validity(&self) -> Option<&Bitmap> {
143        self.validity.as_ref()
144    }
145
146    /// Returns the values allocated on this [`FixedSizeBinaryArray`].
147    pub fn values(&self) -> &Buffer<u8> {
148        &self.values
149    }
150
151    /// Returns value at position `i`.
152    /// # Panic
153    /// Panics iff `i >= self.len()`.
154    #[inline]
155    pub fn value(&self, i: usize) -> &[u8] {
156        assert!(i < self.len());
157        unsafe { self.value_unchecked(i) }
158    }
159
160    /// Returns the element at index `i` as &str
161    ///
162    /// # Safety
163    /// Assumes that the `i < self.len`.
164    #[inline]
165    pub unsafe fn value_unchecked(&self, i: usize) -> &[u8] {
166        // soundness: invariant of the function.
167        self.values
168            .get_unchecked(i * self.size..(i + 1) * self.size)
169    }
170
171    /// Returns the element at index `i` or `None` if it is null
172    /// # Panics
173    /// iff `i >= self.len()`
174    #[inline]
175    pub fn get(&self, i: usize) -> Option<&[u8]> {
176        if !self.is_null(i) {
177            // soundness: Array::is_null panics if i >= self.len
178            unsafe { Some(self.value_unchecked(i)) }
179        } else {
180            None
181        }
182    }
183
184    /// Returns a new [`FixedSizeBinaryArray`] with a different logical type.
185    /// This is `O(1)`.
186    /// # Panics
187    /// Panics iff the dtype is not supported for the physical type.
188    #[inline]
189    pub fn to(self, dtype: ArrowDataType) -> Self {
190        match (dtype.to_storage(), self.dtype().to_storage()) {
191            (ArrowDataType::FixedSizeBinary(size_a), ArrowDataType::FixedSizeBinary(size_b))
192                if size_a == size_b => {},
193            _ => panic!("Wrong DataType"),
194        }
195
196        Self {
197            size: self.size,
198            dtype,
199            values: self.values,
200            validity: self.validity,
201        }
202    }
203
204    /// Returns the size
205    pub fn size(&self) -> usize {
206        self.size
207    }
208}
209
210impl FixedSizeBinaryArray {
211    pub(crate) fn maybe_get_size(dtype: &ArrowDataType) -> PolarsResult<usize> {
212        match dtype.to_storage() {
213            ArrowDataType::Float16 => Ok(2),
214            ArrowDataType::FixedSizeBinary(size) => {
215                polars_ensure!(*size != 0, ComputeError: "FixedSizeBinaryArray expects a positive size");
216                Ok(*size)
217            },
218            other => {
219                polars_bail!(ComputeError: "FixedSizeBinaryArray expects DataType::FixedSizeBinary. found {other:?}")
220            },
221        }
222    }
223
224    pub fn get_size(dtype: &ArrowDataType) -> usize {
225        Self::maybe_get_size(dtype).unwrap()
226    }
227}
228
229impl Array for FixedSizeBinaryArray {
230    impl_common_array!();
231
232    fn validity(&self) -> Option<&Bitmap> {
233        self.validity.as_ref()
234    }
235
236    #[inline]
237    fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
238        Box::new(self.clone().with_validity(validity))
239    }
240}
241
242impl Splitable for FixedSizeBinaryArray {
243    fn check_bound(&self, offset: usize) -> bool {
244        offset < self.len()
245    }
246
247    unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
248        let (lhs_values, rhs_values) = unsafe { self.values.split_at_unchecked(offset) };
249        let (lhs_validity, rhs_validity) = unsafe { self.validity.split_at_unchecked(offset) };
250
251        let size = self.size;
252
253        (
254            Self {
255                dtype: self.dtype.clone(),
256                values: lhs_values,
257                validity: lhs_validity,
258                size,
259            },
260            Self {
261                dtype: self.dtype.clone(),
262                values: rhs_values,
263                validity: rhs_validity,
264                size,
265            },
266        )
267    }
268}
269
270impl FixedSizeBinaryArray {
271    /// Creates a [`FixedSizeBinaryArray`] from an fallible iterator of optional `[u8]`.
272    pub fn try_from_iter<P: AsRef<[u8]>, I: IntoIterator<Item = Option<P>>>(
273        iter: I,
274        size: usize,
275    ) -> PolarsResult<Self> {
276        MutableFixedSizeBinaryArray::try_from_iter(iter, size).map(|x| x.into())
277    }
278
279    /// Creates a [`FixedSizeBinaryArray`] from an iterator of optional `[u8]`.
280    pub fn from_iter<P: AsRef<[u8]>, I: IntoIterator<Item = Option<P>>>(
281        iter: I,
282        size: usize,
283    ) -> Self {
284        MutableFixedSizeBinaryArray::try_from_iter(iter, size)
285            .unwrap()
286            .into()
287    }
288
289    /// Creates a [`FixedSizeBinaryArray`] from a slice of arrays of bytes
290    pub fn from_slice<const N: usize, P: AsRef<[[u8; N]]>>(a: P) -> Self {
291        let values = a.as_ref().iter().flatten().copied().collect::<Vec<_>>();
292        Self::new(ArrowDataType::FixedSizeBinary(N), values.into(), None)
293    }
294
295    /// Creates a new [`FixedSizeBinaryArray`] from a slice of optional `[u8]`.
296    // Note: this can't be `impl From` because Rust does not allow double `AsRef` on it.
297    pub fn from<const N: usize, P: AsRef<[Option<[u8; N]>]>>(slice: P) -> Self {
298        MutableFixedSizeBinaryArray::from(slice).into()
299    }
300}