Skip to main content

arrow_array/array/
fixed_size_binary_array.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::array::print_long_array;
19use crate::iterator::FixedSizeBinaryIter;
20use crate::{Array, ArrayAccessor, ArrayRef, FixedSizeListArray, Scalar};
21use arrow_buffer::buffer::NullBuffer;
22use arrow_buffer::{ArrowNativeType, BooleanBuffer, Buffer, MutableBuffer, bit_util};
23use arrow_data::{ArrayData, ArrayDataBuilder};
24use arrow_schema::{ArrowError, DataType};
25use std::any::Any;
26use std::sync::Arc;
27
28/// An array of [fixed size binary arrays](https://arrow.apache.org/docs/format/Columnar.html#fixed-size-primitive-layout)
29///
30/// # Examples
31///
32/// Create an array from an iterable argument of byte slices.
33///
34/// ```
35///    use arrow_array::{Array, FixedSizeBinaryArray};
36///    let input_arg = vec![ vec![1, 2], vec![3, 4], vec![5, 6] ];
37///    let arr = FixedSizeBinaryArray::try_from_iter(input_arg.into_iter()).unwrap();
38///
39///    assert_eq!(3, arr.len());
40///
41/// ```
42/// Create an array from an iterable argument of sparse byte slices.
43/// Sparsity means that the input argument can contain `None` items.
44/// ```
45///    use arrow_array::{Array, FixedSizeBinaryArray};
46///    let input_arg = vec![ None, Some(vec![7, 8]), Some(vec![9, 10]), None, Some(vec![13, 14]) ];
47///    let arr = FixedSizeBinaryArray::try_from_sparse_iter_with_size(input_arg.into_iter(), 2).unwrap();
48///    assert_eq!(5, arr.len())
49///
50/// ```
51///
52#[derive(Clone)]
53pub struct FixedSizeBinaryArray {
54    data_type: DataType, // Must be DataType::FixedSizeBinary(value_length)
55    value_data: Buffer,
56    nulls: Option<NullBuffer>,
57    len: usize,
58    value_length: i32,
59}
60
61impl FixedSizeBinaryArray {
62    /// Create a new [`FixedSizeBinaryArray`] with `size` element size, panicking on failure
63    ///
64    /// # Panics
65    ///
66    /// Panics if [`Self::try_new`] returns an error
67    pub fn new(size: i32, values: Buffer, nulls: Option<NullBuffer>) -> Self {
68        Self::try_new(size, values, nulls).unwrap()
69    }
70
71    /// Create a new [`Scalar`] from `value`
72    pub fn new_scalar(value: impl AsRef<[u8]>) -> Scalar<Self> {
73        let v = value.as_ref();
74        let size = i32::try_from(v.len()).expect("FixedSizeBinaryArray value length exceeds i32");
75        Scalar::new(Self::new(size, Buffer::from(v), None))
76    }
77
78    /// Create a new [`FixedSizeBinaryArray`] from the provided parts, returning an error on failure
79    ///
80    /// Creating an arrow with `size == 0` will try to get the length from the null buffer. If
81    /// no null buffer is provided, the resulting array will have length zero.
82    ///
83    /// # Errors
84    ///
85    /// * `size < 0`
86    /// * `values.len() / size != nulls.len()`
87    /// * `size == 0 && values.len() != 0`
88    /// * `len * size > i32::MAX`
89    pub fn try_new(
90        size: i32,
91        values: Buffer,
92        nulls: Option<NullBuffer>,
93    ) -> Result<Self, ArrowError> {
94        let data_type = DataType::FixedSizeBinary(size);
95        let s = size.to_usize().ok_or_else(|| {
96            ArrowError::InvalidArgumentError(format!("Size cannot be negative, got {size}"))
97        })?;
98
99        let len = if s == 0 {
100            if !values.is_empty() {
101                return Err(ArrowError::InvalidArgumentError(
102                    "Buffer cannot have non-zero length if the item size is zero".to_owned(),
103                ));
104            }
105
106            // If the item size is zero, try to determine the length from the null buffer
107            nulls.as_ref().map(|n| n.len()).unwrap_or(0)
108        } else {
109            values.len() / s
110        };
111        if let Some(n) = nulls.as_ref() {
112            if n.len() != len {
113                return Err(ArrowError::InvalidArgumentError(format!(
114                    "Incorrect length of null buffer for FixedSizeBinaryArray, expected {} got {}",
115                    len,
116                    n.len(),
117                )));
118            }
119        }
120
121        Self::validate_lengths(s, len)?;
122
123        Ok(Self {
124            data_type,
125            value_data: values,
126            value_length: size,
127            nulls,
128            len,
129        })
130    }
131
132    /// Some calculations below use i32 arithmetic which can overflow when
133    /// valid offsets are past i32::MAX. Until that is solved for real do not
134    /// permit constructing any FixedSizeBinaryArray that has a valid offset
135    /// past i32::MAX
136    fn validate_lengths(value_size: usize, len: usize) -> Result<(), ArrowError> {
137        // the offset is also calculated for the next element (i + 1) so
138        // check `len` (not last element index) to ensure that all offsets are valid
139        let max_offset = value_size.checked_mul(len).ok_or_else(|| {
140            ArrowError::InvalidArgumentError(format!(
141                "FixedSizeBinaryArray error: value size {value_size} * len {len} exceeds maximum valid offset"
142            ))
143        })?;
144
145        let max_valid_offset: usize = i32::MAX.try_into().map_err(|_| {
146            ArrowError::InvalidArgumentError(format!(
147                "FixedSizeBinaryArray error: maximum valid offset exceeds i32::MAX, got {max_offset}"
148            ))
149        })?;
150
151        if max_offset > max_valid_offset {
152            return Err(ArrowError::InvalidArgumentError(format!(
153                "FixedSizeBinaryArray error: value size {value_size} * length {len} exceeds maximum valid offset of {max_valid_offset}"
154            )));
155        };
156        Ok(())
157    }
158
159    /// Create a new [`FixedSizeBinaryArray`] of length `len` where all values are null
160    ///
161    /// # Panics
162    ///
163    /// Panics if
164    ///
165    /// * `size < 0`
166    /// * `size * len` would overflow `usize`
167    /// * `size * len > i32::MAX`
168    /// * `size * len * 8` would overflow `usize`
169    pub fn new_null(size: i32, len: usize) -> Self {
170        const BITS_IN_A_BYTE: usize = 8;
171        let size_usize = size.to_usize().unwrap();
172        Self::validate_lengths(size_usize, len).unwrap();
173        let capacity_in_bytes = size_usize.checked_mul(len).unwrap();
174        let capacity_in_bits = capacity_in_bytes.checked_mul(BITS_IN_A_BYTE).unwrap();
175        Self {
176            data_type: DataType::FixedSizeBinary(size),
177            value_data: MutableBuffer::new_null(capacity_in_bits).into(),
178            nulls: Some(NullBuffer::new_null(len)),
179            value_length: size,
180            len,
181        }
182    }
183
184    /// Deconstruct this array into its constituent parts
185    pub fn into_parts(self) -> (i32, Buffer, Option<NullBuffer>) {
186        (self.value_length, self.value_data, self.nulls)
187    }
188
189    /// Returns the element at index `i` as a byte slice.
190    ///
191    /// Note: This method does not check for nulls and the value is arbitrary
192    /// (but still well-defined) if [`is_null`](Self::is_null) returns true for the index.
193    ///
194    /// # Panics
195    /// Panics if index `i` is out of bounds.
196    pub fn value(&self, i: usize) -> &[u8] {
197        assert!(
198            i < self.len(),
199            "Trying to access an element at index {} from a FixedSizeBinaryArray of length {}",
200            i,
201            self.len()
202        );
203        let offset = i + self.offset();
204        unsafe {
205            let pos = self.value_offset_at(offset);
206            std::slice::from_raw_parts(
207                self.value_data.as_ptr().offset(pos as isize),
208                (self.value_offset_at(offset + 1) - pos) as usize,
209            )
210        }
211    }
212
213    /// Returns the element at index `i` as a byte slice.
214    ///
215    /// Note: This method does not check for nulls and the value is arbitrary
216    /// if [`is_null`](Self::is_null) returns true for the index.
217    ///
218    /// # Safety
219    ///
220    /// Caller is responsible for ensuring that the index is within the bounds
221    /// of the array
222    pub unsafe fn value_unchecked(&self, i: usize) -> &[u8] {
223        let offset = i + self.offset();
224        let pos = self.value_offset_at(offset);
225        unsafe {
226            std::slice::from_raw_parts(
227                self.value_data.as_ptr().offset(pos as isize),
228                (self.value_offset_at(offset + 1) - pos) as usize,
229            )
230        }
231    }
232
233    /// Returns the offset for the element at index `i`.
234    ///
235    /// Note this doesn't do any bound checking, for performance reason.
236    #[inline]
237    pub fn value_offset(&self, i: usize) -> i32 {
238        self.value_offset_at(self.offset() + i)
239    }
240
241    /// Returns the length for an element.
242    ///
243    /// All elements have the same length as the array is a fixed size.
244    #[inline]
245    pub fn value_length(&self) -> i32 {
246        self.value_length
247    }
248
249    /// Returns the values of this array.
250    ///
251    /// Unlike [`Self::value_data`] this returns the [`Buffer`]
252    /// allowing for zero-copy cloning.
253    #[inline]
254    pub fn values(&self) -> &Buffer {
255        &self.value_data
256    }
257
258    /// Returns the raw value data.
259    pub fn value_data(&self) -> &[u8] {
260        self.value_data.as_slice()
261    }
262
263    /// Returns a zero-copy slice of this array with the indicated offset and length.
264    pub fn slice(&self, offset: usize, len: usize) -> Self {
265        assert!(
266            offset.saturating_add(len) <= self.len,
267            "the length + offset of the sliced FixedSizeBinaryArray cannot exceed the existing length"
268        );
269
270        let size = self.value_length as usize;
271
272        Self {
273            data_type: self.data_type.clone(),
274            nulls: self.nulls.as_ref().map(|n| n.slice(offset, len)),
275            value_length: self.value_length,
276            value_data: self.value_data.slice_with_length(offset * size, len * size),
277            len,
278        }
279    }
280
281    /// Create an array from an iterable argument of sparse byte slices.
282    /// Sparsity means that items returned by the iterator are optional, i.e input argument can
283    /// contain `None` items.
284    ///
285    /// # Examples
286    ///
287    /// ```
288    /// use arrow_array::FixedSizeBinaryArray;
289    /// let input_arg = vec![
290    ///     None,
291    ///     Some(vec![7, 8]),
292    ///     Some(vec![9, 10]),
293    ///     None,
294    ///     Some(vec![13, 14]),
295    ///     None,
296    /// ];
297    /// let array = FixedSizeBinaryArray::try_from_sparse_iter(input_arg.into_iter()).unwrap();
298    /// ```
299    ///
300    /// # Errors
301    ///
302    /// Returns error if argument has length zero, or sizes of nested slices don't match.
303    #[deprecated(
304        since = "28.0.0",
305        note = "This function will fail if the iterator produces only None values; prefer `try_from_sparse_iter_with_size`"
306    )]
307    pub fn try_from_sparse_iter<T, U>(mut iter: T) -> Result<Self, ArrowError>
308    where
309        T: Iterator<Item = Option<U>>,
310        U: AsRef<[u8]>,
311    {
312        let mut len = 0;
313        let mut size = None;
314        let mut byte = 0;
315
316        let iter_size_hint = iter.size_hint().0;
317        let mut null_buf = MutableBuffer::new(bit_util::ceil(iter_size_hint, 8));
318        let mut buffer = MutableBuffer::new(0);
319
320        let mut prepend = 0;
321        iter.try_for_each(|item| -> Result<(), ArrowError> {
322            // extend null bitmask by one byte per each 8 items
323            if byte == 0 {
324                null_buf.push(0u8);
325                byte = 8;
326            }
327            byte -= 1;
328
329            if let Some(slice) = item {
330                let slice = slice.as_ref();
331                if let Some(size) = size {
332                    if size != slice.len() {
333                        return Err(ArrowError::InvalidArgumentError(format!(
334                            "Nested array size mismatch: one is {}, and the other is {}",
335                            size,
336                            slice.len()
337                        )));
338                    }
339                } else {
340                    let len = slice.len();
341                    size = Some(len);
342                    // Now that we know how large each element is we can reserve
343                    // sufficient capacity in the underlying mutable buffer for
344                    // the data.
345                    if let Some(capacity) = iter_size_hint.checked_mul(len) {
346                        buffer.reserve(capacity);
347                    }
348                    let prepend_zeros = slice.len().checked_mul(prepend).ok_or_else(|| {
349                        ArrowError::InvalidArgumentError(format!(
350                            "FixedSizeBinaryArray error: value size {} * prepend {prepend} exceeds usize",
351                            slice.len()
352                        ))
353                    })?;
354                    buffer.extend_zeros(prepend_zeros);
355                }
356                bit_util::set_bit(null_buf.as_slice_mut(), len);
357                buffer.extend_from_slice(slice);
358            } else if let Some(size) = size {
359                buffer.extend_zeros(size);
360            } else {
361                prepend += 1;
362            }
363
364            len += 1;
365
366            Ok(())
367        })?;
368
369        if len == 0 {
370            return Err(ArrowError::InvalidArgumentError(
371                "Input iterable argument has no data".to_owned(),
372            ));
373        }
374
375        let null_buf = BooleanBuffer::new(null_buf.into(), 0, len);
376        let nulls = Some(NullBuffer::new(null_buf)).filter(|n| n.null_count() > 0);
377
378        let size = size.unwrap_or(0);
379        Self::validate_lengths(size, len)?;
380        let size = size.try_into().map_err(|_| {
381            ArrowError::InvalidArgumentError(format!(
382                "FixedSizeBinaryArray value length exceeds i32, got {size}"
383            ))
384        })?;
385        Ok(Self {
386            data_type: DataType::FixedSizeBinary(size),
387            value_data: buffer.into(),
388            nulls,
389            value_length: size,
390            len,
391        })
392    }
393
394    /// Create an array from an iterable argument of sparse byte slices.
395    /// Sparsity means that items returned by the iterator are optional, i.e input argument can
396    /// contain `None` items. In cases where the iterator returns only `None` values, this
397    /// also takes a size parameter to ensure that the a valid FixedSizeBinaryArray is still
398    /// created.
399    ///
400    /// # Examples
401    ///
402    /// ```
403    /// use arrow_array::FixedSizeBinaryArray;
404    /// let input_arg = vec![
405    ///     None,
406    ///     Some(vec![7, 8]),
407    ///     Some(vec![9, 10]),
408    ///     None,
409    ///     Some(vec![13, 14]),
410    ///     None,
411    /// ];
412    /// let array = FixedSizeBinaryArray::try_from_sparse_iter_with_size(input_arg.into_iter(), 2).unwrap();
413    /// ```
414    ///
415    /// # Errors
416    ///
417    /// Returns error if argument has length zero, or sizes of nested slices don't match.
418    pub fn try_from_sparse_iter_with_size<T, U>(mut iter: T, size: i32) -> Result<Self, ArrowError>
419    where
420        T: Iterator<Item = Option<U>>,
421        U: AsRef<[u8]>,
422    {
423        let size_usize = size.to_usize().ok_or_else(|| {
424            ArrowError::InvalidArgumentError(format!("Size cannot be negative, got {size}"))
425        })?;
426        let mut len = 0;
427        let mut byte = 0;
428
429        let iter_size_hint = iter.size_hint().0;
430        let mut null_buf = MutableBuffer::new(bit_util::ceil(iter_size_hint, 8));
431        let capacity = iter_size_hint.checked_mul(size_usize).ok_or_else(|| {
432            ArrowError::InvalidArgumentError(format!(
433                "FixedSizeBinaryArray error: value size {size_usize} * len hint {iter_size_hint} exceeds usize"
434            ))
435        })?;
436        let mut buffer = MutableBuffer::new(capacity);
437
438        iter.try_for_each(|item| -> Result<(), ArrowError> {
439            // extend null bitmask by one byte per each 8 items
440            if byte == 0 {
441                null_buf.push(0u8);
442                byte = 8;
443            }
444            byte -= 1;
445
446            if let Some(slice) = item {
447                let slice = slice.as_ref();
448                if size_usize != slice.len() {
449                    return Err(ArrowError::InvalidArgumentError(format!(
450                        "Nested array size mismatch: one is {}, and the other is {}",
451                        size,
452                        slice.len()
453                    )));
454                }
455
456                bit_util::set_bit(null_buf.as_slice_mut(), len);
457                buffer.extend_from_slice(slice);
458            } else {
459                buffer.extend_zeros(size_usize);
460            }
461
462            len += 1;
463
464            Ok(())
465        })?;
466
467        let null_buf = BooleanBuffer::new(null_buf.into(), 0, len);
468        let nulls = Some(NullBuffer::new(null_buf)).filter(|n| n.null_count() > 0);
469        Self::validate_lengths(size_usize, len)?;
470
471        Ok(Self {
472            data_type: DataType::FixedSizeBinary(size),
473            value_data: buffer.into(),
474            nulls,
475            len,
476            value_length: size,
477        })
478    }
479
480    /// Create an array from an iterable argument of byte slices.
481    ///
482    /// # Examples
483    ///
484    /// ```
485    /// use arrow_array::FixedSizeBinaryArray;
486    /// let input_arg = vec![
487    ///     vec![1, 2],
488    ///     vec![3, 4],
489    ///     vec![5, 6],
490    /// ];
491    /// let array = FixedSizeBinaryArray::try_from_iter(input_arg.into_iter()).unwrap();
492    /// ```
493    ///
494    /// # Errors
495    ///
496    /// Returns error if argument has length zero, or sizes of nested slices don't match.
497    pub fn try_from_iter<T, U>(mut iter: T) -> Result<Self, ArrowError>
498    where
499        T: Iterator<Item = U>,
500        U: AsRef<[u8]>,
501    {
502        let mut len = 0;
503        let mut size = None;
504        let iter_size_hint = iter.size_hint().0;
505        let mut buffer = MutableBuffer::new(0);
506
507        iter.try_for_each(|item| -> Result<(), ArrowError> {
508            let slice = item.as_ref();
509            if let Some(size) = size {
510                if size != slice.len() {
511                    return Err(ArrowError::InvalidArgumentError(format!(
512                        "Nested array size mismatch: one is {}, and the other is {}",
513                        size,
514                        slice.len()
515                    )));
516                }
517            } else {
518                let len = slice.len();
519                size = Some(len);
520                if let Some(capacity) = iter_size_hint.checked_mul(len) {
521                    buffer.reserve(capacity);
522                }
523            }
524
525            buffer.extend_from_slice(slice);
526
527            len += 1;
528
529            Ok(())
530        })?;
531
532        if len == 0 {
533            return Err(ArrowError::InvalidArgumentError(
534                "Input iterable argument has no data".to_owned(),
535            ));
536        }
537
538        let size = size.unwrap_or(0);
539        Self::validate_lengths(size, len)?;
540        let size = size.try_into().map_err(|_| {
541            ArrowError::InvalidArgumentError(format!(
542                "FixedSizeBinaryArray value length exceeds i32, got {size}"
543            ))
544        })?;
545        Ok(Self {
546            data_type: DataType::FixedSizeBinary(size),
547            value_data: buffer.into(),
548            nulls: None,
549            value_length: size,
550            len,
551        })
552    }
553
554    #[inline]
555    fn value_offset_at(&self, i: usize) -> i32 {
556        self.value_length * i as i32
557    }
558
559    /// constructs a new iterator
560    pub fn iter(&self) -> FixedSizeBinaryIter<'_> {
561        FixedSizeBinaryIter::new(self)
562    }
563}
564
565impl From<ArrayData> for FixedSizeBinaryArray {
566    fn from(data: ArrayData) -> Self {
567        assert_eq!(
568            data.buffers().len(),
569            1,
570            "FixedSizeBinaryArray data should contain 1 buffer only (values)"
571        );
572        let value_length = match data.data_type() {
573            DataType::FixedSizeBinary(len) => *len,
574            _ => panic!("Expected data type to be FixedSizeBinary"),
575        };
576
577        let size = value_length
578            .to_usize()
579            .expect("FixedSizeBinaryArray value length must be non-negative");
580        Self::validate_lengths(size, data.len())
581            .expect("FixedSizeBinaryArray offsets must fit within i32");
582        let value_data = data.buffers()[0].slice_with_length(
583            data.offset().checked_mul(size).expect("offset overflow"),
584            data.len().checked_mul(size).expect("length overflow"),
585        );
586
587        Self {
588            data_type: data.data_type().clone(),
589            nulls: data.nulls().cloned(),
590            len: data.len(),
591            value_data,
592            value_length,
593        }
594    }
595}
596
597impl From<FixedSizeBinaryArray> for ArrayData {
598    fn from(array: FixedSizeBinaryArray) -> Self {
599        let builder = ArrayDataBuilder::new(array.data_type)
600            .len(array.len)
601            .buffers(vec![array.value_data])
602            .nulls(array.nulls);
603
604        unsafe { builder.build_unchecked() }
605    }
606}
607
608/// Creates a `FixedSizeBinaryArray` from `FixedSizeList<u8>` array
609impl From<FixedSizeListArray> for FixedSizeBinaryArray {
610    fn from(v: FixedSizeListArray) -> Self {
611        let value_len = v.value_length();
612        let v = v.into_data();
613        assert_eq!(
614            v.child_data().len(),
615            1,
616            "FixedSizeBinaryArray can only be created from list array of u8 values \
617             (i.e. FixedSizeList<PrimitiveArray<u8>>)."
618        );
619        let child_data = &v.child_data()[0];
620
621        assert_eq!(
622            child_data.child_data().len(),
623            0,
624            "FixedSizeBinaryArray can only be created from list array of u8 values \
625             (i.e. FixedSizeList<PrimitiveArray<u8>>)."
626        );
627        assert_eq!(
628            child_data.data_type(),
629            &DataType::UInt8,
630            "FixedSizeBinaryArray can only be created from FixedSizeList<u8> arrays, mismatched data types."
631        );
632        assert_eq!(
633            child_data.null_count(),
634            0,
635            "The child array cannot contain null values."
636        );
637
638        let builder = ArrayData::builder(DataType::FixedSizeBinary(value_len))
639            .len(v.len())
640            .offset(v.offset())
641            .add_buffer(child_data.buffers()[0].slice(child_data.offset()))
642            .nulls(v.nulls().cloned());
643
644        let data = unsafe { builder.build_unchecked() };
645        Self::from(data)
646    }
647}
648
649impl From<Vec<Option<&[u8]>>> for FixedSizeBinaryArray {
650    fn from(v: Vec<Option<&[u8]>>) -> Self {
651        #[allow(deprecated)]
652        Self::try_from_sparse_iter(v.into_iter()).unwrap()
653    }
654}
655
656impl From<Vec<&[u8]>> for FixedSizeBinaryArray {
657    fn from(v: Vec<&[u8]>) -> Self {
658        Self::try_from_iter(v.into_iter()).unwrap()
659    }
660}
661
662impl<const N: usize> From<Vec<&[u8; N]>> for FixedSizeBinaryArray {
663    fn from(v: Vec<&[u8; N]>) -> Self {
664        Self::try_from_iter(v.into_iter()).unwrap()
665    }
666}
667
668impl std::fmt::Debug for FixedSizeBinaryArray {
669    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
670        write!(f, "FixedSizeBinaryArray<{}>\n[\n", self.value_length())?;
671        print_long_array(self, f, |array, index, f| {
672            std::fmt::Debug::fmt(&array.value(index), f)
673        })?;
674        write!(f, "]")
675    }
676}
677
678/// SAFETY: Correctly implements the contract of Arrow Arrays
679unsafe impl Array for FixedSizeBinaryArray {
680    fn as_any(&self) -> &dyn Any {
681        self
682    }
683
684    fn to_data(&self) -> ArrayData {
685        self.clone().into()
686    }
687
688    fn into_data(self) -> ArrayData {
689        self.into()
690    }
691
692    fn data_type(&self) -> &DataType {
693        &self.data_type
694    }
695
696    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
697        Arc::new(self.slice(offset, length))
698    }
699
700    fn len(&self) -> usize {
701        self.len
702    }
703
704    fn is_empty(&self) -> bool {
705        self.len == 0
706    }
707
708    fn shrink_to_fit(&mut self) {
709        self.value_data.shrink_to_fit();
710        if let Some(nulls) = &mut self.nulls {
711            nulls.shrink_to_fit();
712        }
713    }
714
715    fn offset(&self) -> usize {
716        0
717    }
718
719    fn nulls(&self) -> Option<&NullBuffer> {
720        self.nulls.as_ref()
721    }
722
723    fn logical_null_count(&self) -> usize {
724        // More efficient that the default implementation
725        self.null_count()
726    }
727
728    fn get_buffer_memory_size(&self) -> usize {
729        let mut sum = self.value_data.capacity();
730        if let Some(n) = &self.nulls {
731            sum += n.buffer().capacity();
732        }
733        sum
734    }
735
736    fn get_array_memory_size(&self) -> usize {
737        std::mem::size_of::<Self>() + self.get_buffer_memory_size()
738    }
739}
740
741impl<'a> ArrayAccessor for &'a FixedSizeBinaryArray {
742    type Item = &'a [u8];
743
744    fn value(&self, index: usize) -> Self::Item {
745        FixedSizeBinaryArray::value(self, index)
746    }
747
748    unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
749        unsafe { FixedSizeBinaryArray::value_unchecked(self, index) }
750    }
751}
752
753impl<'a> IntoIterator for &'a FixedSizeBinaryArray {
754    type Item = Option<&'a [u8]>;
755    type IntoIter = FixedSizeBinaryIter<'a>;
756
757    fn into_iter(self) -> Self::IntoIter {
758        FixedSizeBinaryIter::<'a>::new(self)
759    }
760}
761
762#[cfg(test)]
763mod tests {
764    use super::*;
765    use crate::RecordBatch;
766    use arrow_schema::{Field, Schema};
767
768    #[test]
769    fn test_fixed_size_binary_array() {
770        let values: [u8; 15] = *b"hellotherearrow";
771
772        let array_data = ArrayData::builder(DataType::FixedSizeBinary(5))
773            .len(3)
774            .add_buffer(Buffer::from(&values))
775            .build()
776            .unwrap();
777        let fixed_size_binary_array = FixedSizeBinaryArray::from(array_data);
778        assert_eq!(3, fixed_size_binary_array.len());
779        assert_eq!(0, fixed_size_binary_array.null_count());
780        assert_eq!(
781            [b'h', b'e', b'l', b'l', b'o'],
782            fixed_size_binary_array.value(0)
783        );
784        assert_eq!(
785            [b't', b'h', b'e', b'r', b'e'],
786            fixed_size_binary_array.value(1)
787        );
788        assert_eq!(
789            [b'a', b'r', b'r', b'o', b'w'],
790            fixed_size_binary_array.value(2)
791        );
792        assert_eq!(5, fixed_size_binary_array.value_length());
793        assert_eq!(10, fixed_size_binary_array.value_offset(2));
794        for i in 0..3 {
795            assert!(fixed_size_binary_array.is_valid(i));
796            assert!(!fixed_size_binary_array.is_null(i));
797        }
798
799        // Test binary array with offset
800        let array_data = ArrayData::builder(DataType::FixedSizeBinary(5))
801            .len(2)
802            .offset(1)
803            .add_buffer(Buffer::from(&values))
804            .build()
805            .unwrap();
806        let fixed_size_binary_array = FixedSizeBinaryArray::from(array_data);
807        assert_eq!(
808            [b't', b'h', b'e', b'r', b'e'],
809            fixed_size_binary_array.value(0)
810        );
811        assert_eq!(
812            [b'a', b'r', b'r', b'o', b'w'],
813            fixed_size_binary_array.value(1)
814        );
815        assert_eq!(2, fixed_size_binary_array.len());
816        assert_eq!(0, fixed_size_binary_array.value_offset(0));
817        assert_eq!(5, fixed_size_binary_array.value_length());
818        assert_eq!(5, fixed_size_binary_array.value_offset(1));
819    }
820
821    #[test]
822    fn test_fixed_size_binary_array_from_fixed_size_list_array() {
823        let values = [0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13];
824        let values_data = ArrayData::builder(DataType::UInt8)
825            .len(12)
826            .offset(2)
827            .add_buffer(Buffer::from_slice_ref(values))
828            .build()
829            .unwrap();
830        // [null, [10, 11, 12, 13]]
831        let array_data = unsafe {
832            ArrayData::builder(DataType::FixedSizeList(
833                Arc::new(Field::new_list_field(DataType::UInt8, false)),
834                4,
835            ))
836            .len(2)
837            .offset(1)
838            .add_child_data(values_data)
839            .null_bit_buffer(Some(Buffer::from_slice_ref([0b101])))
840            .build_unchecked()
841        };
842        let list_array = FixedSizeListArray::from(array_data);
843        let binary_array = FixedSizeBinaryArray::from(list_array);
844
845        assert_eq!(2, binary_array.len());
846        assert_eq!(1, binary_array.null_count());
847        assert!(binary_array.is_null(0));
848        assert!(binary_array.is_valid(1));
849        assert_eq!(&[10, 11, 12, 13], binary_array.value(1));
850    }
851
852    #[test]
853    #[should_panic(
854        expected = "FixedSizeBinaryArray can only be created from FixedSizeList<u8> arrays"
855    )]
856    // Different error messages, so skip for now
857    // https://github.com/apache/arrow-rs/issues/1545
858    #[cfg(not(feature = "force_validate"))]
859    fn test_fixed_size_binary_array_from_incorrect_fixed_size_list_array() {
860        let values: [u32; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
861        let values_data = ArrayData::builder(DataType::UInt32)
862            .len(12)
863            .add_buffer(Buffer::from_slice_ref(values))
864            .build()
865            .unwrap();
866
867        let array_data = unsafe {
868            ArrayData::builder(DataType::FixedSizeList(
869                Arc::new(Field::new_list_field(DataType::Binary, false)),
870                4,
871            ))
872            .len(3)
873            .add_child_data(values_data)
874            .build_unchecked()
875        };
876        let list_array = FixedSizeListArray::from(array_data);
877        drop(FixedSizeBinaryArray::from(list_array));
878    }
879
880    #[test]
881    #[should_panic(expected = "The child array cannot contain null values.")]
882    fn test_fixed_size_binary_array_from_fixed_size_list_array_with_child_nulls_failed() {
883        let values = [0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
884        let values_data = ArrayData::builder(DataType::UInt8)
885            .len(12)
886            .add_buffer(Buffer::from_slice_ref(values))
887            .null_bit_buffer(Some(Buffer::from_slice_ref([0b101010101010])))
888            .build()
889            .unwrap();
890
891        let array_data = unsafe {
892            ArrayData::builder(DataType::FixedSizeList(
893                Arc::new(Field::new_list_field(DataType::UInt8, false)),
894                4,
895            ))
896            .len(3)
897            .add_child_data(values_data)
898            .build_unchecked()
899        };
900        let list_array = FixedSizeListArray::from(array_data);
901        drop(FixedSizeBinaryArray::from(list_array));
902    }
903
904    #[test]
905    fn test_fixed_size_binary_array_fmt_debug() {
906        let values: [u8; 15] = *b"hellotherearrow";
907
908        let array_data = ArrayData::builder(DataType::FixedSizeBinary(5))
909            .len(3)
910            .add_buffer(Buffer::from(&values))
911            .build()
912            .unwrap();
913        let arr = FixedSizeBinaryArray::from(array_data);
914        assert_eq!(
915            "FixedSizeBinaryArray<5>\n[\n  [104, 101, 108, 108, 111],\n  [116, 104, 101, 114, 101],\n  [97, 114, 114, 111, 119],\n]",
916            format!("{arr:?}")
917        );
918    }
919
920    #[test]
921    fn test_fixed_size_binary_array_from_iter() {
922        let input_arg = vec![vec![1, 2], vec![3, 4], vec![5, 6]];
923        let arr = FixedSizeBinaryArray::try_from_iter(input_arg.into_iter()).unwrap();
924
925        assert_eq!(2, arr.value_length());
926        assert_eq!(3, arr.len())
927    }
928
929    #[test]
930    fn test_all_none_fixed_size_binary_array_from_sparse_iter() {
931        let none_option: Option<[u8; 32]> = None;
932        let input_arg = vec![none_option, none_option, none_option];
933        #[allow(deprecated)]
934        let arr = FixedSizeBinaryArray::try_from_sparse_iter(input_arg.into_iter()).unwrap();
935        assert_eq!(0, arr.value_length());
936        assert_eq!(3, arr.len())
937    }
938
939    #[test]
940    fn test_fixed_size_binary_array_from_sparse_iter() {
941        let input_arg = vec![
942            None,
943            Some(vec![7, 8]),
944            Some(vec![9, 10]),
945            None,
946            Some(vec![13, 14]),
947        ];
948        #[allow(deprecated)]
949        let arr = FixedSizeBinaryArray::try_from_sparse_iter(input_arg.iter().cloned()).unwrap();
950        assert_eq!(2, arr.value_length());
951        assert_eq!(5, arr.len());
952
953        let arr =
954            FixedSizeBinaryArray::try_from_sparse_iter_with_size(input_arg.into_iter(), 2).unwrap();
955        assert_eq!(2, arr.value_length());
956        assert_eq!(5, arr.len());
957    }
958
959    #[test]
960    fn test_fixed_size_binary_array_from_sparse_iter_with_size_all_none() {
961        let input_arg = vec![None, None, None, None, None] as Vec<Option<Vec<u8>>>;
962
963        let arr = FixedSizeBinaryArray::try_from_sparse_iter_with_size(input_arg.into_iter(), 16)
964            .unwrap();
965        assert_eq!(16, arr.value_length());
966        assert_eq!(5, arr.len())
967    }
968
969    #[test]
970    fn test_fixed_size_binary_array_from_vec() {
971        let values = vec!["one".as_bytes(), b"two", b"six", b"ten"];
972        let array = FixedSizeBinaryArray::from(values);
973        assert_eq!(array.len(), 4);
974        assert_eq!(array.null_count(), 0);
975        assert_eq!(array.logical_null_count(), 0);
976        assert_eq!(array.value(0), b"one");
977        assert_eq!(array.value(1), b"two");
978        assert_eq!(array.value(2), b"six");
979        assert_eq!(array.value(3), b"ten");
980        assert!(!array.is_null(0));
981        assert!(!array.is_null(1));
982        assert!(!array.is_null(2));
983        assert!(!array.is_null(3));
984    }
985
986    #[test]
987    #[should_panic(expected = "Nested array size mismatch: one is 3, and the other is 5")]
988    fn test_fixed_size_binary_array_from_vec_incorrect_length() {
989        let values = vec!["one".as_bytes(), b"two", b"three", b"four"];
990        let _ = FixedSizeBinaryArray::from(values);
991    }
992
993    #[test]
994    fn test_fixed_size_binary_array_from_opt_vec() {
995        let values = vec![
996            Some("one".as_bytes()),
997            Some(b"two"),
998            None,
999            Some(b"six"),
1000            Some(b"ten"),
1001        ];
1002        let array = FixedSizeBinaryArray::from(values);
1003        assert_eq!(array.len(), 5);
1004        assert_eq!(array.value(0), b"one");
1005        assert_eq!(array.value(1), b"two");
1006        assert_eq!(array.value(3), b"six");
1007        assert_eq!(array.value(4), b"ten");
1008        assert!(!array.is_null(0));
1009        assert!(!array.is_null(1));
1010        assert!(array.is_null(2));
1011        assert!(!array.is_null(3));
1012        assert!(!array.is_null(4));
1013    }
1014
1015    #[test]
1016    #[should_panic(expected = "Nested array size mismatch: one is 3, and the other is 5")]
1017    fn test_fixed_size_binary_array_from_opt_vec_incorrect_length() {
1018        let values = vec![
1019            Some("one".as_bytes()),
1020            Some(b"two"),
1021            None,
1022            Some(b"three"),
1023            Some(b"four"),
1024        ];
1025        let _ = FixedSizeBinaryArray::from(values);
1026    }
1027
1028    #[test]
1029    fn fixed_size_binary_array_all_null() {
1030        let data = vec![None] as Vec<Option<String>>;
1031        let array =
1032            FixedSizeBinaryArray::try_from_sparse_iter_with_size(data.into_iter(), 0).unwrap();
1033        array
1034            .into_data()
1035            .validate_full()
1036            .expect("All null array has valid array data");
1037    }
1038
1039    #[test]
1040    // Test for https://github.com/apache/arrow-rs/issues/1390
1041    fn fixed_size_binary_array_all_null_in_batch_with_schema() {
1042        let schema = Schema::new(vec![Field::new("a", DataType::FixedSizeBinary(2), true)]);
1043
1044        let none_option: Option<[u8; 2]> = None;
1045        let item = FixedSizeBinaryArray::try_from_sparse_iter_with_size(
1046            vec![none_option, none_option, none_option].into_iter(),
1047            2,
1048        )
1049        .unwrap();
1050
1051        // Should not panic
1052        RecordBatch::try_new(Arc::new(schema), vec![Arc::new(item)]).unwrap();
1053    }
1054
1055    #[test]
1056    #[should_panic(
1057        expected = "Trying to access an element at index 4 from a FixedSizeBinaryArray of length 3"
1058    )]
1059    fn test_fixed_size_binary_array_get_value_index_out_of_bound() {
1060        let values = vec![Some("one".as_bytes()), Some(b"two"), None];
1061        let array = FixedSizeBinaryArray::from(values);
1062
1063        array.value(4);
1064    }
1065
1066    #[test]
1067    fn test_validate_lengths_allows_empty_array() {
1068        FixedSizeBinaryArray::validate_lengths(1024, 0).unwrap();
1069    }
1070
1071    #[test]
1072    fn test_validate_lengths_allows_i32_max_offset() {
1073        FixedSizeBinaryArray::validate_lengths(1, i32::MAX as usize).unwrap();
1074        FixedSizeBinaryArray::validate_lengths(262_176, 8191).unwrap();
1075    }
1076
1077    #[test]
1078    fn test_validate_lengths_rejects_offset_past_i32_max() {
1079        let err = FixedSizeBinaryArray::validate_lengths(262_177, 8192).unwrap_err();
1080        assert_eq!(
1081            err.to_string(),
1082            "Invalid argument error: FixedSizeBinaryArray error: value size 262177 * length 8192 exceeds maximum valid offset of 2147483647",
1083        );
1084    }
1085
1086    #[test]
1087    fn test_constructors() {
1088        let buffer = Buffer::from_vec(vec![0_u8; 10]);
1089        let a = FixedSizeBinaryArray::new(2, buffer.clone(), None);
1090        assert_eq!(a.len(), 5);
1091
1092        let nulls = NullBuffer::new_null(5);
1093        FixedSizeBinaryArray::new(2, buffer.clone(), Some(nulls));
1094
1095        let null_array = FixedSizeBinaryArray::new_null(4, 3);
1096        assert_eq!(null_array.len(), 3);
1097        assert_eq!(null_array.values().len(), 12);
1098
1099        let a = FixedSizeBinaryArray::new(3, buffer.clone(), None);
1100        assert_eq!(a.len(), 3);
1101
1102        let nulls = NullBuffer::new_null(3);
1103        FixedSizeBinaryArray::new(3, buffer.clone(), Some(nulls));
1104
1105        let err = FixedSizeBinaryArray::try_new(-1, buffer.clone(), None).unwrap_err();
1106
1107        assert_eq!(
1108            err.to_string(),
1109            "Invalid argument error: Size cannot be negative, got -1"
1110        );
1111
1112        let nulls = NullBuffer::new_null(3);
1113        let err = FixedSizeBinaryArray::try_new(2, buffer.clone(), Some(nulls)).unwrap_err();
1114        assert_eq!(
1115            err.to_string(),
1116            "Invalid argument error: Incorrect length of null buffer for FixedSizeBinaryArray, expected 5 got 3"
1117        );
1118
1119        let zero_sized = FixedSizeBinaryArray::new(0, Buffer::default(), None);
1120        assert_eq!(zero_sized.len(), 0);
1121
1122        let nulls = NullBuffer::new_null(3);
1123        let zero_sized_with_nulls = FixedSizeBinaryArray::new(0, Buffer::default(), Some(nulls));
1124        assert_eq!(zero_sized_with_nulls.len(), 3);
1125
1126        let zero_sized_with_non_empty_buffer_err =
1127            FixedSizeBinaryArray::try_new(0, buffer, None).unwrap_err();
1128        assert_eq!(
1129            zero_sized_with_non_empty_buffer_err.to_string(),
1130            "Invalid argument error: Buffer cannot have non-zero length if the item size is zero"
1131        );
1132    }
1133}