Skip to main content

arrow_array/array/
fixed_size_binary_array.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::array::print_long_array;
19use crate::iterator::FixedSizeBinaryIter;
20use crate::{Array, ArrayAccessor, ArrayRef, FixedSizeListArray, Scalar};
21use arrow_buffer::buffer::NullBuffer;
22use arrow_buffer::{ArrowNativeType, Buffer, MutableBuffer, bit_util};
23use arrow_data::{ArrayData, ArrayDataBuilder};
24use arrow_schema::{ArrowError, DataType};
25use std::any::Any;
26use std::sync::Arc;
27
28/// An array of [fixed-size binary values](https://arrow.apache.org/docs/format/Columnar.html#fixed-size-primitive-layout)
29///
30/// Each element in a [`FixedSizeBinaryArray`] has `value_length` bytes, where
31/// `value_length` is defined by the schema.
32///
33/// This array type is useful for storing fixed-length values such as 16-byte
34/// UUIDs (`value_length = 16`).
35///
36/// # Layout
37///
38/// Values in a [`FixedSizeBinaryArray`] are stored contiguously in a single
39/// buffer. The byte offset for the `i`-th element can be calculated as
40/// `i * value_length`.
41///
42/// Nulls are stored in a standard optional Arrow [`NullBuffer`].
43///
44/// For example, a 100-value [`FixedSizeBinaryArray`] with `value_length = 12`
45/// is shown below.
46///
47/// ```text
48/// ┌──────────────────────────────────────────┐
49/// │ Computed byte offsets                    │
50/// │          ┌──────────────────────┐ ┌────┐ │
51/// │          │┌────────────────────┐│ │    │ │
52/// │       0  ││value 0  (12 bytes) ││ │ 1  │ │
53/// │          │├────────────────────┤│ │    │ │
54/// │       12 ││value 1  (12 bytes) ││ │ 0  │ │
55/// │          │├────────────────────┤│ │    │ │
56/// │       24 ││value 2  (12 bytes) ││ │ 1  │ │
57/// │          │└────────────────────┘│ │    │ │
58/// │          │         ...          │ │... │ │
59/// │          │┌───────────────────┐ │ │    │ │
60/// │     1188 ││value 99 (12 bytes)│ │ │ 1  │ │
61/// │          │└───────────────────┘ │ │    │ │
62/// │          └──────────────────────┘ └────┘ │
63/// │           value_data              nulls  │
64/// └──────────────────────────────────────────┘
65/// ```
66///
67/// # Examples
68///
69/// Create an array from an iterable argument of byte slices.
70///
71/// ```
72///    use arrow_array::{Array, FixedSizeBinaryArray};
73///    let input_arg = vec![ vec![1, 2], vec![3, 4], vec![5, 6] ];
74///    let arr = FixedSizeBinaryArray::try_from_iter(input_arg.into_iter()).unwrap();
75///
76///    assert_eq!(3, arr.len());
77///
78/// ```
79/// Create an array from an iterable argument of sparse byte slices.
80/// Sparsity means that the input argument can contain `None` items.
81/// ```
82///    use arrow_array::{Array, FixedSizeBinaryArray};
83///    let input_arg = vec![ None, Some(vec![7, 8]), Some(vec![9, 10]), None, Some(vec![13, 14]) ];
84///    let arr = FixedSizeBinaryArray::try_from_sparse_iter_with_size(input_arg.into_iter(), 2).unwrap();
85///    assert_eq!(5, arr.len())
86///
87/// ```
88///
89#[derive(Clone)]
90pub struct FixedSizeBinaryArray {
91    data_type: DataType, // Must be DataType::FixedSizeBinary(value_length)
92    value_data: Buffer,
93    nulls: Option<NullBuffer>,
94    len: usize,
95    value_length: i32,
96}
97
98impl FixedSizeBinaryArray {
99    /// Create a new [`FixedSizeBinaryArray`] with `value_length` bytes per element, panicking on
100    /// failure
101    ///
102    /// # Panics
103    ///
104    /// Panics if [`Self::try_new`] returns an error
105    pub fn new(value_length: i32, values: Buffer, nulls: Option<NullBuffer>) -> Self {
106        Self::try_new(value_length, values, nulls).unwrap()
107    }
108
109    /// Create a new [`Scalar`] from `value`
110    pub fn new_scalar(value: impl AsRef<[u8]>) -> Scalar<Self> {
111        let v = value.as_ref();
112        let value_length =
113            i32::try_from(v.len()).expect("FixedSizeBinaryArray value length exceeds i32");
114        Scalar::new(Self::new(value_length, Buffer::from(v), None))
115    }
116
117    /// Create a new [`FixedSizeBinaryArray`] from the provided parts, returning an error on failure
118    ///
119    /// Creating an array with `value_length == 0` will try to get the length from the null
120    /// buffer. If no null buffer is provided, the resulting array will have length zero.
121    ///
122    /// # Errors
123    ///
124    /// * `value_length < 0`
125    /// * `values.len() / value_length != nulls.len()`
126    /// * `value_length == 0 && values.len() != 0`
127    /// * `len * value_length > i32::MAX`
128    pub fn try_new(
129        value_length: i32,
130        values: Buffer,
131        nulls: Option<NullBuffer>,
132    ) -> Result<Self, ArrowError> {
133        let data_type = DataType::FixedSizeBinary(value_length);
134        let value_size = value_length.to_usize().ok_or_else(|| {
135            ArrowError::InvalidArgumentError(format!(
136                "Value length cannot be negative, got {value_length}"
137            ))
138        })?;
139
140        let len = match values.len().checked_div(value_size) {
141            Some(len) => {
142                if let Some(n) = nulls.as_ref() {
143                    if n.len() != len {
144                        return Err(ArrowError::InvalidArgumentError(format!(
145                            "Incorrect length of null buffer for FixedSizeBinaryArray, expected {} got {}",
146                            len,
147                            n.len(),
148                        )));
149                    }
150                }
151
152                len
153            }
154            None => {
155                if !values.is_empty() {
156                    return Err(ArrowError::InvalidArgumentError(
157                        "Buffer cannot have non-zero length if the value length is zero".to_owned(),
158                    ));
159                }
160
161                // If the value length is zero, try to determine the length from the null buffer
162                nulls.as_ref().map(|n| n.len()).unwrap_or(0)
163            }
164        };
165
166        Self::validate_lengths(value_size, len)?;
167
168        Ok(Self {
169            data_type,
170            value_data: values,
171            value_length,
172            nulls,
173            len,
174        })
175    }
176
177    /// Some calculations below use i32 arithmetic which can overflow when
178    /// valid offsets are past i32::MAX. Until that is solved for real do not
179    /// permit constructing any FixedSizeBinaryArray that has a valid offset
180    /// past i32::MAX
181    fn validate_lengths(value_size: usize, len: usize) -> Result<(), ArrowError> {
182        // the offset is also calculated for the next element (i + 1) so
183        // check `len` (not last element index) to ensure that all offsets are valid
184        let max_offset = value_size.checked_mul(len).ok_or_else(|| {
185            ArrowError::InvalidArgumentError(format!(
186                "FixedSizeBinaryArray error: value size {value_size} * len {len} exceeds maximum valid offset"
187            ))
188        })?;
189
190        let max_valid_offset: usize = i32::MAX.try_into().map_err(|_| {
191            ArrowError::InvalidArgumentError(format!(
192                "FixedSizeBinaryArray error: maximum valid offset exceeds i32::MAX, got {max_offset}"
193            ))
194        })?;
195
196        if max_offset > max_valid_offset {
197            return Err(ArrowError::InvalidArgumentError(format!(
198                "FixedSizeBinaryArray error: value size {value_size} * length {len} exceeds maximum valid offset of {max_valid_offset}"
199            )));
200        };
201        Ok(())
202    }
203
204    /// Create a new [`FixedSizeBinaryArray`] of length `len` where all values are null
205    ///
206    /// # Panics
207    ///
208    /// Panics if
209    ///
210    /// * `value_length < 0`
211    /// * `value_length * len` would overflow `usize`
212    /// * `value_length * len > i32::MAX`
213    /// * `value_length * len * 8` would overflow `usize`
214    pub fn new_null(value_length: i32, len: usize) -> Self {
215        const BITS_IN_A_BYTE: usize = 8;
216        let value_size = value_length.to_usize().unwrap();
217        Self::validate_lengths(value_size, len).unwrap();
218        let capacity_in_bytes = value_size.checked_mul(len).unwrap();
219        let capacity_in_bits = capacity_in_bytes.checked_mul(BITS_IN_A_BYTE).unwrap();
220        Self {
221            data_type: DataType::FixedSizeBinary(value_length),
222            value_data: MutableBuffer::new_null(capacity_in_bits).into(),
223            nulls: Some(NullBuffer::new_null(len)),
224            value_length,
225            len,
226        }
227    }
228
229    /// Deconstruct this array into its constituent parts
230    pub fn into_parts(self) -> (i32, Buffer, Option<NullBuffer>) {
231        (self.value_length, self.value_data, self.nulls)
232    }
233
234    /// Returns the element at index `i` as a byte slice.
235    ///
236    /// Note: This method does not check for nulls and the value is arbitrary
237    /// (but still well-defined) if [`is_null`](Self::is_null) returns true for the index.
238    ///
239    /// # Panics
240    /// Panics if index `i` is out of bounds.
241    pub fn value(&self, i: usize) -> &[u8] {
242        assert!(
243            i < self.len(),
244            "Trying to access an element at index {} from a FixedSizeBinaryArray of length {}",
245            i,
246            self.len()
247        );
248        let offset = i + self.offset();
249        unsafe {
250            let pos = self.value_offset_at(offset);
251            std::slice::from_raw_parts(
252                self.value_data.as_ptr().offset(pos as isize),
253                (self.value_offset_at(offset + 1) - pos) as usize,
254            )
255        }
256    }
257
258    /// Returns the element at index `i` as a byte slice.
259    ///
260    /// Note: This method does not check for nulls and the value is arbitrary
261    /// if [`is_null`](Self::is_null) returns true for the index.
262    ///
263    /// # Safety
264    ///
265    /// Caller is responsible for ensuring that the index is within the bounds
266    /// of the array
267    pub unsafe fn value_unchecked(&self, i: usize) -> &[u8] {
268        let offset = i + self.offset();
269        let pos = self.value_offset_at(offset);
270        unsafe {
271            std::slice::from_raw_parts(
272                self.value_data.as_ptr().offset(pos as isize),
273                (self.value_offset_at(offset + 1) - pos) as usize,
274            )
275        }
276    }
277
278    /// Returns the offset for the element at index `i`.
279    ///
280    /// Note this doesn't do any bound checking, for performance reason.
281    #[inline]
282    pub fn value_offset(&self, i: usize) -> i32 {
283        self.value_offset_at(self.offset() + i)
284    }
285
286    /// Returns the length for an element.
287    ///
288    /// All elements have the same length as the array is a fixed size.
289    #[inline]
290    pub fn value_length(&self) -> i32 {
291        self.value_length
292    }
293
294    /// Returns the values of this array.
295    ///
296    /// Unlike [`Self::value_data`] this returns the [`Buffer`]
297    /// allowing for zero-copy cloning.
298    #[inline]
299    pub fn values(&self) -> &Buffer {
300        &self.value_data
301    }
302
303    /// Returns the raw value data.
304    pub fn value_data(&self) -> &[u8] {
305        self.value_data.as_slice()
306    }
307
308    /// Returns a zero-copy slice of this array with the indicated offset and length.
309    pub fn slice(&self, offset: usize, len: usize) -> Self {
310        assert!(
311            offset.saturating_add(len) <= self.len,
312            "the length + offset of the sliced FixedSizeBinaryArray cannot exceed the existing length"
313        );
314
315        let size = self.value_length as usize;
316
317        Self {
318            data_type: self.data_type.clone(),
319            nulls: self.nulls.as_ref().map(|n| n.slice(offset, len)),
320            value_length: self.value_length,
321            value_data: self.value_data.slice_with_length(offset * size, len * size),
322            len,
323        }
324    }
325
326    /// Create an array from an iterable argument of sparse byte slices.
327    /// Sparsity means that items returned by the iterator are optional, i.e input argument can
328    /// contain `None` items.
329    ///
330    /// # Examples
331    ///
332    /// ```
333    /// use arrow_array::FixedSizeBinaryArray;
334    /// let input_arg = vec![
335    ///     None,
336    ///     Some(vec![7, 8]),
337    ///     Some(vec![9, 10]),
338    ///     None,
339    ///     Some(vec![13, 14]),
340    ///     None,
341    /// ];
342    /// let array = FixedSizeBinaryArray::try_from_sparse_iter(input_arg.into_iter()).unwrap();
343    /// ```
344    ///
345    /// # Errors
346    ///
347    /// Returns error if argument has length zero, or sizes of nested slices don't match.
348    #[deprecated(
349        since = "28.0.0",
350        note = "This function will fail if the iterator produces only None values; prefer `try_from_sparse_iter_with_size`"
351    )]
352    pub fn try_from_sparse_iter<T, U>(mut iter: T) -> Result<Self, ArrowError>
353    where
354        T: Iterator<Item = Option<U>>,
355        U: AsRef<[u8]>,
356    {
357        let mut len = 0;
358        let mut value_size = None;
359        let mut byte = 0;
360
361        let iter_size_hint = iter.size_hint().0;
362        let mut null_buf = MutableBuffer::new(bit_util::ceil(iter_size_hint, 8));
363        let mut buffer = MutableBuffer::new(0);
364
365        let mut prepend = 0;
366        iter.try_for_each(|item| -> Result<(), ArrowError> {
367            // extend null bitmask by one byte per each 8 items
368            if byte == 0 {
369                null_buf.push(0u8);
370                byte = 8;
371            }
372            byte -= 1;
373
374            if let Some(slice) = item {
375                let slice = slice.as_ref();
376                if let Some(size) = value_size {
377                    if size != slice.len() {
378                        return Err(ArrowError::InvalidArgumentError(format!(
379                            "Nested array size mismatch: one is {}, and the other is {}",
380                            size,
381                            slice.len()
382                        )));
383                    }
384                } else {
385                    let len = slice.len();
386                    value_size = Some(len);
387                    // Now that we know how large each element is we can reserve
388                    // sufficient capacity in the underlying mutable buffer for
389                    // the data.
390                    if let Some(capacity) = iter_size_hint.checked_mul(len) {
391                        buffer.reserve(capacity);
392                    }
393                    let prepend_zeros = slice.len().checked_mul(prepend).ok_or_else(|| {
394                        ArrowError::InvalidArgumentError(format!(
395                            "FixedSizeBinaryArray error: value size {} * prepend {prepend} exceeds usize",
396                            slice.len()
397                        ))
398                    })?;
399                    buffer.extend_zeros(prepend_zeros);
400                }
401                bit_util::set_bit(null_buf.as_slice_mut(), len);
402                buffer.extend_from_slice(slice);
403            } else if let Some(size) = value_size {
404                buffer.extend_zeros(size);
405            } else {
406                prepend += 1;
407            }
408
409            len += 1;
410
411            Ok(())
412        })?;
413
414        if len == 0 {
415            return Err(ArrowError::InvalidArgumentError(
416                "Input iterable argument has no data".to_owned(),
417            ));
418        }
419
420        let nulls = NullBuffer::from_unsliced_buffer(null_buf, len);
421
422        let value_size = value_size.unwrap_or(0);
423        Self::validate_lengths(value_size, len)?;
424        let value_length = value_size.try_into().map_err(|_| {
425            ArrowError::InvalidArgumentError(format!(
426                "FixedSizeBinaryArray value length exceeds i32, got {value_size}"
427            ))
428        })?;
429        Ok(Self {
430            data_type: DataType::FixedSizeBinary(value_length),
431            value_data: buffer.into(),
432            nulls,
433            value_length,
434            len,
435        })
436    }
437
438    /// Create an array from an iterable argument of sparse byte slices.
439    /// Sparsity means that items returned by the iterator are optional, i.e input argument can
440    /// contain `None` items. In cases where the iterator returns only `None` values, this
441    /// also takes a `value_length` parameter to ensure that a valid
442    /// [`FixedSizeBinaryArray`] is still created.
443    ///
444    /// # Examples
445    ///
446    /// ```
447    /// use arrow_array::FixedSizeBinaryArray;
448    /// let input_arg = vec![
449    ///     None,
450    ///     Some(vec![7, 8]),
451    ///     Some(vec![9, 10]),
452    ///     None,
453    ///     Some(vec![13, 14]),
454    ///     None,
455    /// ];
456    /// let array = FixedSizeBinaryArray::try_from_sparse_iter_with_size(input_arg.into_iter(), 2).unwrap();
457    /// ```
458    ///
459    /// # Errors
460    ///
461    /// Returns error if argument has length zero, or sizes of nested slices don't match.
462    pub fn try_from_sparse_iter_with_size<T, U>(
463        mut iter: T,
464        value_length: i32,
465    ) -> Result<Self, ArrowError>
466    where
467        T: Iterator<Item = Option<U>>,
468        U: AsRef<[u8]>,
469    {
470        let value_size = value_length.to_usize().ok_or_else(|| {
471            ArrowError::InvalidArgumentError(format!(
472                "Value length cannot be negative, got {value_length}"
473            ))
474        })?;
475        let mut len = 0;
476        let mut byte = 0;
477
478        let iter_size_hint = iter.size_hint().0;
479        let mut null_buf = MutableBuffer::new(bit_util::ceil(iter_size_hint, 8));
480        let capacity = iter_size_hint.checked_mul(value_size).ok_or_else(|| {
481            ArrowError::InvalidArgumentError(format!(
482                "FixedSizeBinaryArray error: value size {value_size} * len hint {iter_size_hint} exceeds usize"
483            ))
484        })?;
485        let mut buffer = MutableBuffer::new(capacity);
486
487        iter.try_for_each(|item| -> Result<(), ArrowError> {
488            // extend null bitmask by one byte per each 8 items
489            if byte == 0 {
490                null_buf.push(0u8);
491                byte = 8;
492            }
493            byte -= 1;
494
495            if let Some(slice) = item {
496                let slice = slice.as_ref();
497                if value_size != slice.len() {
498                    return Err(ArrowError::InvalidArgumentError(format!(
499                        "Nested array size mismatch: one is {}, and the other is {}",
500                        value_length,
501                        slice.len()
502                    )));
503                }
504
505                bit_util::set_bit(null_buf.as_slice_mut(), len);
506                buffer.extend_from_slice(slice);
507            } else {
508                buffer.extend_zeros(value_size);
509            }
510
511            len += 1;
512
513            Ok(())
514        })?;
515
516        let nulls = NullBuffer::from_unsliced_buffer(null_buf, len);
517        Self::validate_lengths(value_size, len)?;
518
519        Ok(Self {
520            data_type: DataType::FixedSizeBinary(value_length),
521            value_data: buffer.into(),
522            nulls,
523            len,
524            value_length,
525        })
526    }
527
528    /// Create an array from an iterable argument of byte slices.
529    ///
530    /// # Examples
531    ///
532    /// ```
533    /// use arrow_array::FixedSizeBinaryArray;
534    /// let input_arg = vec![
535    ///     vec![1, 2],
536    ///     vec![3, 4],
537    ///     vec![5, 6],
538    /// ];
539    /// let array = FixedSizeBinaryArray::try_from_iter(input_arg.into_iter()).unwrap();
540    /// ```
541    ///
542    /// # Errors
543    ///
544    /// Returns error if argument has length zero, or sizes of nested slices don't match.
545    pub fn try_from_iter<T, U>(mut iter: T) -> Result<Self, ArrowError>
546    where
547        T: Iterator<Item = U>,
548        U: AsRef<[u8]>,
549    {
550        let mut len = 0;
551        let mut value_size = None;
552        let iter_size_hint = iter.size_hint().0;
553        let mut buffer = MutableBuffer::new(0);
554
555        iter.try_for_each(|item| -> Result<(), ArrowError> {
556            let slice = item.as_ref();
557            if let Some(value_size) = value_size {
558                if value_size != slice.len() {
559                    return Err(ArrowError::InvalidArgumentError(format!(
560                        "Nested array size mismatch: one is {value_size}, and the other is {}",
561                        slice.len()
562                    )));
563                }
564            } else {
565                let len = slice.len();
566                value_size = Some(len);
567                if let Some(capacity) = iter_size_hint.checked_mul(len) {
568                    buffer.reserve(capacity);
569                }
570            }
571
572            buffer.extend_from_slice(slice);
573
574            len += 1;
575
576            Ok(())
577        })?;
578
579        if len == 0 {
580            return Err(ArrowError::InvalidArgumentError(
581                "Input iterable argument has no data".to_owned(),
582            ));
583        }
584
585        let value_size = value_size.unwrap_or(0);
586        Self::validate_lengths(value_size, len)?;
587        let value_length = value_size.try_into().map_err(|_| {
588            ArrowError::InvalidArgumentError(format!(
589                "FixedSizeBinaryArray value length exceeds i32, got {value_size}"
590            ))
591        })?;
592        Ok(Self {
593            data_type: DataType::FixedSizeBinary(value_length),
594            value_data: buffer.into(),
595            nulls: None,
596            value_length,
597            len,
598        })
599    }
600
601    #[inline]
602    fn value_offset_at(&self, i: usize) -> i32 {
603        self.value_length * i as i32
604    }
605
606    /// constructs a new iterator
607    pub fn iter(&self) -> FixedSizeBinaryIter<'_> {
608        FixedSizeBinaryIter::new(self)
609    }
610}
611
612impl From<ArrayData> for FixedSizeBinaryArray {
613    fn from(data: ArrayData) -> Self {
614        let (data_type, len, nulls, offset, buffers, _child_data) = data.into_parts();
615
616        assert_eq!(
617            buffers.len(),
618            1,
619            "FixedSizeBinaryArray data should contain 1 buffer only (values)"
620        );
621        let value_length = match data_type {
622            DataType::FixedSizeBinary(len) => len,
623            _ => panic!("Expected data type to be FixedSizeBinary"),
624        };
625
626        let value_size = value_length
627            .to_usize()
628            .expect("FixedSizeBinaryArray value length must be non-negative");
629        Self::validate_lengths(value_size, len)
630            .expect("FixedSizeBinaryArray offsets must fit within i32");
631        let value_data = buffers[0].slice_with_length(
632            offset.checked_mul(value_size).expect("offset overflow"),
633            len.checked_mul(value_size).expect("length overflow"),
634        );
635
636        Self {
637            data_type,
638            nulls,
639            len,
640            value_data,
641            value_length,
642        }
643    }
644}
645
646impl From<FixedSizeBinaryArray> for ArrayData {
647    fn from(array: FixedSizeBinaryArray) -> Self {
648        let builder = ArrayDataBuilder::new(array.data_type)
649            .len(array.len)
650            .buffers(vec![array.value_data])
651            .nulls(array.nulls);
652
653        unsafe { builder.build_unchecked() }
654    }
655}
656
657/// Creates a `FixedSizeBinaryArray` from `FixedSizeList<u8>` array
658impl From<FixedSizeListArray> for FixedSizeBinaryArray {
659    fn from(v: FixedSizeListArray) -> Self {
660        let value_len = v.value_length();
661        let v = v.into_data();
662        assert_eq!(
663            v.child_data().len(),
664            1,
665            "FixedSizeBinaryArray can only be created from list array of u8 values \
666             (i.e. FixedSizeList<PrimitiveArray<u8>>)."
667        );
668        let child_data = &v.child_data()[0];
669
670        assert_eq!(
671            child_data.child_data().len(),
672            0,
673            "FixedSizeBinaryArray can only be created from list array of u8 values \
674             (i.e. FixedSizeList<PrimitiveArray<u8>>)."
675        );
676        assert_eq!(
677            child_data.data_type(),
678            &DataType::UInt8,
679            "FixedSizeBinaryArray can only be created from FixedSizeList<u8> arrays, mismatched data types."
680        );
681        assert_eq!(
682            child_data.null_count(),
683            0,
684            "The child array cannot contain null values."
685        );
686
687        let builder = ArrayData::builder(DataType::FixedSizeBinary(value_len))
688            .len(v.len())
689            .offset(v.offset())
690            .add_buffer(child_data.buffers()[0].slice(child_data.offset()))
691            .nulls(v.nulls().cloned());
692
693        let data = unsafe { builder.build_unchecked() };
694        Self::from(data)
695    }
696}
697
698impl From<Vec<Option<&[u8]>>> for FixedSizeBinaryArray {
699    fn from(v: Vec<Option<&[u8]>>) -> Self {
700        #[allow(deprecated)]
701        Self::try_from_sparse_iter(v.into_iter()).unwrap()
702    }
703}
704
705impl From<Vec<&[u8]>> for FixedSizeBinaryArray {
706    fn from(v: Vec<&[u8]>) -> Self {
707        Self::try_from_iter(v.into_iter()).unwrap()
708    }
709}
710
711impl<const N: usize> From<Vec<&[u8; N]>> for FixedSizeBinaryArray {
712    fn from(v: Vec<&[u8; N]>) -> Self {
713        Self::try_from_iter(v.into_iter()).unwrap()
714    }
715}
716
717impl std::fmt::Debug for FixedSizeBinaryArray {
718    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
719        write!(f, "FixedSizeBinaryArray<{}>\n[\n", self.value_length())?;
720        print_long_array(self, f, |array, index, f| {
721            std::fmt::Debug::fmt(&array.value(index), f)
722        })?;
723        write!(f, "]")
724    }
725}
726
727/// SAFETY: Correctly implements the contract of Arrow Arrays
728unsafe impl Array for FixedSizeBinaryArray {
729    fn as_any(&self) -> &dyn Any {
730        self
731    }
732
733    fn to_data(&self) -> ArrayData {
734        self.clone().into()
735    }
736
737    fn into_data(self) -> ArrayData {
738        self.into()
739    }
740
741    fn data_type(&self) -> &DataType {
742        &self.data_type
743    }
744
745    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
746        Arc::new(self.slice(offset, length))
747    }
748
749    fn len(&self) -> usize {
750        self.len
751    }
752
753    fn is_empty(&self) -> bool {
754        self.len == 0
755    }
756
757    fn shrink_to_fit(&mut self) {
758        self.value_data.shrink_to_fit();
759        if let Some(nulls) = &mut self.nulls {
760            nulls.shrink_to_fit();
761        }
762    }
763
764    fn offset(&self) -> usize {
765        // Slices are normalized by slicing `value_data`/`nulls` directly;
766        // FSB does not retain a separate logical element offset.
767        0
768    }
769
770    fn nulls(&self) -> Option<&NullBuffer> {
771        self.nulls.as_ref()
772    }
773
774    fn logical_null_count(&self) -> usize {
775        // More efficient that the default implementation
776        self.null_count()
777    }
778
779    fn get_buffer_memory_size(&self) -> usize {
780        let mut sum = self.value_data.capacity();
781        if let Some(n) = &self.nulls {
782            sum += n.buffer().capacity();
783        }
784        sum
785    }
786
787    fn get_array_memory_size(&self) -> usize {
788        std::mem::size_of::<Self>() + self.get_buffer_memory_size()
789    }
790
791    #[cfg(feature = "pool")]
792    fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
793        self.value_data.claim(pool);
794        if let Some(nulls) = &self.nulls {
795            nulls.claim(pool);
796        }
797    }
798}
799
800impl<'a> ArrayAccessor for &'a FixedSizeBinaryArray {
801    type Item = &'a [u8];
802
803    fn value(&self, index: usize) -> Self::Item {
804        FixedSizeBinaryArray::value(self, index)
805    }
806
807    unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
808        unsafe { FixedSizeBinaryArray::value_unchecked(self, index) }
809    }
810}
811
812impl<'a> IntoIterator for &'a FixedSizeBinaryArray {
813    type Item = Option<&'a [u8]>;
814    type IntoIter = FixedSizeBinaryIter<'a>;
815
816    fn into_iter(self) -> Self::IntoIter {
817        FixedSizeBinaryIter::<'a>::new(self)
818    }
819}
820
821#[cfg(test)]
822mod tests {
823    use super::*;
824    use crate::RecordBatch;
825    use arrow_schema::{Field, Schema};
826
827    #[test]
828    fn test_fixed_size_binary_array() {
829        let values: [u8; 15] = *b"hellotherearrow";
830
831        let array_data = ArrayData::builder(DataType::FixedSizeBinary(5))
832            .len(3)
833            .add_buffer(Buffer::from(&values))
834            .build()
835            .unwrap();
836        let fixed_size_binary_array = FixedSizeBinaryArray::from(array_data);
837        assert_eq!(3, fixed_size_binary_array.len());
838        assert_eq!(0, fixed_size_binary_array.null_count());
839        assert_eq!(
840            [b'h', b'e', b'l', b'l', b'o'],
841            fixed_size_binary_array.value(0)
842        );
843        assert_eq!(
844            [b't', b'h', b'e', b'r', b'e'],
845            fixed_size_binary_array.value(1)
846        );
847        assert_eq!(
848            [b'a', b'r', b'r', b'o', b'w'],
849            fixed_size_binary_array.value(2)
850        );
851        assert_eq!(5, fixed_size_binary_array.value_length());
852        assert_eq!(10, fixed_size_binary_array.value_offset(2));
853        for i in 0..3 {
854            assert!(fixed_size_binary_array.is_valid(i));
855            assert!(!fixed_size_binary_array.is_null(i));
856        }
857
858        // Test binary array with offset
859        let array_data = ArrayData::builder(DataType::FixedSizeBinary(5))
860            .len(2)
861            .offset(1)
862            .add_buffer(Buffer::from(&values))
863            .build()
864            .unwrap();
865        let fixed_size_binary_array = FixedSizeBinaryArray::from(array_data);
866        assert_eq!(
867            [b't', b'h', b'e', b'r', b'e'],
868            fixed_size_binary_array.value(0)
869        );
870        assert_eq!(
871            [b'a', b'r', b'r', b'o', b'w'],
872            fixed_size_binary_array.value(1)
873        );
874        assert_eq!(2, fixed_size_binary_array.len());
875        assert_eq!(0, fixed_size_binary_array.value_offset(0));
876        assert_eq!(5, fixed_size_binary_array.value_length());
877        assert_eq!(5, fixed_size_binary_array.value_offset(1));
878    }
879
880    #[test]
881    fn test_fixed_size_binary_array_from_fixed_size_list_array() {
882        let values = [0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13];
883        let values_data = ArrayData::builder(DataType::UInt8)
884            .len(12)
885            .offset(2)
886            .add_buffer(Buffer::from_slice_ref(values))
887            .build()
888            .unwrap();
889        // [null, [10, 11, 12, 13]]
890        let array_data = unsafe {
891            ArrayData::builder(DataType::FixedSizeList(
892                Arc::new(Field::new_list_field(DataType::UInt8, false)),
893                4,
894            ))
895            .len(2)
896            .offset(1)
897            .add_child_data(values_data)
898            .null_bit_buffer(Some(Buffer::from_slice_ref([0b101])))
899            .build_unchecked()
900        };
901        let list_array = FixedSizeListArray::from(array_data);
902        let binary_array = FixedSizeBinaryArray::from(list_array);
903
904        assert_eq!(2, binary_array.len());
905        assert_eq!(1, binary_array.null_count());
906        assert!(binary_array.is_null(0));
907        assert!(binary_array.is_valid(1));
908        assert_eq!(&[10, 11, 12, 13], binary_array.value(1));
909    }
910
911    #[test]
912    #[should_panic(
913        expected = "FixedSizeBinaryArray can only be created from FixedSizeList<u8> arrays"
914    )]
915    // Different error messages, so skip for now
916    // https://github.com/apache/arrow-rs/issues/1545
917    #[cfg(not(feature = "force_validate"))]
918    fn test_fixed_size_binary_array_from_incorrect_fixed_size_list_array() {
919        let values: [u32; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
920        let values_data = ArrayData::builder(DataType::UInt32)
921            .len(12)
922            .add_buffer(Buffer::from_slice_ref(values))
923            .build()
924            .unwrap();
925
926        let array_data = unsafe {
927            ArrayData::builder(DataType::FixedSizeList(
928                Arc::new(Field::new_list_field(DataType::Binary, false)),
929                4,
930            ))
931            .len(3)
932            .add_child_data(values_data)
933            .build_unchecked()
934        };
935        let list_array = FixedSizeListArray::from(array_data);
936        drop(FixedSizeBinaryArray::from(list_array));
937    }
938
939    #[test]
940    #[should_panic(expected = "The child array cannot contain null values.")]
941    fn test_fixed_size_binary_array_from_fixed_size_list_array_with_child_nulls_failed() {
942        let values = [0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
943        let values_data = ArrayData::builder(DataType::UInt8)
944            .len(12)
945            .add_buffer(Buffer::from_slice_ref(values))
946            .null_bit_buffer(Some(Buffer::from_slice_ref([0b101010101010])))
947            .build()
948            .unwrap();
949
950        let array_data = unsafe {
951            ArrayData::builder(DataType::FixedSizeList(
952                Arc::new(Field::new_list_field(DataType::UInt8, false)),
953                4,
954            ))
955            .len(3)
956            .add_child_data(values_data)
957            .build_unchecked()
958        };
959        let list_array = FixedSizeListArray::from(array_data);
960        drop(FixedSizeBinaryArray::from(list_array));
961    }
962
963    #[test]
964    fn test_fixed_size_binary_array_fmt_debug() {
965        let values: [u8; 15] = *b"hellotherearrow";
966
967        let array_data = ArrayData::builder(DataType::FixedSizeBinary(5))
968            .len(3)
969            .add_buffer(Buffer::from(&values))
970            .build()
971            .unwrap();
972        let arr = FixedSizeBinaryArray::from(array_data);
973        assert_eq!(
974            "FixedSizeBinaryArray<5>\n[\n  [104, 101, 108, 108, 111],\n  [116, 104, 101, 114, 101],\n  [97, 114, 114, 111, 119],\n]",
975            format!("{arr:?}")
976        );
977    }
978
979    #[test]
980    fn test_fixed_size_binary_array_from_iter() {
981        let input_arg = vec![vec![1, 2], vec![3, 4], vec![5, 6]];
982        let arr = FixedSizeBinaryArray::try_from_iter(input_arg.into_iter()).unwrap();
983
984        assert_eq!(2, arr.value_length());
985        assert_eq!(3, arr.len())
986    }
987
988    #[test]
989    fn test_all_none_fixed_size_binary_array_from_sparse_iter() {
990        let none_option: Option<[u8; 32]> = None;
991        let input_arg = vec![none_option, none_option, none_option];
992        #[allow(deprecated)]
993        let arr = FixedSizeBinaryArray::try_from_sparse_iter(input_arg.into_iter()).unwrap();
994        assert_eq!(0, arr.value_length());
995        assert_eq!(3, arr.len())
996    }
997
998    #[test]
999    fn test_fixed_size_binary_array_from_sparse_iter() {
1000        let input_arg = vec![
1001            None,
1002            Some(vec![7, 8]),
1003            Some(vec![9, 10]),
1004            None,
1005            Some(vec![13, 14]),
1006        ];
1007        #[allow(deprecated)]
1008        let arr = FixedSizeBinaryArray::try_from_sparse_iter(input_arg.iter().cloned()).unwrap();
1009        assert_eq!(2, arr.value_length());
1010        assert_eq!(5, arr.len());
1011
1012        let arr =
1013            FixedSizeBinaryArray::try_from_sparse_iter_with_size(input_arg.into_iter(), 2).unwrap();
1014        assert_eq!(2, arr.value_length());
1015        assert_eq!(5, arr.len());
1016    }
1017
1018    #[test]
1019    fn test_fixed_size_binary_array_from_sparse_iter_with_size_all_none() {
1020        let input_arg = vec![None, None, None, None, None] as Vec<Option<Vec<u8>>>;
1021
1022        let arr = FixedSizeBinaryArray::try_from_sparse_iter_with_size(input_arg.into_iter(), 16)
1023            .unwrap();
1024        assert_eq!(16, arr.value_length());
1025        assert_eq!(5, arr.len())
1026    }
1027
1028    #[test]
1029    fn test_fixed_size_binary_array_from_vec() {
1030        let values = vec!["one".as_bytes(), b"two", b"six", b"ten"];
1031        let array = FixedSizeBinaryArray::from(values);
1032        assert_eq!(array.len(), 4);
1033        assert_eq!(array.null_count(), 0);
1034        assert_eq!(array.logical_null_count(), 0);
1035        assert_eq!(array.value(0), b"one");
1036        assert_eq!(array.value(1), b"two");
1037        assert_eq!(array.value(2), b"six");
1038        assert_eq!(array.value(3), b"ten");
1039        assert!(!array.is_null(0));
1040        assert!(!array.is_null(1));
1041        assert!(!array.is_null(2));
1042        assert!(!array.is_null(3));
1043    }
1044
1045    #[test]
1046    #[should_panic(expected = "Nested array size mismatch: one is 3, and the other is 5")]
1047    fn test_fixed_size_binary_array_from_vec_incorrect_length() {
1048        let values = vec!["one".as_bytes(), b"two", b"three", b"four"];
1049        let _ = FixedSizeBinaryArray::from(values);
1050    }
1051
1052    #[test]
1053    fn test_fixed_size_binary_array_from_opt_vec() {
1054        let values = vec![
1055            Some("one".as_bytes()),
1056            Some(b"two"),
1057            None,
1058            Some(b"six"),
1059            Some(b"ten"),
1060        ];
1061        let array = FixedSizeBinaryArray::from(values);
1062        assert_eq!(array.len(), 5);
1063        assert_eq!(array.value(0), b"one");
1064        assert_eq!(array.value(1), b"two");
1065        assert_eq!(array.value(3), b"six");
1066        assert_eq!(array.value(4), b"ten");
1067        assert!(!array.is_null(0));
1068        assert!(!array.is_null(1));
1069        assert!(array.is_null(2));
1070        assert!(!array.is_null(3));
1071        assert!(!array.is_null(4));
1072    }
1073
1074    #[test]
1075    #[should_panic(expected = "Nested array size mismatch: one is 3, and the other is 5")]
1076    fn test_fixed_size_binary_array_from_opt_vec_incorrect_length() {
1077        let values = vec![
1078            Some("one".as_bytes()),
1079            Some(b"two"),
1080            None,
1081            Some(b"three"),
1082            Some(b"four"),
1083        ];
1084        let _ = FixedSizeBinaryArray::from(values);
1085    }
1086
1087    #[test]
1088    fn fixed_size_binary_array_all_null() {
1089        let data = vec![None] as Vec<Option<String>>;
1090        let array =
1091            FixedSizeBinaryArray::try_from_sparse_iter_with_size(data.into_iter(), 0).unwrap();
1092        array
1093            .into_data()
1094            .validate_full()
1095            .expect("All null array has valid array data");
1096    }
1097
1098    #[test]
1099    // Test for https://github.com/apache/arrow-rs/issues/1390
1100    fn fixed_size_binary_array_all_null_in_batch_with_schema() {
1101        let schema = Schema::new(vec![Field::new("a", DataType::FixedSizeBinary(2), true)]);
1102
1103        let none_option: Option<[u8; 2]> = None;
1104        let item = FixedSizeBinaryArray::try_from_sparse_iter_with_size(
1105            vec![none_option, none_option, none_option].into_iter(),
1106            2,
1107        )
1108        .unwrap();
1109
1110        // Should not panic
1111        RecordBatch::try_new(Arc::new(schema), vec![Arc::new(item)]).unwrap();
1112    }
1113
1114    #[test]
1115    #[should_panic(
1116        expected = "Trying to access an element at index 4 from a FixedSizeBinaryArray of length 3"
1117    )]
1118    fn test_fixed_size_binary_array_get_value_index_out_of_bound() {
1119        let values = vec![Some("one".as_bytes()), Some(b"two"), None];
1120        let array = FixedSizeBinaryArray::from(values);
1121
1122        array.value(4);
1123    }
1124
1125    #[test]
1126    fn test_validate_lengths_allows_empty_array() {
1127        FixedSizeBinaryArray::validate_lengths(1024, 0).unwrap();
1128    }
1129
1130    #[test]
1131    fn test_validate_lengths_allows_i32_max_offset() {
1132        FixedSizeBinaryArray::validate_lengths(1, i32::MAX as usize).unwrap();
1133        FixedSizeBinaryArray::validate_lengths(262_176, 8191).unwrap();
1134    }
1135
1136    #[test]
1137    fn test_validate_lengths_rejects_offset_past_i32_max() {
1138        let err = FixedSizeBinaryArray::validate_lengths(262_177, 8192).unwrap_err();
1139        assert_eq!(
1140            err.to_string(),
1141            "Invalid argument error: FixedSizeBinaryArray error: value size 262177 * length 8192 exceeds maximum valid offset of 2147483647",
1142        );
1143    }
1144
1145    #[test]
1146    fn test_constructors() {
1147        let buffer = Buffer::from_vec(vec![0_u8; 10]);
1148        let a = FixedSizeBinaryArray::new(2, buffer.clone(), None);
1149        assert_eq!(a.len(), 5);
1150
1151        let nulls = NullBuffer::new_null(5);
1152        FixedSizeBinaryArray::new(2, buffer.clone(), Some(nulls));
1153
1154        let null_array = FixedSizeBinaryArray::new_null(4, 3);
1155        assert_eq!(null_array.len(), 3);
1156        assert_eq!(null_array.values().len(), 12);
1157
1158        let a = FixedSizeBinaryArray::new(3, buffer.clone(), None);
1159        assert_eq!(a.len(), 3);
1160
1161        let nulls = NullBuffer::new_null(3);
1162        FixedSizeBinaryArray::new(3, buffer.clone(), Some(nulls));
1163
1164        let err = FixedSizeBinaryArray::try_new(-1, buffer.clone(), None).unwrap_err();
1165
1166        assert_eq!(
1167            err.to_string(),
1168            "Invalid argument error: Value length cannot be negative, got -1"
1169        );
1170
1171        let nulls = NullBuffer::new_null(3);
1172        let err = FixedSizeBinaryArray::try_new(2, buffer.clone(), Some(nulls)).unwrap_err();
1173        assert_eq!(
1174            err.to_string(),
1175            "Invalid argument error: Incorrect length of null buffer for FixedSizeBinaryArray, expected 5 got 3"
1176        );
1177
1178        let zero_sized = FixedSizeBinaryArray::new(0, Buffer::default(), None);
1179        assert_eq!(zero_sized.len(), 0);
1180        assert_eq!(zero_sized.null_count(), 0);
1181        assert_eq!(zero_sized.values().len(), 0);
1182
1183        let nulls = NullBuffer::new_null(3);
1184        let zero_sized_with_nulls = FixedSizeBinaryArray::new(0, Buffer::default(), Some(nulls));
1185        assert_eq!(zero_sized_with_nulls.len(), 3);
1186        assert_eq!(zero_sized_with_nulls.null_count(), 3);
1187        assert_eq!(zero_sized_with_nulls.values().len(), 0);
1188
1189        let zero_sized_with_non_empty_buffer_err =
1190            FixedSizeBinaryArray::try_new(0, buffer, None).unwrap_err();
1191        assert_eq!(
1192            zero_sized_with_non_empty_buffer_err.to_string(),
1193            "Invalid argument error: Buffer cannot have non-zero length if the value length is zero"
1194        );
1195    }
1196}