arrow_buffer/buffer/immutable.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::alloc::Layout;
19use std::fmt::Debug;
20use std::ptr::NonNull;
21use std::sync::Arc;
22
23use crate::alloc::{Allocation, Deallocation, ALIGNMENT};
24use crate::util::bit_chunk_iterator::{BitChunks, UnalignedBitChunk};
25use crate::BufferBuilder;
26use crate::{bytes::Bytes, native::ArrowNativeType};
27
28use super::ops::bitwise_unary_op_helper;
29use super::{MutableBuffer, ScalarBuffer};
30
31/// Buffer represents a contiguous memory region that can be shared with other buffers and across
32/// thread boundaries.
#[derive(Clone, Debug)]
pub struct Buffer {
    /// The internal byte buffer. Shared (via `Arc`) with every clone and
    /// slice of this buffer, so cloning/slicing is cheap and zero-copy.
    data: Arc<Bytes>,

    /// Pointer into `data`, valid for `length` bytes.
    ///
    /// We store a pointer instead of an offset to avoid pointer arithmetic
    /// which causes LLVM to fail to vectorise code correctly
    ptr: *const u8,

    /// Byte length of the buffer.
    ///
    /// Must be less than or equal to `data.len()`
    length: usize,
}
49
50impl PartialEq for Buffer {
51    fn eq(&self, other: &Self) -> bool {
52        self.as_slice().eq(other.as_slice())
53    }
54}
55
impl Eq for Buffer {}

// SAFETY: `Buffer` only adds a raw pointer into the `Bytes` allocation it
// co-owns through `Arc<Bytes>`; it is exactly as thread-safe as `Bytes`
// itself, which the `where` bounds make explicit.
unsafe impl Send for Buffer where Bytes: Send {}
unsafe impl Sync for Buffer where Bytes: Sync {}
60
61impl Buffer {
62    /// Auxiliary method to create a new Buffer
63    #[inline]
64    pub fn from_bytes(bytes: Bytes) -> Self {
65        let length = bytes.len();
66        let ptr = bytes.as_ptr();
67        Buffer {
68            data: Arc::new(bytes),
69            ptr,
70            length,
71        }
72    }
73
    /// Returns the offset, in bytes, of `Self::ptr` to `Self::data`
    ///
    /// self.ptr and self.data can be different after slicing or advancing the buffer.
    pub fn ptr_offset(&self) -> usize {
        // Safety: `ptr` is always in bounds of `data`, and points at or after
        // the start of the allocation, so `offset_from` is non-negative and
        // the cast to `usize` cannot wrap.
        unsafe { self.ptr.offset_from(self.data.ptr().as_ptr()) as usize }
    }
81
    /// Returns the pointer to the start of the buffer without the offset.
    ///
    /// Unlike [`Self::as_ptr`], this ignores any slicing offset and points at
    /// the start of the underlying allocation.
    pub fn data_ptr(&self) -> NonNull<u8> {
        self.data.ptr()
    }
86
87    /// Create a [`Buffer`] from the provided [`Vec`] without copying
88    #[inline]
89    pub fn from_vec<T: ArrowNativeType>(vec: Vec<T>) -> Self {
90        MutableBuffer::from(vec).into()
91    }
92
93    /// Initializes a [Buffer] from a slice of items.
94    pub fn from_slice_ref<U: ArrowNativeType, T: AsRef<[U]>>(items: T) -> Self {
95        let slice = items.as_ref();
96        let capacity = std::mem::size_of_val(slice);
97        let mut buffer = MutableBuffer::with_capacity(capacity);
98        buffer.extend_from_slice(slice);
99        buffer.into()
100    }
101
    /// Creates a buffer from an existing aligned memory region (must already be byte-aligned), this
    /// `Buffer` will free this piece of memory when dropped.
    ///
    /// # Arguments
    ///
    /// * `ptr` - Pointer to raw parts
    /// * `len` - Length of raw parts in **bytes**
    /// * `capacity` - Total allocated memory for the pointer `ptr`, in **bytes**
    ///
    /// # Panics
    ///
    /// Panics if `len` exceeds `capacity`.
    ///
    /// # Safety
    ///
    /// This function is unsafe as there is no guarantee that the given pointer is valid for `len`
    /// bytes. If the `ptr` and `capacity` come from a `Buffer`, then this is guaranteed.
    #[deprecated(note = "Use Buffer::from_vec")]
    pub unsafe fn from_raw_parts(ptr: NonNull<u8>, len: usize, capacity: usize) -> Self {
        assert!(len <= capacity);
        // The deallocation layout must describe the whole allocation
        // (`capacity`), not just the initialized prefix (`len`), so the
        // memory can later be freed correctly.
        let layout = Layout::from_size_align(capacity, ALIGNMENT).unwrap();
        Buffer::build_with_arguments(ptr, len, Deallocation::Standard(layout))
    }
121
    /// Creates a buffer from an existing memory region. Ownership of the memory is tracked via reference counting
    /// and the memory will be freed using the `drop` method of [crate::alloc::Allocation] when the reference count reaches zero.
    ///
    /// # Arguments
    ///
    /// * `ptr` - Pointer to raw parts
    /// * `len` - Length of raw parts in **bytes**
    /// * `owner` - A [crate::alloc::Allocation] which is responsible for freeing that data
    ///
    /// # Safety
    ///
    /// This function is unsafe as there is no guarantee that the given pointer is valid for `len` bytes
    pub unsafe fn from_custom_allocation(
        ptr: NonNull<u8>,
        len: usize,
        owner: Arc<dyn Allocation>,
    ) -> Self {
        // The custom deallocation records `len` alongside the owner so the
        // allocation can be reported/accounted for; the owner's `Drop` does
        // the actual freeing.
        Buffer::build_with_arguments(ptr, len, Deallocation::Custom(owner, len))
    }
141
    /// Auxiliary method to create a new Buffer
    ///
    /// # Safety
    ///
    /// Caller must guarantee `ptr` is valid for `len` bytes and matches the
    /// provided `deallocation` strategy (same contract as [`Bytes::new`]).
    unsafe fn build_with_arguments(
        ptr: NonNull<u8>,
        len: usize,
        deallocation: Deallocation,
    ) -> Self {
        let bytes = Bytes::new(ptr, len, deallocation);
        // Capture the data pointer before `bytes` moves into the `Arc`.
        let ptr = bytes.as_ptr();
        Buffer {
            ptr,
            data: Arc::new(bytes),
            length: len,
        }
    }
156
    /// Returns the number of bytes in the buffer
    ///
    /// Note this is the length of the logical view, not of the underlying
    /// allocation (see [`Self::capacity`]).
    #[inline]
    pub fn len(&self) -> usize {
        self.length
    }
162
    /// Returns the capacity of this buffer.
    /// For externally owned buffers, this returns zero
    #[inline]
    pub fn capacity(&self) -> usize {
        // Delegates to `Bytes`, which knows how the memory was allocated.
        self.data.capacity()
    }
169
    /// Tries to shrink the capacity of the buffer as much as possible, freeing unused memory.
    ///
    /// If the buffer is shared, this is a no-op.
    ///
    /// If the memory was allocated with a custom allocator, this is a no-op.
    ///
    /// If the capacity is already less than or equal to the desired capacity, this is a no-op.
    ///
    /// The memory region will be reallocated using `std::alloc::realloc`.
    pub fn shrink_to_fit(&mut self) {
        let offset = self.ptr_offset();
        let is_empty = self.is_empty();
        let desired_capacity = if is_empty {
            0
        } else {
            // For realloc to work, we cannot free the elements before the offset
            offset + self.len()
        };
        if desired_capacity < self.capacity() {
            // `Arc::get_mut` returns `None` when the allocation is shared, in
            // which case we must not move the memory out from under other views.
            if let Some(bytes) = Arc::get_mut(&mut self.data) {
                if bytes.try_realloc(desired_capacity).is_ok() {
                    // Realloc complete - update our pointer into `bytes`:
                    // realloc may have moved the allocation, so `self.ptr`
                    // must be rebuilt from the (possibly new) base pointer.
                    self.ptr = if is_empty {
                        bytes.as_ptr()
                    } else {
                        // SAFETY: we kept all elements leading up to the offset
                        unsafe { bytes.as_ptr().add(offset) }
                    }
                } else {
                    // Failure to reallocate is fine; we just failed to free up memory.
                }
            }
        }
    }
204
    /// Returns whether the buffer is empty.
    ///
    /// `true` iff the logical length is zero; the underlying allocation may
    /// still hold capacity.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.length == 0
    }
210
    /// Returns the byte slice stored in this buffer
    pub fn as_slice(&self) -> &[u8] {
        // SAFETY: `ptr` is valid for `length` bytes by the struct invariant,
        // and the `Arc<Bytes>` keeps the allocation alive for `&self`'s lifetime.
        unsafe { std::slice::from_raw_parts(self.ptr, self.length) }
    }
215
    /// Returns how the underlying memory will be deallocated (crate-internal).
    pub(crate) fn deallocation(&self) -> &Deallocation {
        self.data.deallocation()
    }
219
220    /// Returns a new [Buffer] that is a slice of this buffer starting at `offset`.
221    /// Doing so allows the same memory region to be shared between buffers.
222    ///
223    /// # Panics
224    ///
225    /// Panics iff `offset` is larger than `len`.
226    pub fn slice(&self, offset: usize) -> Self {
227        let mut s = self.clone();
228        s.advance(offset);
229        s
230    }
231
    /// Increases the offset of this buffer by `offset`
    ///
    /// # Panics
    ///
    /// Panics iff `offset` is larger than `len`.
    #[inline]
    pub fn advance(&mut self, offset: usize) {
        assert!(
            offset <= self.length,
            "the offset of the new Buffer cannot exceed the existing length: offset={} length={}",
            offset,
            self.length
        );
        self.length -= offset;
        // Safety:
        // The assert above guarantees `offset <= self.length` (before the
        // subtraction), and `self.ptr + self.length` stays within `data` by
        // the struct invariant, so the resulting pointer remains in bounds.
        self.ptr = unsafe { self.ptr.add(offset) };
    }
252
    /// Returns a new [Buffer] that is a slice of this buffer starting at `offset`,
    /// with `length` bytes.
    /// Doing so allows the same memory region to be shared between buffers.
    /// # Panics
    /// Panics iff `(offset + length)` is larger than the existing length.
    pub fn slice_with_length(&self, offset: usize, length: usize) -> Self {
        // `saturating_add` makes the bounds check robust against
        // `offset + length` overflowing `usize`.
        assert!(
            offset.saturating_add(length) <= self.length,
            "the offset of the new Buffer cannot exceed the existing length: slice offset={offset} length={length} selflen={}",
            self.length
        );
        // Safety:
        // offset + length <= self.length
        let ptr = unsafe { self.ptr.add(offset) };
        Self {
            data: self.data.clone(),
            ptr,
            length,
        }
    }
273
    /// Returns a pointer to the start of this buffer.
    ///
    /// Note that this should be used cautiously, and the returned pointer should not be
    /// stored anywhere, to avoid dangling pointers.
    #[inline]
    pub fn as_ptr(&self) -> *const u8 {
        self.ptr
    }
282
283    /// View buffer as a slice of a specific type.
284    ///
285    /// # Panics
286    ///
287    /// This function panics if the underlying buffer is not aligned
288    /// correctly for type `T`.
289    pub fn typed_data<T: ArrowNativeType>(&self) -> &[T] {
290        // SAFETY
291        // ArrowNativeType is trivially transmutable, is sealed to prevent potentially incorrect
292        // implementation outside this crate, and this method checks alignment
293        let (prefix, offsets, suffix) = unsafe { self.as_slice().align_to::<T>() };
294        assert!(prefix.is_empty() && suffix.is_empty());
295        offsets
296    }
297
    /// Returns a slice of this buffer starting at a certain bit offset.
    /// If the offset is byte-aligned the returned buffer is a shallow clone,
    /// otherwise a new buffer is allocated and filled with a copy of the bits in the range.
    ///
    /// NOTE(review): on the byte-aligned fast path the returned buffer keeps
    /// all remaining bytes rather than being truncated to `ceil(len / 8)` —
    /// presumably callers only read `len` bits; confirm before relying on the
    /// returned buffer's byte length.
    pub fn bit_slice(&self, offset: usize, len: usize) -> Self {
        if offset % 8 == 0 {
            return self.slice(offset / 8);
        }

        // Unaligned: copy the bit range into a fresh buffer via the identity op.
        bitwise_unary_op_helper(self, offset, len, |a| a)
    }
308
    /// Returns a `BitChunks` instance which can be used to iterate over this buffers bits
    /// in larger chunks and starting at arbitrary bit offsets.
    /// Note that both `offset` and `length` are measured in bits.
    pub fn bit_chunks(&self, offset: usize, len: usize) -> BitChunks {
        BitChunks::new(self.as_slice(), offset, len)
    }
315
    /// Returns the number of 1-bits in this buffer.
    #[deprecated(note = "use count_set_bits_offset instead")]
    pub fn count_set_bits(&self) -> usize {
        // The whole buffer, expressed in bits.
        let len_in_bits = self.len() * 8;
        // self.offset is already taken into consideration by the bit_chunks implementation
        self.count_set_bits_offset(0, len_in_bits)
    }
323
    /// Returns the number of 1-bits in this buffer, starting from `offset` with `length` bits
    /// inspected. Note that both `offset` and `length` are measured in bits.
    pub fn count_set_bits_offset(&self, offset: usize, len: usize) -> usize {
        UnalignedBitChunk::new(self.as_slice(), offset, len).count_ones()
    }
329
    /// Returns `MutableBuffer` for mutating the buffer if this buffer is not shared.
    /// Returns `Err` if this is shared or its allocation is from an external source or
    /// it is not allocated with alignment [`ALIGNMENT`]
    pub fn into_mutable(self) -> Result<MutableBuffer, Self> {
        // Save the view fields so the buffer can be reconstructed on failure.
        let ptr = self.ptr;
        let length = self.length;
        // `Arc::try_unwrap` fails (returning the Arc) if the allocation is shared.
        Arc::try_unwrap(self.data)
            .and_then(|bytes| {
                // The pointer of underlying buffer should not be offset.
                assert_eq!(ptr, bytes.ptr().as_ptr());
                // `from_bytes` rejects external/misaligned allocations; re-wrap
                // the returned `Bytes` in an `Arc` to match the error type.
                MutableBuffer::from_bytes(bytes).map_err(Arc::new)
            })
            .map_err(|bytes| Buffer {
                data: bytes,
                ptr,
                length,
            })
    }
348
    /// Returns `Vec` for mutating the buffer
    ///
    /// Returns `Err(self)` if this buffer does not have the same [`Layout`] as
    /// the destination Vec or contains a non-zero offset
    pub fn into_vec<T: ArrowNativeType>(self) -> Result<Vec<T>, Self> {
        let layout = match self.data.deallocation() {
            Deallocation::Standard(l) => l,
            _ => return Err(self), // Custom allocation
        };

        if self.ptr != self.data.as_ptr() {
            return Err(self); // Data is offset
        }

        // The reconstructed Vec must describe the entire allocation, so its
        // capacity is derived from the allocation size, not `self.length`.
        let v_capacity = layout.size() / std::mem::size_of::<T>();
        match Layout::array::<T>(v_capacity) {
            Ok(expected) if layout == &expected => {}
            _ => return Err(self), // Incorrect layout
        }

        let length = self.length;
        let ptr = self.ptr;
        // Note: integer division truncates if `length` is not a multiple of
        // `size_of::<T>()`, silently dropping the trailing partial element.
        let v_len = self.length / std::mem::size_of::<T>();

        Arc::try_unwrap(self.data)
            .map(|bytes| unsafe {
                let ptr = bytes.ptr().as_ptr() as _;
                // `forget` transfers ownership of the allocation to the Vec
                // below; dropping `bytes` here would double-free.
                std::mem::forget(bytes);
                // Safety
                // Verified that bytes layout matches that of Vec
                Vec::from_raw_parts(ptr, v_len, v_capacity)
            })
            .map_err(|bytes| Buffer {
                data: bytes,
                ptr,
                length,
            })
    }
387
388    /// Returns true if this [`Buffer`] is equal to `other`, using pointer comparisons
389    /// to determine buffer equality. This is cheaper than `PartialEq::eq` but may
390    /// return false when the arrays are logically equal
391    #[inline]
392    pub fn ptr_eq(&self, other: &Self) -> bool {
393        self.ptr == other.ptr && self.length == other.length
394    }
395}
396
/// Note that here we deliberately do not implement
/// `impl<T: AsRef<[u8]>> From<T> for Buffer`
/// As it would accept `Buffer::from(vec![...])` that would cause an unexpected copy.
/// Instead, we ask user to be explicit when copying is occurring, e.g., `Buffer::from(vec![...].to_byte_slice())`.
/// For zero-copy conversion, user should use `Buffer::from_vec(vec![...])`.
///
/// Since we removed impl for `AsRef<u8>`, we added the following three specific implementations to reduce API breakage.
/// See <https://github.com/apache/arrow-rs/issues/6033> for more discussion on this.
impl From<&[u8]> for Buffer {
    /// Creates a [`Buffer`] by copying the bytes of the slice.
    fn from(p: &[u8]) -> Self {
        Self::from_slice_ref(p)
    }
}
410
/// Creates a [`Buffer`] by copying the bytes of a fixed-size array.
impl<const N: usize> From<[u8; N]> for Buffer {
    fn from(p: [u8; N]) -> Self {
        Self::from_slice_ref(p)
    }
}

/// Creates a [`Buffer`] by copying the bytes of a borrowed fixed-size array.
impl<const N: usize> From<&[u8; N]> for Buffer {
    fn from(p: &[u8; N]) -> Self {
        Self::from_slice_ref(p)
    }
}
422
/// Zero-copy conversion: takes ownership of the vector's allocation.
impl<T: ArrowNativeType> From<Vec<T>> for Buffer {
    fn from(value: Vec<T>) -> Self {
        Self::from_vec(value)
    }
}

/// Zero-copy conversion: unwraps the [`Buffer`] backing the [`ScalarBuffer`].
impl<T: ArrowNativeType> From<ScalarBuffer<T>> for Buffer {
    fn from(value: ScalarBuffer<T>) -> Self {
        value.into_inner()
    }
}
434
/// Creating a `Buffer` instance by packing the boolean values into the buffer
/// as a bitmap (delegated to [`MutableBuffer`]'s `FromIterator<bool>`).
impl FromIterator<bool> for Buffer {
    fn from_iter<I>(iter: I) -> Self
    where
        I: IntoIterator<Item = bool>,
    {
        MutableBuffer::from_iter(iter).into()
    }
}
444
/// A [`Buffer`] dereferences to its byte contents, allowing slice methods
/// to be called on it directly.
impl std::ops::Deref for Buffer {
    type Target = [u8];

    fn deref(&self) -> &[u8] {
        // SAFETY: same invariant as `as_slice` — `ptr` is valid for `len`
        // bytes and the allocation outlives `&self`.
        unsafe { std::slice::from_raw_parts(self.as_ptr(), self.len()) }
    }
}
452
/// Freezes a [`MutableBuffer`] into an immutable [`Buffer`] without copying.
impl From<MutableBuffer> for Buffer {
    #[inline]
    fn from(buffer: MutableBuffer) -> Self {
        buffer.into_buffer()
    }
}

/// Finishes a [`BufferBuilder`] and takes ownership of its contents.
impl<T: ArrowNativeType> From<BufferBuilder<T>> for Buffer {
    fn from(mut value: BufferBuilder<T>) -> Self {
        value.finish()
    }
}
465
impl Buffer {
    /// Creates a [`Buffer`] from an [`Iterator`] with a trusted (upper) length.
    /// Prefer this to `collect` whenever possible, as it is ~60% faster.
    /// # Example
    /// ```
    /// # use arrow_buffer::buffer::Buffer;
    /// let v = vec![1u32];
    /// let iter = v.iter().map(|x| x * 2);
    /// let buffer = unsafe { Buffer::from_trusted_len_iter(iter) };
    /// assert_eq!(buffer.len(), 4) // u32 has 4 bytes
    /// ```
    /// # Safety
    /// This method assumes that the iterator's size is correct and is undefined behavior
    /// to use it on an iterator that reports an incorrect length.
    // This implementation is required for two reasons:
    // 1. there is no trait `TrustedLen` in stable rust and therefore
    //    we can't specialize `extend` for `TrustedLen` like `Vec` does.
    // 2. `from_trusted_len_iter` is faster.
    #[inline]
    pub unsafe fn from_trusted_len_iter<T: ArrowNativeType, I: Iterator<Item = T>>(
        iterator: I,
    ) -> Self {
        MutableBuffer::from_trusted_len_iter(iterator).into()
    }

    /// Creates a [`Buffer`] from an [`Iterator`] with a trusted (upper) length or errors
    /// if any of the items of the iterator is an error.
    /// Prefer this to `collect` whenever possible, as it is ~60% faster.
    /// # Safety
    /// This method assumes that the iterator's size is correct and is undefined behavior
    /// to use it on an iterator that reports an incorrect length.
    #[inline]
    pub unsafe fn try_from_trusted_len_iter<
        E,
        T: ArrowNativeType,
        I: Iterator<Item = Result<T, E>>,
    >(
        iterator: I,
    ) -> Result<Self, E> {
        // `?` propagates the first `Err` produced by the iterator.
        Ok(MutableBuffer::try_from_trusted_len_iter(iterator)?.into())
    }
}
508
509impl<T: ArrowNativeType> FromIterator<T> for Buffer {
510    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
511        let vec = Vec::from_iter(iter);
512        Buffer::from_vec(vec)
513    }
514}
515
516#[cfg(test)]
517mod tests {
518    use crate::i256;
519    use std::panic::{RefUnwindSafe, UnwindSafe};
520    use std::thread;
521
522    use super::*;
523
    // Equality is defined over logical byte contents, independent of
    // allocation identity, capacity, or slicing offset.
    #[test]
    fn test_buffer_data_equality() {
        let buf1 = Buffer::from(&[0, 1, 2, 3, 4]);
        let buf2 = Buffer::from(&[0, 1, 2, 3, 4]);
        assert_eq!(buf1, buf2);

        // slices with the same logical contents should still compare equal,
        // even when produced via different slicing APIs
        let buf3 = buf1.slice(2);
        assert_ne!(buf1, buf3);
        let buf4 = buf2.slice_with_length(2, 3);
        assert_eq!(buf3, buf4);

        // Different capacities should still preserve equality
        let mut buf2 = MutableBuffer::new(65);
        buf2.extend_from_slice(&[0u8, 1, 2, 3, 4]);

        let buf2 = buf2.into();
        assert_eq!(buf1, buf2);

        // unequal because of different elements
        let buf2 = Buffer::from(&[0, 0, 2, 3, 4]);
        assert_ne!(buf1, buf2);

        // unequal because of different length
        let buf2 = Buffer::from(&[0, 1, 2, 3]);
        assert_ne!(buf1, buf2);
    }
551
    // Basic construction from a byte slice.
    #[test]
    fn test_from_raw_parts() {
        let buf = Buffer::from(&[0, 1, 2, 3, 4]);
        assert_eq!(5, buf.len());
        assert!(!buf.as_ptr().is_null());
        assert_eq!([0, 1, 2, 3, 4], buf.as_slice());
    }

    #[test]
    fn test_from_vec() {
        let buf = Buffer::from(&[0, 1, 2, 3, 4]);
        assert_eq!(5, buf.len());
        assert!(!buf.as_ptr().is_null());
        assert_eq!([0, 1, 2, 3, 4], buf.as_slice());
    }

    // Moving a Buffer transfers the view without copying the data.
    #[test]
    fn test_copy() {
        let buf = Buffer::from(&[0, 1, 2, 3, 4]);
        let buf2 = buf;
        assert_eq!(5, buf2.len());
        assert_eq!(64, buf2.capacity());
        assert!(!buf2.as_ptr().is_null());
        assert_eq!([0, 1, 2, 3, 4], buf2.as_slice());
    }

    // Slicing shares the allocation and advances the data pointer.
    #[test]
    fn test_slice() {
        let buf = Buffer::from(&[2, 4, 6, 8, 10]);
        let buf2 = buf.slice(2);

        assert_eq!([6, 8, 10], buf2.as_slice());
        assert_eq!(3, buf2.len());
        assert_eq!(unsafe { buf.as_ptr().offset(2) }, buf2.as_ptr());

        let buf3 = buf2.slice_with_length(1, 2);
        assert_eq!([8, 10], buf3.as_slice());
        assert_eq!(2, buf3.len());
        assert_eq!(unsafe { buf.as_ptr().offset(3) }, buf3.as_ptr());

        // slicing at exactly `len` yields an empty buffer
        let buf4 = buf.slice(5);
        let empty_slice: [u8; 0] = [];
        assert_eq!(empty_slice, buf4.as_slice());
        assert_eq!(0, buf4.len());
        assert!(buf4.is_empty());
        assert_eq!(buf2.slice_with_length(2, 1).as_slice(), &[10]);
    }
599
    // shrink_to_fit only reallocates when the buffer is unshared, and keeps
    // any bytes before the slicing offset (realloc cannot free a prefix).
    #[test]
    fn test_shrink_to_fit() {
        let original = Buffer::from(&[0, 1, 2, 3, 4, 5, 6, 7]);
        assert_eq!(original.as_slice(), &[0, 1, 2, 3, 4, 5, 6, 7]);
        assert_eq!(original.capacity(), 64);

        let slice = original.slice_with_length(2, 3);
        drop(original); // Make sure the buffer isn't shared (or shrink_to_fit won't work)
        assert_eq!(slice.as_slice(), &[2, 3, 4]);
        assert_eq!(slice.capacity(), 64);

        let mut shrunk = slice;
        shrunk.shrink_to_fit();
        assert_eq!(shrunk.as_slice(), &[2, 3, 4]);
        assert_eq!(shrunk.capacity(), 5); // shrink_to_fit is allowed to keep the elements before the offset

        // Test that we can handle empty slices:
        let empty_slice = shrunk.slice_with_length(1, 0);
        drop(shrunk); // Make sure the buffer isn't shared (or shrink_to_fit won't work)
        assert_eq!(empty_slice.as_slice(), &[]);
        assert_eq!(empty_slice.capacity(), 5);

        // an empty buffer can shrink all the way to zero capacity
        let mut shrunk_empty = empty_slice;
        shrunk_empty.shrink_to_fit();
        assert_eq!(shrunk_empty.as_slice(), &[]);
        assert_eq!(shrunk_empty.capacity(), 0);
    }

    // Out-of-bounds slice offsets must panic rather than create a dangling view.
    #[test]
    #[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")]
    fn test_slice_offset_out_of_bound() {
        let buf = Buffer::from(&[2, 4, 6, 8, 10]);
        buf.slice(6);
    }

    // Buffer is Send: clones can be moved across thread boundaries.
    #[test]
    fn test_access_concurrently() {
        let buffer = Buffer::from([1, 2, 3, 4, 5]);
        let buffer2 = buffer.clone();
        assert_eq!([1, 2, 3, 4, 5], buffer.as_slice());

        let buffer_copy = thread::spawn(move || {
            // access buffer in another thread.
            buffer
        })
        .join();

        assert!(buffer_copy.is_ok());
        assert_eq!(buffer2, buffer_copy.ok().unwrap());
    }
650
    // Round-trips a slice through `from_slice_ref` + `typed_data` and checks
    // the typed view matches the input.
    macro_rules! check_as_typed_data {
        ($input: expr, $native_t: ty) => {{
            let buffer = Buffer::from_slice_ref($input);
            let slice: &[$native_t] = buffer.typed_data::<$native_t>();
            assert_eq!($input, slice);
        }};
    }

    #[test]
    #[allow(clippy::float_cmp)]
    fn test_as_typed_data() {
        check_as_typed_data!(&[1i8, 3i8, 6i8], i8);
        check_as_typed_data!(&[1u8, 3u8, 6u8], u8);
        check_as_typed_data!(&[1i16, 3i16, 6i16], i16);
        check_as_typed_data!(&[1i32, 3i32, 6i32], i32);
        check_as_typed_data!(&[1i64, 3i64, 6i64], i64);
        check_as_typed_data!(&[1u16, 3u16, 6u16], u16);
        check_as_typed_data!(&[1u32, 3u32, 6u32], u32);
        check_as_typed_data!(&[1u64, 3u64, 6u64], u64);
        check_as_typed_data!(&[1f32, 3f32, 6f32], f32);
        check_as_typed_data!(&[1f64, 3f64, 6f64], f64);
    }

    // Popcount over whole buffers starting at bit offset 0.
    #[test]
    fn test_count_bits() {
        assert_eq!(0, Buffer::from(&[0b00000000]).count_set_bits_offset(0, 8));
        assert_eq!(8, Buffer::from(&[0b11111111]).count_set_bits_offset(0, 8));
        assert_eq!(3, Buffer::from(&[0b00001101]).count_set_bits_offset(0, 8));
        assert_eq!(
            6,
            Buffer::from(&[0b01001001, 0b01010010]).count_set_bits_offset(0, 16)
        );
        assert_eq!(
            16,
            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(0, 16)
        );
    }
688
    // Popcount must respect byte-level slicing of the buffer.
    #[test]
    fn test_count_bits_slice() {
        assert_eq!(
            0,
            Buffer::from(&[0b11111111, 0b00000000])
                .slice(1)
                .count_set_bits_offset(0, 8)
        );
        assert_eq!(
            8,
            Buffer::from(&[0b11111111, 0b11111111])
                .slice_with_length(1, 1)
                .count_set_bits_offset(0, 8)
        );
        assert_eq!(
            3,
            Buffer::from(&[0b11111111, 0b11111111, 0b00001101])
                .slice(2)
                .count_set_bits_offset(0, 8)
        );
        assert_eq!(
            6,
            Buffer::from(&[0b11111111, 0b01001001, 0b01010010])
                .slice_with_length(1, 2)
                .count_set_bits_offset(0, 16)
        );
        assert_eq!(
            16,
            Buffer::from(&[0b11111111, 0b11111111, 0b11111111, 0b11111111])
                .slice(2)
                .count_set_bits_offset(0, 16)
        );
    }

    // Popcount must respect arbitrary (non-byte-aligned) bit offsets/lengths.
    #[test]
    fn test_count_bits_offset_slice() {
        assert_eq!(8, Buffer::from(&[0b11111111]).count_set_bits_offset(0, 8));
        assert_eq!(3, Buffer::from(&[0b11111111]).count_set_bits_offset(0, 3));
        assert_eq!(5, Buffer::from(&[0b11111111]).count_set_bits_offset(3, 5));
        assert_eq!(1, Buffer::from(&[0b11111111]).count_set_bits_offset(3, 1));
        assert_eq!(0, Buffer::from(&[0b11111111]).count_set_bits_offset(8, 0));
        assert_eq!(2, Buffer::from(&[0b01010101]).count_set_bits_offset(0, 3));
        assert_eq!(
            16,
            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(0, 16)
        );
        assert_eq!(
            10,
            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(0, 10)
        );
        assert_eq!(
            10,
            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(3, 10)
        );
        assert_eq!(
            8,
            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(8, 8)
        );
        assert_eq!(
            5,
            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(11, 5)
        );
        assert_eq!(
            0,
            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(16, 0)
        );
        assert_eq!(
            2,
            Buffer::from(&[0b01101101, 0b10101010]).count_set_bits_offset(7, 5)
        );
        assert_eq!(
            4,
            Buffer::from(&[0b01101101, 0b10101010]).count_set_bits_offset(7, 9)
        );
    }
764
    // Compile-time check that Buffer is unwind-safe (no interior mutability
    // observable across a panic boundary).
    #[test]
    fn test_unwind_safe() {
        fn assert_unwind_safe<T: RefUnwindSafe + UnwindSafe>() {}
        assert_unwind_safe::<Buffer>()
    }

    // A Buffer backed by externally owned memory (the Vec itself acts as the
    // Allocation owner keeping the memory alive).
    #[test]
    fn test_from_foreign_vec() {
        let mut vector = vec![1_i32, 2, 3, 4, 5];
        let buffer = unsafe {
            Buffer::from_custom_allocation(
                NonNull::new_unchecked(vector.as_mut_ptr() as *mut u8),
                vector.len() * std::mem::size_of::<i32>(),
                Arc::new(vector),
            )
        };

        let slice = buffer.typed_data::<i32>();
        assert_eq!(slice, &[1, 2, 3, 4, 5]);

        let buffer = buffer.slice(std::mem::size_of::<i32>());

        let slice = buffer.typed_data::<i32>();
        assert_eq!(slice, &[2, 3, 4, 5]);
    }

    // `slice_with_length` guards against `offset + length` overflowing usize.
    #[test]
    #[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")]
    fn slice_overflow() {
        let buffer = Buffer::from(MutableBuffer::from_len_zeroed(12));
        buffer.slice_with_length(2, usize::MAX);
    }
797
    // Exercises the zero-copy Buffer <-> Vec round trip (`from_vec` /
    // `into_vec` / `into_mutable`) including all the layout, offset, and
    // sharing conditions under which `into_vec` must return `Err(self)`.
    #[test]
    fn test_vec_interop() {
        // Test empty vec
        let a: Vec<i128> = Vec::new();
        let b = Buffer::from_vec(a);
        b.into_vec::<i128>().unwrap();

        // Test vec with capacity
        let a: Vec<i128> = Vec::with_capacity(20);
        let b = Buffer::from_vec(a);
        let back = b.into_vec::<i128>().unwrap();
        assert_eq!(back.len(), 0);
        assert_eq!(back.capacity(), 20);

        // Test vec with values
        let mut a: Vec<i128> = Vec::with_capacity(3);
        a.extend_from_slice(&[1, 2, 3]);
        let b = Buffer::from_vec(a);
        let back = b.into_vec::<i128>().unwrap();
        assert_eq!(back.len(), 3);
        assert_eq!(back.capacity(), 3);

        // Test vec with values and spare capacity
        let mut a: Vec<i128> = Vec::with_capacity(20);
        a.extend_from_slice(&[1, 4, 7, 8, 9, 3, 6]);
        let b = Buffer::from_vec(a);
        let back = b.into_vec::<i128>().unwrap();
        assert_eq!(back.len(), 7);
        assert_eq!(back.capacity(), 20);

        // Test incorrect alignment
        let a: Vec<i128> = Vec::new();
        let b = Buffer::from_vec(a);
        let b = b.into_vec::<i32>().unwrap_err();
        b.into_vec::<i8>().unwrap_err();

        // Test convert between types with same alignment
        // This is an implementation quirk, but isn't harmful
        // as ArrowNativeType are trivially transmutable
        let a: Vec<i64> = vec![1, 2, 3, 4];
        let b = Buffer::from_vec(a);
        let back = b.into_vec::<u64>().unwrap();
        assert_eq!(back.len(), 4);
        assert_eq!(back.capacity(), 4);

        // i256 has the same layout as i128 so this is valid
        let mut b: Vec<i128> = Vec::with_capacity(4);
        b.extend_from_slice(&[1, 2, 3, 4]);
        let b = Buffer::from_vec(b);
        let back = b.into_vec::<i256>().unwrap();
        assert_eq!(back.len(), 2);
        assert_eq!(back.capacity(), 2);

        // Invalid layout
        let b: Vec<i128> = vec![1, 2, 3];
        let b = Buffer::from_vec(b);
        b.into_vec::<i256>().unwrap_err();

        // Invalid layout
        let mut b: Vec<i128> = Vec::with_capacity(5);
        b.extend_from_slice(&[1, 2, 3, 4]);
        let b = Buffer::from_vec(b);
        b.into_vec::<i256>().unwrap_err();

        // Truncates length
        // This is an implementation quirk, but isn't harmful
        let mut b: Vec<i128> = Vec::with_capacity(4);
        b.extend_from_slice(&[1, 2, 3]);
        let b = Buffer::from_vec(b);
        let back = b.into_vec::<i256>().unwrap();
        assert_eq!(back.len(), 1);
        assert_eq!(back.capacity(), 2);

        // Cannot use aligned allocation
        let b = Buffer::from(MutableBuffer::new(10));
        let b = b.into_vec::<u8>().unwrap_err();
        b.into_vec::<u64>().unwrap_err();

        // Test slicing
        let mut a: Vec<i128> = Vec::with_capacity(20);
        a.extend_from_slice(&[1, 4, 7, 8, 9, 3, 6]);
        let b = Buffer::from_vec(a);
        let slice = b.slice_with_length(0, 64);

        // Shared reference fails
        let slice = slice.into_vec::<i128>().unwrap_err();
        drop(b);

        // Succeeds as no outstanding shared reference
        let back = slice.into_vec::<i128>().unwrap();
        assert_eq!(&back, &[1, 4, 7, 8]);
        assert_eq!(back.capacity(), 20);

        // Slicing by non-multiple length truncates
        let mut a: Vec<i128> = Vec::with_capacity(8);
        a.extend_from_slice(&[1, 4, 7, 3]);

        let b = Buffer::from_vec(a);
        let slice = b.slice_with_length(0, 34);
        drop(b);

        let back = slice.into_vec::<i128>().unwrap();
        assert_eq!(&back, &[1, 4]);
        assert_eq!(back.capacity(), 8);

        // Offset prevents conversion
        let a: Vec<u32> = vec![1, 3, 4, 6];
        let b = Buffer::from_vec(a).slice(2);
        b.into_vec::<u32>().unwrap_err();

        let b = MutableBuffer::new(16).into_buffer();
        let b = b.into_vec::<u8>().unwrap_err(); // Invalid layout
        let b = b.into_vec::<u32>().unwrap_err(); // Invalid layout
        b.into_mutable().unwrap();

        let b = Buffer::from_vec(vec![1_u32, 3, 5]);
        let b = b.into_mutable().unwrap();
        let b = Buffer::from(b);
        let b = b.into_vec::<u32>().unwrap();
        assert_eq!(b, &[1, 3, 5]);
    }

    // Collecting more elements than the address space can hold must panic
    // with a capacity overflow rather than wrap and corrupt memory.
    #[test]
    #[should_panic(expected = "capacity overflow")]
    fn test_from_iter_overflow() {
        let iter_len = usize::MAX / std::mem::size_of::<u64>() + 1;
        let _ = Buffer::from_iter(std::iter::repeat(0_u64).take(iter_len));
    }
926}