Skip to main content

arrow_buffer/buffer/
immutable.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::alloc::Layout;
19use std::fmt::Debug;
20use std::ptr::NonNull;
21use std::sync::Arc;
22
23use crate::BufferBuilder;
24use crate::alloc::{Allocation, Deallocation};
25use crate::util::bit_chunk_iterator::{BitChunks, UnalignedBitChunk};
26use crate::{bit_util, bytes::Bytes, native::ArrowNativeType};
27
28#[cfg(feature = "pool")]
29use crate::pool::MemoryPool;
30
31use super::{MutableBuffer, ScalarBuffer};
32
/// A contiguous memory region that can be shared with other buffers and across
/// thread boundaries that stores Arrow data.
///
/// `Buffer`s can be sliced and cloned without copying the underlying data and can
/// be created from memory allocated by non-Rust sources such as C/C++.
///
/// # Example: Create a `Buffer` from a `Vec` (without copying)
/// ```
/// # use arrow_buffer::Buffer;
/// let vec: Vec<u32> = vec![1, 2, 3];
/// let buffer = Buffer::from(vec);
/// ```
///
/// # Example: Convert a `Buffer` to a `Vec` (without copying)
///
/// Use [`Self::into_vec`] to convert a `Buffer` back into a `Vec` if there are
/// no other references and the types are aligned correctly.
/// ```
/// # use arrow_buffer::Buffer;
/// # let vec: Vec<u32> = vec![1, 2, 3];
/// # let buffer = Buffer::from(vec);
/// // convert the buffer back into a Vec of u32
/// // note this will fail if the buffer is shared or not aligned correctly
/// let vec: Vec<u32> = buffer.into_vec().unwrap();
/// ```
///
/// # Example: Create a `Buffer` from a [`bytes::Bytes`] (without copying)
///
/// [`bytes::Bytes`] is a common type in the Rust ecosystem for shared memory
/// regions. You can create a buffer from a `Bytes` instance using the `From`
/// implementation, also without copying.
///
/// ```
/// # use arrow_buffer::Buffer;
/// let bytes = bytes::Bytes::from("hello");
/// let buffer = Buffer::from(bytes);
/// ```
#[derive(Clone, Debug)]
pub struct Buffer {
    /// The reference-counted allocation backing this buffer; clones and
    /// slices of a `Buffer` share it.
    data: Arc<Bytes>,

    /// Pointer into `data` marking the first byte of this buffer; it is
    /// valid for reads of `length` bytes.
    ///
    /// We store a pointer instead of an offset to avoid pointer arithmetic
    /// which causes LLVM to fail to vectorise code correctly
    ptr: *const u8,

    /// Byte length of the buffer.
    ///
    /// Must be less than or equal to `data.len()`
    length: usize,
}
86
87impl Default for Buffer {
88    #[inline]
89    fn default() -> Self {
90        MutableBuffer::default().into()
91    }
92}
93
94impl PartialEq for Buffer {
95    fn eq(&self, other: &Self) -> bool {
96        self.as_slice().eq(other.as_slice())
97    }
98}
99
// Equality is plain byte-slice equality (see `PartialEq`), which is a total
// equivalence relation, so `Eq` can be implemented as a marker.
impl Eq for Buffer {}

// The raw `ptr` field suppresses the automatic `Send`/`Sync` implementations,
// so they are restored here, conditional on `Bytes` providing them.
// NOTE(review): soundness relies on `ptr` only ever pointing into the
// allocation kept alive by `data` — confirm this holds for every constructor.
unsafe impl Send for Buffer where Bytes: Send {}
unsafe impl Sync for Buffer where Bytes: Sync {}
104
105impl Buffer {
106    /// Returns the offset, in bytes, of `Self::ptr` to `Self::data`
107    ///
108    /// self.ptr and self.data can be different after slicing or advancing the buffer.
109    pub fn ptr_offset(&self) -> usize {
110        // Safety: `ptr` is always in bounds of `data`.
111        unsafe { self.ptr.offset_from(self.data.ptr().as_ptr()) as usize }
112    }
113
    /// Returns the pointer to the start of the underlying data, i.e. without
    /// this buffer's offset applied.
    ///
    /// Use [`Self::as_ptr`] for the pointer to the first byte of this buffer
    /// and [`Self::ptr_offset`] for the distance between the two.
    pub fn data_ptr(&self) -> NonNull<u8> {
        self.data.ptr()
    }
118
    /// Returns the number of strong references to the buffer, as reported by
    /// [`Arc::strong_count`] on the shared underlying data.
    ///
    /// This method is safe but if the buffer is shared across multiple threads
    /// the underlying value could change between calling this method and using
    /// the result.
    pub fn strong_count(&self) -> usize {
        Arc::strong_count(&self.data)
    }
127
128    /// Create a [`Buffer`] from the provided [`Vec`] without copying
129    #[inline]
130    pub fn from_vec<T: ArrowNativeType>(vec: Vec<T>) -> Self {
131        MutableBuffer::from(vec).into()
132    }
133
134    /// Initializes a [Buffer] from a slice of items.
135    pub fn from_slice_ref<U: ArrowNativeType, T: AsRef<[U]>>(items: T) -> Self {
136        let slice = items.as_ref();
137        let capacity = std::mem::size_of_val(slice);
138        let mut buffer = MutableBuffer::with_capacity(capacity);
139        buffer.extend_from_slice(slice);
140        buffer.into()
141    }
142
143    /// Creates a buffer from an existing memory region.
144    ///
145    /// Ownership of the memory is tracked via reference counting
146    /// and the memory will be freed using the `drop` method of
147    /// [crate::alloc::Allocation] when the reference count reaches zero.
148    ///
149    /// # Arguments
150    ///
151    /// * `ptr` - Pointer to raw parts
152    /// * `len` - Length of raw parts in **bytes**
153    /// * `owner` - A [crate::alloc::Allocation] which is responsible for freeing that data
154    ///
155    /// # Safety
156    ///
157    /// This function is unsafe as there is no guarantee that the given pointer is valid for `len` bytes
158    pub unsafe fn from_custom_allocation(
159        ptr: NonNull<u8>,
160        len: usize,
161        owner: Arc<dyn Allocation>,
162    ) -> Self {
163        unsafe { Buffer::build_with_arguments(ptr, len, Deallocation::Custom(owner, len)) }
164    }
165
166    /// Auxiliary method to create a new Buffer
167    unsafe fn build_with_arguments(
168        ptr: NonNull<u8>,
169        len: usize,
170        deallocation: Deallocation,
171    ) -> Self {
172        let bytes = unsafe { Bytes::new(ptr, len, deallocation) };
173        let ptr = bytes.as_ptr();
174        Buffer {
175            ptr,
176            data: Arc::new(bytes),
177            length: len,
178        }
179    }
180
    /// Returns the number of bytes in the buffer
    ///
    /// This is the length of the view exposed by [`Self::as_slice`]; slicing
    /// or advancing the buffer changes it.
    #[inline]
    pub fn len(&self) -> usize {
        self.length
    }
186
    /// Returns the capacity of this buffer, as reported by the underlying data.
    /// For externally owned buffers, this returns zero
    ///
    /// The value describes the shared underlying data, not this particular
    /// view of it, so it is unaffected by slicing.
    #[inline]
    pub fn capacity(&self) -> usize {
        self.data.capacity()
    }
193
194    /// Tries to shrink the capacity of the buffer as much as possible, freeing unused memory.
195    ///
196    /// If the buffer is shared, this is a no-op.
197    ///
198    /// If the memory was allocated with a custom allocator, this is a no-op.
199    ///
200    /// If the capacity is already less than or equal to the desired capacity, this is a no-op.
201    ///
202    /// The memory region will be reallocated using `std::alloc::realloc`.
203    pub fn shrink_to_fit(&mut self) {
204        let offset = self.ptr_offset();
205        let is_empty = self.is_empty();
206        let desired_capacity = if is_empty {
207            0
208        } else {
209            // For realloc to work, we cannot free the elements before the offset
210            offset + self.len()
211        };
212        if desired_capacity < self.capacity() {
213            if let Some(bytes) = Arc::get_mut(&mut self.data) {
214                if bytes.try_realloc(desired_capacity).is_ok() {
215                    // Realloc complete - update our pointer into `bytes`:
216                    self.ptr = if is_empty {
217                        bytes.as_ptr()
218                    } else {
219                        // SAFETY: we kept all elements leading up to the offset
220                        unsafe { bytes.as_ptr().add(offset) }
221                    }
222                } else {
223                    // Failure to reallocate is fine; we just failed to free up memory.
224                }
225            }
226        }
227    }
228
    /// Returns true if the buffer is empty, i.e. [`Self::len`] is zero.
    ///
    /// Only the length of this view is inspected; the underlying data may
    /// still have non-zero capacity.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.length == 0
    }
234
235    /// Returns the byte slice stored in this buffer
236    pub fn as_slice(&self) -> &[u8] {
237        unsafe { std::slice::from_raw_parts(self.ptr, self.length) }
238    }
239
    /// Returns the [`Deallocation`] recorded for the underlying data.
    pub(crate) fn deallocation(&self) -> &Deallocation {
        self.data.deallocation()
    }
243
244    /// Returns a new [Buffer] that is a slice of this buffer starting at `offset`.
245    ///
246    /// This function is `O(1)` and does not copy any data, allowing the
247    /// same memory region to be shared between buffers.
248    ///
249    /// # Panics
250    ///
251    /// Panics iff `offset` is larger than `len`.
252    pub fn slice(&self, offset: usize) -> Self {
253        let mut s = self.clone();
254        s.advance(offset);
255        s
256    }
257
258    /// Increases the offset of this buffer by `offset`
259    ///
260    /// # Panics
261    ///
262    /// Panics iff `offset` is larger than `len`.
263    #[inline]
264    pub fn advance(&mut self, offset: usize) {
265        assert!(
266            offset <= self.length,
267            "the offset of the new Buffer cannot exceed the existing length: offset={} length={}",
268            offset,
269            self.length
270        );
271        self.length -= offset;
272        // Safety:
273        // This cannot overflow as
274        // `self.offset + self.length < self.data.len()`
275        // `offset < self.length`
276        self.ptr = unsafe { self.ptr.add(offset) };
277    }
278
279    /// Returns a new [Buffer] that is a slice of this buffer starting at `offset`,
280    /// with `length` bytes.
281    ///
282    /// This function is `O(1)` and does not copy any data, allowing the same
283    /// memory region to be shared between buffers.
284    ///
285    /// # Panics
286    /// Panics iff `(offset + length)` is larger than the existing length.
287    pub fn slice_with_length(&self, offset: usize, length: usize) -> Self {
288        assert!(
289            offset.saturating_add(length) <= self.length,
290            "the offset of the new Buffer cannot exceed the existing length: slice offset={offset} length={length} selflen={}",
291            self.length
292        );
293        // Safety:
294        // offset + length <= self.length
295        let ptr = unsafe { self.ptr.add(offset) };
296        Self {
297            data: self.data.clone(),
298            ptr,
299            length,
300        }
301    }
302
    /// Returns a pointer to the start of this buffer, i.e. with any offset
    /// from slicing or advancing already applied.
    ///
    /// Note that this should be used cautiously, and the returned pointer should not be
    /// stored anywhere, to avoid dangling pointers.
    #[inline]
    pub fn as_ptr(&self) -> *const u8 {
        self.ptr
    }
311
312    /// View buffer as a slice of a specific type.
313    ///
314    /// # Panics
315    ///
316    /// This function panics if the underlying buffer is not aligned
317    /// correctly for type `T`.
318    pub fn typed_data<T: ArrowNativeType>(&self) -> &[T] {
319        // SAFETY
320        // ArrowNativeType is trivially transmutable, is sealed to prevent potentially incorrect
321        // implementation outside this crate, and this method checks alignment
322        let (prefix, offsets, suffix) = unsafe { self.as_slice().align_to::<T>() };
323        assert!(prefix.is_empty() && suffix.is_empty());
324        offsets
325    }
326
327    /// Returns a slice of this buffer starting at a certain bit offset.
328    /// If the offset is byte-aligned the returned buffer is a shallow clone,
329    /// otherwise a new buffer is allocated and filled with a copy of the bits in the range.
330    pub fn bit_slice(&self, offset: usize, len: usize) -> Self {
331        if offset % 8 == 0 {
332            return self.slice_with_length(offset / 8, bit_util::ceil(len, 8));
333        }
334
335        let chunks = self.bit_chunks(offset, len);
336
337        let buffer: Vec<u64> = if chunks.remainder_len() > 0 {
338            chunks.iter().chain(Some(chunks.remainder_bits())).collect()
339        } else {
340            chunks.iter().collect()
341        };
342        let mut buffer = Buffer::from_vec(buffer);
343        // Update length to be byte-aligned
344        buffer.length = bit_util::ceil(len, 8);
345        buffer
346    }
347
348    /// Returns a `BitChunks` instance which can be used to iterate over this buffers bits
349    /// in larger chunks and starting at arbitrary bit offsets.
350    /// Note that both `offset` and `length` are measured in bits.
351    pub fn bit_chunks(&self, offset: usize, len: usize) -> BitChunks<'_> {
352        BitChunks::new(self.as_slice(), offset, len)
353    }
354
355    /// Returns the number of 1-bits in this buffer, starting from `offset` with `length` bits
356    /// inspected. Note that both `offset` and `length` are measured in bits.
357    pub fn count_set_bits_offset(&self, offset: usize, len: usize) -> usize {
358        UnalignedBitChunk::new(self.as_slice(), offset, len).count_ones()
359    }
360
361    /// Returns `MutableBuffer` for mutating the buffer if this buffer is not shared.
362    /// Returns `Err` if this is shared or its allocation is from an external source or
363    /// it is not allocated with alignment [`ALIGNMENT`]
364    ///
365    /// # Example: Creating a [`MutableBuffer`] from a [`Buffer`]
366    /// ```
367    /// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
368    /// let buffer: Buffer = Buffer::from(&[1u8, 2, 3, 4][..]);
369    /// // Only possible to convert a Buffer into a MutableBuffer if uniquely owned
370    /// // (i.e., there are no other references to it).
371    /// let mut mutable_buffer = match buffer.into_mutable() {
372    ///    Ok(mutable) => mutable,
373    ///    Err(orig_buffer) => {
374    ///      panic!("buffer was not uniquely owned");
375    ///    }
376    /// };
377    /// mutable_buffer.push(5u8);
378    /// let buffer = Buffer::from(mutable_buffer);
379    /// assert_eq!(buffer.as_slice(), &[1u8, 2, 3, 4, 5])
380    /// ```
381    ///
382    /// [`ALIGNMENT`]: crate::alloc::ALIGNMENT
383    pub fn into_mutable(self) -> Result<MutableBuffer, Self> {
384        let ptr = self.ptr;
385        let length = self.length;
386        Arc::try_unwrap(self.data)
387            .and_then(|bytes| {
388                // The pointer of underlying buffer should not be offset.
389                assert_eq!(ptr, bytes.ptr().as_ptr());
390                MutableBuffer::from_bytes(bytes).map_err(Arc::new)
391            })
392            .map_err(|bytes| Buffer {
393                data: bytes,
394                ptr,
395                length,
396            })
397    }
398
399    /// Converts self into a `Vec`, if possible.
400    ///
401    /// This can be used to reuse / mutate the underlying data.
402    ///
403    /// # Errors
404    ///
405    /// Returns `Err(self)` if
406    /// 1. The buffer does not have the same [`Layout`] as the destination Vec
407    /// 2. The buffer contains a non-zero offset
408    /// 3. The buffer is shared
409    pub fn into_vec<T: ArrowNativeType>(self) -> Result<Vec<T>, Self> {
410        let layout = match self.data.deallocation() {
411            Deallocation::Standard(l) => l,
412            _ => return Err(self), // Custom allocation
413        };
414
415        if self.ptr != self.data.as_ptr() {
416            return Err(self); // Data is offset
417        }
418
419        let v_capacity = layout.size() / std::mem::size_of::<T>();
420        match Layout::array::<T>(v_capacity) {
421            Ok(expected) if layout == &expected => {}
422            _ => return Err(self), // Incorrect layout
423        }
424
425        let length = self.length;
426        let ptr = self.ptr;
427        let v_len = self.length / std::mem::size_of::<T>();
428
429        Arc::try_unwrap(self.data)
430            .map(|bytes| unsafe {
431                let ptr = bytes.ptr().as_ptr() as _;
432                std::mem::forget(bytes);
433                // Safety
434                // Verified that bytes layout matches that of Vec
435                Vec::from_raw_parts(ptr, v_len, v_capacity)
436            })
437            .map_err(|bytes| Buffer {
438                data: bytes,
439                ptr,
440                length,
441            })
442    }
443
444    /// Returns true if this [`Buffer`] is equal to `other`, using pointer comparisons
445    /// to determine buffer equality. This is cheaper than `PartialEq::eq` but may
446    /// return false when the arrays are logically equal
447    #[inline]
448    pub fn ptr_eq(&self, other: &Self) -> bool {
449        self.ptr == other.ptr && self.length == other.length
450    }
451
    /// Register this [`Buffer`] with the provided [`MemoryPool`]
    ///
    /// This claims the memory used by this buffer in the pool, allowing for
    /// accurate accounting of memory usage. Any prior reservation will be
    /// released so this works well when the buffer is being shared among
    /// multiple arrays.
    ///
    /// The call is forwarded to the shared underlying data.
    #[cfg(feature = "pool")]
    pub fn claim(&self, pool: &dyn MemoryPool) {
        self.data.claim(pool)
    }
462}
463
464/// Note that here we deliberately do not implement
465/// `impl<T: AsRef<[u8]>> From<T> for Buffer`
466/// As it would accept `Buffer::from(vec![...])` that would cause an unexpected copy.
467/// Instead, we ask user to be explicit when copying is occurring, e.g., `Buffer::from(vec![...].to_byte_slice())`.
468/// For zero-copy conversion, user should use `Buffer::from_vec(vec![...])`.
469///
470/// Since we removed impl for `AsRef<u8>`, we added the following three specific implementations to reduce API breakage.
471/// See <https://github.com/apache/arrow-rs/issues/6033> for more discussion on this.
472impl From<&[u8]> for Buffer {
473    fn from(p: &[u8]) -> Self {
474        Self::from_slice_ref(p)
475    }
476}
477
478impl<const N: usize> From<[u8; N]> for Buffer {
479    fn from(p: [u8; N]) -> Self {
480        Self::from_slice_ref(p)
481    }
482}
483
484impl<const N: usize> From<&[u8; N]> for Buffer {
485    fn from(p: &[u8; N]) -> Self {
486        Self::from_slice_ref(p)
487    }
488}
489
490impl<T: ArrowNativeType> From<Vec<T>> for Buffer {
491    fn from(value: Vec<T>) -> Self {
492        Self::from_vec(value)
493    }
494}
495
/// Unwraps a typed [`ScalarBuffer`] into the [`Buffer`] returned by its
/// `into_inner` method.
impl<T: ArrowNativeType> From<ScalarBuffer<T>> for Buffer {
    fn from(value: ScalarBuffer<T>) -> Self {
        value.into_inner()
    }
}
501
502/// Convert from internal `Bytes` (not [`bytes::Bytes`]) to `Buffer`
503impl From<Bytes> for Buffer {
504    #[inline]
505    fn from(bytes: Bytes) -> Self {
506        let length = bytes.len();
507        let ptr = bytes.as_ptr();
508        Self {
509            data: Arc::new(bytes),
510            ptr,
511            length,
512        }
513    }
514}
515
516/// Convert from [`bytes::Bytes`], not internal `Bytes` to `Buffer`
517impl From<bytes::Bytes> for Buffer {
518    fn from(bytes: bytes::Bytes) -> Self {
519        let bytes: Bytes = bytes.into();
520        Self::from(bytes)
521    }
522}
523
524/// Create a `Buffer` instance by storing the boolean values into the buffer
525impl FromIterator<bool> for Buffer {
526    fn from_iter<I>(iter: I) -> Self
527    where
528        I: IntoIterator<Item = bool>,
529    {
530        MutableBuffer::from_iter(iter).into()
531    }
532}
533
534impl std::ops::Deref for Buffer {
535    type Target = [u8];
536
537    fn deref(&self) -> &[u8] {
538        unsafe { std::slice::from_raw_parts(self.as_ptr(), self.len()) }
539    }
540}
541
542impl AsRef<[u8]> for &Buffer {
543    fn as_ref(&self) -> &[u8] {
544        self.as_slice()
545    }
546}
547
/// Converts a [`MutableBuffer`] into a [`Buffer`] by delegating to its
/// `into_buffer` method.
impl From<MutableBuffer> for Buffer {
    #[inline]
    fn from(buffer: MutableBuffer) -> Self {
        buffer.into_buffer()
    }
}
554
/// Consumes a [`BufferBuilder`] and returns the [`Buffer`] produced by its
/// `finish` method.
impl<T: ArrowNativeType> From<BufferBuilder<T>> for Buffer {
    fn from(mut value: BufferBuilder<T>) -> Self {
        value.finish()
    }
}
560
561impl Buffer {
562    /// Creates a [`Buffer`] from an [`Iterator`] with a trusted (upper) length.
563    ///
564    /// Prefer this to `collect` whenever possible, as it is ~60% faster.
565    ///
566    /// # Example
567    /// ```
568    /// # use arrow_buffer::buffer::Buffer;
569    /// let v = vec![1u32];
570    /// let iter = v.iter().map(|x| x * 2);
571    /// let buffer = unsafe { Buffer::from_trusted_len_iter(iter) };
572    /// assert_eq!(buffer.len(), 4) // u32 has 4 bytes
573    /// ```
574    /// # Safety
575    /// This method assumes that the iterator's size is correct and is undefined behavior
576    /// to use it on an iterator that reports an incorrect length.
577    // This implementation is required for two reasons:
578    // 1. there is no trait `TrustedLen` in stable rust and therefore
579    //    we can't specialize `extend` for `TrustedLen` like `Vec` does.
580    // 2. `from_trusted_len_iter` is faster.
581    #[inline]
582    pub unsafe fn from_trusted_len_iter<T: ArrowNativeType, I: Iterator<Item = T>>(
583        iterator: I,
584    ) -> Self {
585        unsafe { MutableBuffer::from_trusted_len_iter(iterator).into() }
586    }
587
588    /// Creates a [`Buffer`] from an [`Iterator`] with a trusted (upper) length or errors
589    /// if any of the items of the iterator is an error.
590    /// Prefer this to `collect` whenever possible, as it is ~60% faster.
591    /// # Safety
592    /// This method assumes that the iterator's size is correct and is undefined behavior
593    /// to use it on an iterator that reports an incorrect length.
594    #[inline]
595    pub unsafe fn try_from_trusted_len_iter<
596        E,
597        T: ArrowNativeType,
598        I: Iterator<Item = Result<T, E>>,
599    >(
600        iterator: I,
601    ) -> Result<Self, E> {
602        unsafe { Ok(MutableBuffer::try_from_trusted_len_iter(iterator)?.into()) }
603    }
604}
605
606impl<T: ArrowNativeType> FromIterator<T> for Buffer {
607    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
608        let vec = Vec::from_iter(iter);
609        Buffer::from_vec(vec)
610    }
611}
612
613#[cfg(test)]
614mod tests {
615    use crate::i256;
616    use std::panic::{RefUnwindSafe, UnwindSafe};
617    use std::thread;
618
619    use super::*;
620
621    #[test]
622    fn test_buffer_data_equality() {
623        let buf1 = Buffer::from(&[0, 1, 2, 3, 4]);
624        let buf2 = Buffer::from(&[0, 1, 2, 3, 4]);
625        assert_eq!(buf1, buf2);
626
627        // slice with same offset and same length should still preserve equality
628        let buf3 = buf1.slice(2);
629        assert_ne!(buf1, buf3);
630        let buf4 = buf2.slice_with_length(2, 3);
631        assert_eq!(buf3, buf4);
632
633        // Different capacities should still preserve equality
634        let mut buf2 = MutableBuffer::new(65);
635        buf2.extend_from_slice(&[0u8, 1, 2, 3, 4]);
636
637        let buf2 = buf2.into();
638        assert_eq!(buf1, buf2);
639
640        // unequal because of different elements
641        let buf2 = Buffer::from(&[0, 0, 2, 3, 4]);
642        assert_ne!(buf1, buf2);
643
644        // unequal because of different length
645        let buf2 = Buffer::from(&[0, 1, 2, 3]);
646        assert_ne!(buf1, buf2);
647    }
648
649    #[test]
650    fn test_from_raw_parts() {
651        let buf = Buffer::from(&[0, 1, 2, 3, 4]);
652        assert_eq!(5, buf.len());
653        assert!(!buf.as_ptr().is_null());
654        assert_eq!([0, 1, 2, 3, 4], buf.as_slice());
655    }
656
657    #[test]
658    fn test_from_vec() {
659        let buf = Buffer::from(&[0, 1, 2, 3, 4]);
660        assert_eq!(5, buf.len());
661        assert!(!buf.as_ptr().is_null());
662        assert_eq!([0, 1, 2, 3, 4], buf.as_slice());
663    }
664
665    #[test]
666    fn test_copy() {
667        let buf = Buffer::from(&[0, 1, 2, 3, 4]);
668        let buf2 = buf;
669        assert_eq!(5, buf2.len());
670        assert_eq!(64, buf2.capacity());
671        assert!(!buf2.as_ptr().is_null());
672        assert_eq!([0, 1, 2, 3, 4], buf2.as_slice());
673    }
674
675    #[test]
676    fn test_slice() {
677        let buf = Buffer::from(&[2, 4, 6, 8, 10]);
678        let buf2 = buf.slice(2);
679
680        assert_eq!([6, 8, 10], buf2.as_slice());
681        assert_eq!(3, buf2.len());
682        assert_eq!(unsafe { buf.as_ptr().offset(2) }, buf2.as_ptr());
683
684        let buf3 = buf2.slice_with_length(1, 2);
685        assert_eq!([8, 10], buf3.as_slice());
686        assert_eq!(2, buf3.len());
687        assert_eq!(unsafe { buf.as_ptr().offset(3) }, buf3.as_ptr());
688
689        let buf4 = buf.slice(5);
690        let empty_slice: [u8; 0] = [];
691        assert_eq!(empty_slice, buf4.as_slice());
692        assert_eq!(0, buf4.len());
693        assert!(buf4.is_empty());
694        assert_eq!(buf2.slice_with_length(2, 1).as_slice(), &[10]);
695    }
696
697    #[test]
698    fn test_shrink_to_fit() {
699        let original = Buffer::from(&[0, 1, 2, 3, 4, 5, 6, 7]);
700        assert_eq!(original.as_slice(), &[0, 1, 2, 3, 4, 5, 6, 7]);
701        assert_eq!(original.capacity(), 64);
702
703        let slice = original.slice_with_length(2, 3);
704        drop(original); // Make sure the buffer isn't shared (or shrink_to_fit won't work)
705        assert_eq!(slice.as_slice(), &[2, 3, 4]);
706        assert_eq!(slice.capacity(), 64);
707
708        let mut shrunk = slice;
709        shrunk.shrink_to_fit();
710        assert_eq!(shrunk.as_slice(), &[2, 3, 4]);
711        assert_eq!(shrunk.capacity(), 5); // shrink_to_fit is allowed to keep the elements before the offset
712
713        // Test that we can handle empty slices:
714        let empty_slice = shrunk.slice_with_length(1, 0);
715        drop(shrunk); // Make sure the buffer isn't shared (or shrink_to_fit won't work)
716        assert_eq!(empty_slice.as_slice(), &[]);
717        assert_eq!(empty_slice.capacity(), 5);
718
719        let mut shrunk_empty = empty_slice;
720        shrunk_empty.shrink_to_fit();
721        assert_eq!(shrunk_empty.as_slice(), &[]);
722        assert_eq!(shrunk_empty.capacity(), 0);
723    }
724
725    #[test]
726    #[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")]
727    fn test_slice_offset_out_of_bound() {
728        let buf = Buffer::from(&[2, 4, 6, 8, 10]);
729        buf.slice(6);
730    }
731
732    #[test]
733    fn test_access_concurrently() {
734        let buffer = Buffer::from([1, 2, 3, 4, 5]);
735        let buffer2 = buffer.clone();
736        assert_eq!([1, 2, 3, 4, 5], buffer.as_slice());
737
738        let buffer_copy = thread::spawn(move || {
739            // access buffer in another thread.
740            buffer
741        })
742        .join();
743
744        assert!(buffer_copy.is_ok());
745        assert_eq!(buffer2, buffer_copy.ok().unwrap());
746    }
747
748    macro_rules! check_as_typed_data {
749        ($input: expr, $native_t: ty) => {{
750            let buffer = Buffer::from_slice_ref($input);
751            let slice: &[$native_t] = buffer.typed_data::<$native_t>();
752            assert_eq!($input, slice);
753        }};
754    }
755
756    #[test]
757    #[allow(clippy::float_cmp)]
758    fn test_as_typed_data() {
759        check_as_typed_data!(&[1i8, 3i8, 6i8], i8);
760        check_as_typed_data!(&[1u8, 3u8, 6u8], u8);
761        check_as_typed_data!(&[1i16, 3i16, 6i16], i16);
762        check_as_typed_data!(&[1i32, 3i32, 6i32], i32);
763        check_as_typed_data!(&[1i64, 3i64, 6i64], i64);
764        check_as_typed_data!(&[1u16, 3u16, 6u16], u16);
765        check_as_typed_data!(&[1u32, 3u32, 6u32], u32);
766        check_as_typed_data!(&[1u64, 3u64, 6u64], u64);
767        check_as_typed_data!(&[1f32, 3f32, 6f32], f32);
768        check_as_typed_data!(&[1f64, 3f64, 6f64], f64);
769    }
770
771    #[test]
772    fn test_count_bits() {
773        assert_eq!(0, Buffer::from(&[0b00000000]).count_set_bits_offset(0, 8));
774        assert_eq!(8, Buffer::from(&[0b11111111]).count_set_bits_offset(0, 8));
775        assert_eq!(3, Buffer::from(&[0b00001101]).count_set_bits_offset(0, 8));
776        assert_eq!(
777            6,
778            Buffer::from(&[0b01001001, 0b01010010]).count_set_bits_offset(0, 16)
779        );
780        assert_eq!(
781            16,
782            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(0, 16)
783        );
784    }
785
786    #[test]
787    fn test_count_bits_slice() {
788        assert_eq!(
789            0,
790            Buffer::from(&[0b11111111, 0b00000000])
791                .slice(1)
792                .count_set_bits_offset(0, 8)
793        );
794        assert_eq!(
795            8,
796            Buffer::from(&[0b11111111, 0b11111111])
797                .slice_with_length(1, 1)
798                .count_set_bits_offset(0, 8)
799        );
800        assert_eq!(
801            3,
802            Buffer::from(&[0b11111111, 0b11111111, 0b00001101])
803                .slice(2)
804                .count_set_bits_offset(0, 8)
805        );
806        assert_eq!(
807            6,
808            Buffer::from(&[0b11111111, 0b01001001, 0b01010010])
809                .slice_with_length(1, 2)
810                .count_set_bits_offset(0, 16)
811        );
812        assert_eq!(
813            16,
814            Buffer::from(&[0b11111111, 0b11111111, 0b11111111, 0b11111111])
815                .slice(2)
816                .count_set_bits_offset(0, 16)
817        );
818    }
819
820    #[test]
821    fn test_count_bits_offset_slice() {
822        assert_eq!(8, Buffer::from(&[0b11111111]).count_set_bits_offset(0, 8));
823        assert_eq!(3, Buffer::from(&[0b11111111]).count_set_bits_offset(0, 3));
824        assert_eq!(5, Buffer::from(&[0b11111111]).count_set_bits_offset(3, 5));
825        assert_eq!(1, Buffer::from(&[0b11111111]).count_set_bits_offset(3, 1));
826        assert_eq!(0, Buffer::from(&[0b11111111]).count_set_bits_offset(8, 0));
827        assert_eq!(2, Buffer::from(&[0b01010101]).count_set_bits_offset(0, 3));
828        assert_eq!(
829            16,
830            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(0, 16)
831        );
832        assert_eq!(
833            10,
834            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(0, 10)
835        );
836        assert_eq!(
837            10,
838            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(3, 10)
839        );
840        assert_eq!(
841            8,
842            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(8, 8)
843        );
844        assert_eq!(
845            5,
846            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(11, 5)
847        );
848        assert_eq!(
849            0,
850            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(16, 0)
851        );
852        assert_eq!(
853            2,
854            Buffer::from(&[0b01101101, 0b10101010]).count_set_bits_offset(7, 5)
855        );
856        assert_eq!(
857            4,
858            Buffer::from(&[0b01101101, 0b10101010]).count_set_bits_offset(7, 9)
859        );
860    }
861
862    #[test]
863    fn test_unwind_safe() {
864        fn assert_unwind_safe<T: RefUnwindSafe + UnwindSafe>() {}
865        assert_unwind_safe::<Buffer>()
866    }
867
#[test]
fn test_from_foreign_vec() {
    let mut values = vec![1_i32, 2, 3, 4, 5];
    let byte_len = values.len() * std::mem::size_of::<i32>();

    // SAFETY: the pointer comes from a live `Vec`, which is never null.
    let data = unsafe { NonNull::new_unchecked(values.as_mut_ptr().cast::<u8>()) };

    // SAFETY: `data` addresses `byte_len` initialized bytes owned by `values`,
    // and `values` is moved into the `Arc` passed as the allocation owner, so
    // the heap allocation stays where it is.
    let buffer = unsafe { Buffer::from_custom_allocation(data, byte_len, Arc::new(values)) };

    // The whole allocation is visible as the original `i32` values.
    assert_eq!(buffer.typed_data::<i32>(), &[1, 2, 3, 4, 5]);

    // Skipping one `i32` worth of bytes drops the first element.
    let tail = buffer.slice(std::mem::size_of::<i32>());
    assert_eq!(tail.typed_data::<i32>(), &[2, 3, 4, 5]);
}
887
#[test]
#[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")]
fn slice_overflow() {
    // A 12-byte zeroed buffer. The requested length is far larger than the
    // buffer, so the slice call is expected to panic with the message above.
    let zeroed: Buffer = MutableBuffer::from_len_zeroed(12).into();
    zeroed.slice_with_length(2, usize::MAX);
}
894
#[test]
fn test_vec_interop() {
    // An empty vec converts to a buffer and back.
    {
        let empty: Vec<i128> = Vec::new();
        Buffer::from_vec(empty).into_vec::<i128>().unwrap();
    }

    // A vec with reserved capacity but no elements keeps its capacity.
    {
        let reserved: Vec<i128> = Vec::with_capacity(20);
        let restored = Buffer::from_vec(reserved).into_vec::<i128>().unwrap();
        assert_eq!(restored.len(), 0);
        assert_eq!(restored.capacity(), 20);
    }

    // A vec whose values fill its capacity.
    {
        let mut full: Vec<i128> = Vec::with_capacity(3);
        full.extend_from_slice(&[1, 2, 3]);
        let restored = Buffer::from_vec(full).into_vec::<i128>().unwrap();
        assert_eq!(restored.len(), 3);
        assert_eq!(restored.capacity(), 3);
    }

    // A vec with values and spare capacity.
    {
        let mut partial: Vec<i128> = Vec::with_capacity(20);
        partial.extend_from_slice(&[1, 4, 7, 8, 9, 3, 6]);
        let restored = Buffer::from_vec(partial).into_vec::<i128>().unwrap();
        assert_eq!(restored.len(), 7);
        assert_eq!(restored.capacity(), 20);
    }

    // Incorrect alignment: the conversion fails and hands the buffer back in
    // the `Err`, so a second mismatched conversion can be attempted on it.
    {
        let wide: Vec<i128> = Vec::new();
        let rejected = Buffer::from_vec(wide).into_vec::<i32>().unwrap_err();
        rejected.into_vec::<i8>().unwrap_err();
    }

    // Converting between types with the same alignment succeeds.
    // This is an implementation quirk, but isn't harmful
    // as ArrowNativeType are trivially transmutable.
    {
        let signed: Vec<i64> = vec![1, 2, 3, 4];
        let unsigned = Buffer::from_vec(signed).into_vec::<u64>().unwrap();
        assert_eq!(unsigned.len(), 4);
        assert_eq!(unsigned.capacity(), 4);
    }

    // i256 has the same layout as i128 so this is valid: four i128 values
    // come back as two i256 values.
    {
        let mut halves: Vec<i128> = Vec::with_capacity(4);
        halves.extend_from_slice(&[1, 2, 3, 4]);
        let restored = Buffer::from_vec(halves).into_vec::<i256>().unwrap();
        assert_eq!(restored.len(), 2);
        assert_eq!(restored.capacity(), 2);
    }

    // Invalid layout: three i128 values.
    {
        let odd: Vec<i128> = vec![1, 2, 3];
        Buffer::from_vec(odd).into_vec::<i256>().unwrap_err();
    }

    // Invalid layout: four i128 values in a capacity of five.
    {
        let mut odd_capacity: Vec<i128> = Vec::with_capacity(5);
        odd_capacity.extend_from_slice(&[1, 2, 3, 4]);
        Buffer::from_vec(odd_capacity).into_vec::<i256>().unwrap_err();
    }

    // Truncates length: three i128 values in a capacity of four come back as
    // a single i256 with capacity two.
    // This is an implementation quirk, but isn't harmful.
    {
        let mut trailing: Vec<i128> = Vec::with_capacity(4);
        trailing.extend_from_slice(&[1, 2, 3]);
        let restored = Buffer::from_vec(trailing).into_vec::<i256>().unwrap();
        assert_eq!(restored.len(), 1);
        assert_eq!(restored.capacity(), 2);
    }

    // Cannot use aligned allocation: a buffer built from a `MutableBuffer`
    // is rejected for both element types tried here.
    {
        let aligned = Buffer::from(MutableBuffer::new(10));
        let aligned = aligned.into_vec::<u8>().unwrap_err();
        aligned.into_vec::<u64>().unwrap_err();
    }

    // Slicing: a 64-byte prefix of the buffer.
    {
        let mut source: Vec<i128> = Vec::with_capacity(20);
        source.extend_from_slice(&[1, 4, 7, 8, 9, 3, 6]);
        let parent = Buffer::from_vec(source);
        let window = parent.slice_with_length(0, 64);

        // Shared reference fails
        let window = window.into_vec::<i128>().unwrap_err();
        drop(parent);

        // Succeeds as no outstanding shared reference
        let restored = window.into_vec::<i128>().unwrap();
        assert_eq!(&restored, &[1, 4, 7, 8]);
        assert_eq!(restored.capacity(), 20);
    }

    // Slicing by a length that is not a multiple of the element size truncates.
    {
        let mut source: Vec<i128> = Vec::with_capacity(8);
        source.extend_from_slice(&[1, 4, 7, 3]);

        let parent = Buffer::from_vec(source);
        let window = parent.slice_with_length(0, 34);
        drop(parent);

        let restored = window.into_vec::<i128>().unwrap();
        assert_eq!(&restored, &[1, 4]);
        assert_eq!(restored.capacity(), 8);
    }

    // Offset prevents conversion. The un-sliced buffer is a temporary that is
    // gone before `into_vec` runs, so the slice is the only handle left.
    {
        let words: Vec<u32> = vec![1, 3, 4, 6];
        let shifted = Buffer::from_vec(words).slice(2);
        shifted.into_vec::<u32>().unwrap_err();
    }

    // A `MutableBuffer`-backed buffer cannot become a vec, but can become
    // mutable again.
    {
        let aligned = MutableBuffer::new(16).into_buffer();
        let aligned = aligned.into_vec::<u8>().unwrap_err(); // Invalid layout
        let aligned = aligned.into_vec::<u32>().unwrap_err(); // Invalid layout
        aligned.into_mutable().unwrap();
    }

    // vec -> buffer -> mutable buffer -> buffer -> vec keeps the values.
    {
        let shared = Buffer::from_vec(vec![1_u32, 3, 5]);
        let mutable = shared.into_mutable().unwrap();
        let restored = Buffer::from(mutable).into_vec::<u32>().unwrap();
        assert_eq!(restored, &[1, 3, 5]);
    }
}
1016
#[test]
#[should_panic(expected = "capacity overflow")]
fn test_from_iter_overflow() {
    // One element more than fits: the total byte size of this many `u64`
    // values exceeds `usize::MAX`.
    let too_many = usize::MAX / std::mem::size_of::<u64>() + 1;
    let oversized = std::iter::repeat_n(0_u64, too_many);
    let _ = Buffer::from_iter(oversized);
}
1023
#[test]
fn bit_slice_length_preserved() {
    const TOTAL_BITS: usize = 64;

    // The contents are not interesting: every bit is set.
    let source = Buffer::from_iter(std::iter::repeat_n(true, TOTAL_BITS));

    // Try every bit offset into the buffer...
    for offset in 0..=TOTAL_BITS {
        // ...paired with every length that keeps `offset + len` inside it.
        for len in 0..=(TOTAL_BITS - offset) {
            let sliced = source.bit_slice(offset, len);
            assert_eq!(sliced.len(), bit_util::ceil(len, 8));

            // The `offset` stored inside a Buffer is byte-granular, not
            // bit-granular. A bit offset that is not byte-aligned therefore
            // needs a deep copy into a new buffer, which reports offset 0. A
            // byte-aligned bit offset should not copy, so the original byte
            // offset must show through.
            let expected_ptr_offset = if offset % 8 == 0 { offset / 8 } else { 0 };
            assert_eq!(sliced.ptr_offset(), expected_ptr_offset);
        }
    }
}
1056
#[test]
fn test_strong_count() {
    let first = Buffer::from_iter(std::iter::repeat_n(0_u8, 100));
    assert_eq!(first.strong_count(), 1);

    // Each clone adds one to the count seen through the first handle.
    let second = first.clone();
    assert_eq!(first.strong_count(), 2);

    let third = second.clone();
    assert_eq!(first.strong_count(), 3);

    // Dropping a handle lowers the count for the remaining ones.
    drop(first);
    assert_eq!(second.strong_count(), 2);
    assert_eq!(third.strong_count(), 2);

    // Strong count does not increase on move
    let holder = move || {
        assert_eq!(third.strong_count(), 2);
    };

    holder();
    assert_eq!(second.strong_count(), 2);

    // Dropping the closure drops the handle it captured.
    drop(holder);
    assert_eq!(second.strong_count(), 1);
}
1083}