// arrow_buffer/buffer/immutable.rs

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
17
use std::alloc::Layout;
use std::fmt::Debug;
use std::ptr::NonNull;
use std::sync::Arc;

use crate::BufferBuilder;
use crate::alloc::{Allocation, Deallocation};
use crate::util::bit_chunk_iterator::{BitChunks, UnalignedBitChunk};
use crate::{bit_util, bytes::Bytes, native::ArrowNativeType};

#[cfg(feature = "pool")]
use crate::pool::MemoryPool;

use super::ops::bitwise_unary_op_helper;
use super::{MutableBuffer, ScalarBuffer};
33
/// A contiguous memory region that can be shared with other buffers and across
/// thread boundaries that stores Arrow data.
///
/// `Buffer`s can be sliced and cloned without copying the underlying data and can
/// be created from memory allocated by non-Rust sources such as C/C++.
///
/// # Example: Create a `Buffer` from a `Vec` (without copying)
/// ```
/// # use arrow_buffer::Buffer;
/// let vec: Vec<u32> = vec![1, 2, 3];
/// let buffer = Buffer::from(vec);
/// ```
///
/// # Example: Convert a `Buffer` to a `Vec` (without copying)
///
/// Use [`Self::into_vec`] to convert a `Buffer` back into a `Vec` if there are
/// no other references and the types are aligned correctly.
/// ```
/// # use arrow_buffer::Buffer;
/// # let vec: Vec<u32> = vec![1, 2, 3];
/// # let buffer = Buffer::from(vec);
/// // convert the buffer back into a Vec of u32
/// // note this will fail if the buffer is shared or not aligned correctly
/// let vec: Vec<u32> = buffer.into_vec().unwrap();
/// ```
///
/// # Example: Create a `Buffer` from a [`bytes::Bytes`] (without copying)
///
/// [`bytes::Bytes`] is a common type in the Rust ecosystem for shared memory
/// regions. You can create a buffer from a `Bytes` instance using the `From`
/// implementation, also without copying.
///
/// ```
/// # use arrow_buffer::Buffer;
/// let bytes = bytes::Bytes::from("hello");
/// let buffer = Buffer::from(bytes);
///```
#[derive(Clone, Debug)]
pub struct Buffer {
    /// The internal byte buffer. Shared via `Arc`, so cloning a `Buffer`
    /// bumps a reference count rather than copying the bytes.
    data: Arc<Bytes>,

    /// Pointer into `data` valid
    ///
    /// Invariant: always points within the allocation owned by `data`
    /// (see `Self::ptr_offset`, which relies on this for `offset_from`).
    ///
    /// We store a pointer instead of an offset to avoid pointer arithmetic
    /// which causes LLVM to fail to vectorise code correctly
    ptr: *const u8,

    /// Byte length of the buffer.
    ///
    /// Must be less than or equal to `data.len()`
    length: usize,
}
87
88impl Default for Buffer {
89    #[inline]
90    fn default() -> Self {
91        MutableBuffer::default().into()
92    }
93}
94
95impl PartialEq for Buffer {
96    fn eq(&self, other: &Self) -> bool {
97        self.as_slice().eq(other.as_slice())
98    }
99}
100
impl Eq for Buffer {}

// SAFETY: `Buffer` is an `Arc<Bytes>` plus a raw pointer *into* that same
// allocation and a length. The raw pointer field is what prevents the
// auto-traits from being derived; it carries no ownership of its own, so
// sending/sharing a `Buffer` across threads is sound whenever `Bytes` is.
unsafe impl Send for Buffer where Bytes: Send {}
unsafe impl Sync for Buffer where Bytes: Sync {}
105
impl Buffer {
    /// Create a new Buffer from a (internal) `Bytes`
    ///
    /// NOTE despite the same name, `Bytes` is an internal struct in arrow-rs
    /// and is different than [`bytes::Bytes`].
    ///
    /// See examples on [`Buffer`] for ways to create a buffer from a [`bytes::Bytes`].
    #[deprecated(since = "54.1.0", note = "Use Buffer::from instead")]
    pub fn from_bytes(bytes: Bytes) -> Self {
        Self::from(bytes)
    }

    /// Returns the offset, in bytes, of `Self::ptr` to `Self::data`
    ///
    /// self.ptr and self.data can be different after slicing or advancing the buffer.
    pub fn ptr_offset(&self) -> usize {
        // Safety: `ptr` is always in bounds of `data`, so `offset_from` is
        // well-defined and the result is non-negative (cast to usize is lossless).
        unsafe { self.ptr.offset_from(self.data.ptr().as_ptr()) as usize }
    }

    /// Returns the pointer to the start of the buffer without the offset.
    pub fn data_ptr(&self) -> NonNull<u8> {
        self.data.ptr()
    }

    /// Returns the number of strong references to the buffer.
    ///
    /// This method is safe but if the buffer is shared across multiple threads
    /// the underlying value could change between calling this method and using
    /// the result.
    pub fn strong_count(&self) -> usize {
        Arc::strong_count(&self.data)
    }

    /// Create a [`Buffer`] from the provided [`Vec`] without copying
    #[inline]
    pub fn from_vec<T: ArrowNativeType>(vec: Vec<T>) -> Self {
        MutableBuffer::from(vec).into()
    }

    /// Initializes a [Buffer] from a slice of items.
    ///
    /// Note: unlike [`Self::from_vec`] this copies the slice contents into a
    /// newly allocated buffer.
    pub fn from_slice_ref<U: ArrowNativeType, T: AsRef<[U]>>(items: T) -> Self {
        let slice = items.as_ref();
        // Size in bytes of the whole slice, not the element count.
        let capacity = std::mem::size_of_val(slice);
        let mut buffer = MutableBuffer::with_capacity(capacity);
        buffer.extend_from_slice(slice);
        buffer.into()
    }

    /// Creates a buffer from an existing memory region.
    ///
    /// Ownership of the memory is tracked via reference counting
    /// and the memory will be freed using the `drop` method of
    /// [crate::alloc::Allocation] when the reference count reaches zero.
    ///
    /// # Arguments
    ///
    /// * `ptr` - Pointer to raw parts
    /// * `len` - Length of raw parts in **bytes**
    /// * `owner` - A [crate::alloc::Allocation] which is responsible for freeing that data
    ///
    /// # Safety
    ///
    /// This function is unsafe as there is no guarantee that the given pointer is valid for `len` bytes
    pub unsafe fn from_custom_allocation(
        ptr: NonNull<u8>,
        len: usize,
        owner: Arc<dyn Allocation>,
    ) -> Self {
        unsafe { Buffer::build_with_arguments(ptr, len, Deallocation::Custom(owner, len)) }
    }

    /// Auxiliary method to create a new Buffer
    ///
    /// # Safety
    /// Caller must guarantee `ptr` is valid for `len` bytes and consistent
    /// with the provided `deallocation` strategy.
    unsafe fn build_with_arguments(
        ptr: NonNull<u8>,
        len: usize,
        deallocation: Deallocation,
    ) -> Self {
        let bytes = unsafe { Bytes::new(ptr, len, deallocation) };
        // Capture the data pointer before `bytes` is moved into the Arc.
        let ptr = bytes.as_ptr();
        Buffer {
            ptr,
            data: Arc::new(bytes),
            length: len,
        }
    }

    /// Returns the number of bytes in the buffer
    #[inline]
    pub fn len(&self) -> usize {
        self.length
    }

    /// Returns the capacity of this buffer.
    /// For externally owned buffers, this returns zero
    #[inline]
    pub fn capacity(&self) -> usize {
        self.data.capacity()
    }

    /// Tries to shrink the capacity of the buffer as much as possible, freeing unused memory.
    ///
    /// If the buffer is shared, this is a no-op.
    ///
    /// If the memory was allocated with a custom allocator, this is a no-op.
    ///
    /// If the capacity is already less than or equal to the desired capacity, this is a no-op.
    ///
    /// The memory region will be reallocated using `std::alloc::realloc`.
    pub fn shrink_to_fit(&mut self) {
        let offset = self.ptr_offset();
        let is_empty = self.is_empty();
        let desired_capacity = if is_empty {
            0
        } else {
            // For realloc to work, we cannot free the elements before the offset
            offset + self.len()
        };
        if desired_capacity < self.capacity() {
            // `Arc::get_mut` returns `Some` only when this buffer is not shared;
            // a shared buffer silently skips the shrink.
            if let Some(bytes) = Arc::get_mut(&mut self.data) {
                if bytes.try_realloc(desired_capacity).is_ok() {
                    // Realloc complete - update our pointer into `bytes`:
                    self.ptr = if is_empty {
                        bytes.as_ptr()
                    } else {
                        // SAFETY: we kept all elements leading up to the offset
                        unsafe { bytes.as_ptr().add(offset) }
                    }
                } else {
                    // Failure to reallocate is fine; we just failed to free up memory.
                }
            }
        }
    }

    /// Returns true if the buffer is empty.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.length == 0
    }

    /// Returns the byte slice stored in this buffer
    pub fn as_slice(&self) -> &[u8] {
        // SAFETY: `ptr` points into `data` and `length` bytes starting there
        // are in bounds (struct invariants).
        unsafe { std::slice::from_raw_parts(self.ptr, self.length) }
    }

    /// Returns the deallocation strategy backing this buffer.
    pub(crate) fn deallocation(&self) -> &Deallocation {
        self.data.deallocation()
    }

    /// Returns a new [Buffer] that is a slice of this buffer starting at `offset`.
    ///
    /// This function is `O(1)` and does not copy any data, allowing the
    /// same memory region to be shared between buffers.
    ///
    /// # Panics
    ///
    /// Panics iff `offset` is larger than `len`.
    pub fn slice(&self, offset: usize) -> Self {
        let mut s = self.clone();
        s.advance(offset);
        s
    }

    /// Increases the offset of this buffer by `offset`
    ///
    /// # Panics
    ///
    /// Panics iff `offset` is larger than `len`.
    #[inline]
    pub fn advance(&mut self, offset: usize) {
        assert!(
            offset <= self.length,
            "the offset of the new Buffer cannot exceed the existing length: offset={} length={}",
            offset,
            self.length
        );
        self.length -= offset;
        // Safety:
        // This cannot overflow as
        // `self.offset + self.length < self.data.len()`
        // `offset < self.length`
        self.ptr = unsafe { self.ptr.add(offset) };
    }

    /// Returns a new [Buffer] that is a slice of this buffer starting at `offset`,
    /// with `length` bytes.
    ///
    /// This function is `O(1)` and does not copy any data, allowing the same
    /// memory region to be shared between buffers.
    ///
    /// # Panics
    /// Panics iff `(offset + length)` is larger than the existing length.
    pub fn slice_with_length(&self, offset: usize, length: usize) -> Self {
        // `saturating_add` keeps the bounds check meaningful even when
        // `offset + length` would overflow `usize`.
        assert!(
            offset.saturating_add(length) <= self.length,
            "the offset of the new Buffer cannot exceed the existing length: slice offset={offset} length={length} selflen={}",
            self.length
        );
        // Safety:
        // offset + length <= self.length
        let ptr = unsafe { self.ptr.add(offset) };
        Self {
            data: self.data.clone(),
            ptr,
            length,
        }
    }

    /// Returns a pointer to the start of this buffer.
    ///
    /// Note that this should be used cautiously, and the returned pointer should not be
    /// stored anywhere, to avoid dangling pointers.
    #[inline]
    pub fn as_ptr(&self) -> *const u8 {
        self.ptr
    }

    /// View buffer as a slice of a specific type.
    ///
    /// # Panics
    ///
    /// This function panics if the underlying buffer is not aligned
    /// correctly for type `T`.
    pub fn typed_data<T: ArrowNativeType>(&self) -> &[T] {
        // SAFETY
        // ArrowNativeType is trivially transmutable, is sealed to prevent potentially incorrect
        // implementation outside this crate, and this method checks alignment
        let (prefix, offsets, suffix) = unsafe { self.as_slice().align_to::<T>() };
        // Non-empty prefix/suffix means the buffer was misaligned or its
        // length is not a multiple of `size_of::<T>()`.
        assert!(prefix.is_empty() && suffix.is_empty());
        offsets
    }

    /// Returns a slice of this buffer starting at a certain bit offset.
    /// If the offset is byte-aligned the returned buffer is a shallow clone,
    /// otherwise a new buffer is allocated and filled with a copy of the bits in the range.
    pub fn bit_slice(&self, offset: usize, len: usize) -> Self {
        if offset % 8 == 0 {
            // Byte aligned: zero-copy slice over whole bytes.
            return self.slice_with_length(offset / 8, bit_util::ceil(len, 8));
        }

        // Unaligned: copy the bits out via an identity bitwise op.
        bitwise_unary_op_helper(self, offset, len, |a| a)
    }

    /// Returns a `BitChunks` instance which can be used to iterate over this buffers bits
    /// in larger chunks and starting at arbitrary bit offsets.
    /// Note that both `offset` and `length` are measured in bits.
    pub fn bit_chunks(&self, offset: usize, len: usize) -> BitChunks<'_> {
        BitChunks::new(self.as_slice(), offset, len)
    }

    /// Returns the number of 1-bits in this buffer, starting from `offset` with `length` bits
    /// inspected. Note that both `offset` and `length` are measured in bits.
    pub fn count_set_bits_offset(&self, offset: usize, len: usize) -> usize {
        UnalignedBitChunk::new(self.as_slice(), offset, len).count_ones()
    }

    /// Returns `MutableBuffer` for mutating the buffer if this buffer is not shared.
    /// Returns `Err` if this is shared or its allocation is from an external source or
    /// it is not allocated with alignment [`ALIGNMENT`]
    ///
    /// # Example: Creating a [`MutableBuffer`] from a [`Buffer`]
    /// ```
    /// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
    /// let buffer: Buffer = Buffer::from(&[1u8, 2, 3, 4][..]);
    /// // Only possible to convert a Buffer into a MutableBuffer if uniquely owned
    /// // (i.e., there are no other references to it).
    /// let mut mutable_buffer = match buffer.into_mutable() {
    ///    Ok(mutable) => mutable,
    ///    Err(orig_buffer) => {
    ///      panic!("buffer was not uniquely owned");
    ///    }
    /// };
    /// mutable_buffer.push(5u8);
    /// let buffer = Buffer::from(mutable_buffer);
    /// assert_eq!(buffer.as_slice(), &[1u8, 2, 3, 4, 5])
    /// ```
    ///
    /// [`ALIGNMENT`]: crate::alloc::ALIGNMENT
    pub fn into_mutable(self) -> Result<MutableBuffer, Self> {
        // Stash the view so the `Buffer` can be reconstructed on failure.
        let ptr = self.ptr;
        let length = self.length;
        // `Arc::try_unwrap` succeeds only when this is the sole reference;
        // otherwise it hands the `Arc` back via `Err`.
        Arc::try_unwrap(self.data)
            .and_then(|bytes| {
                // The pointer of underlying buffer should not be offset.
                assert_eq!(ptr, bytes.ptr().as_ptr());
                MutableBuffer::from_bytes(bytes).map_err(Arc::new)
            })
            .map_err(|bytes| Buffer {
                data: bytes,
                ptr,
                length,
            })
    }

    /// Converts self into a `Vec`, if possible.
    ///
    /// This can be used to reuse / mutate the underlying data.
    ///
    /// # Errors
    ///
    /// Returns `Err(self)` if
    /// 1. The buffer does not have the same [`Layout`] as the destination Vec
    /// 2. The buffer contains a non-zero offset
    /// 3. The buffer is shared
    pub fn into_vec<T: ArrowNativeType>(self) -> Result<Vec<T>, Self> {
        let layout = match self.data.deallocation() {
            Deallocation::Standard(l) => l,
            _ => return Err(self), // Custom allocation
        };

        if self.ptr != self.data.as_ptr() {
            return Err(self); // Data is offset
        }

        // The allocation's layout must exactly match what `Vec::<T>` would
        // have produced, otherwise `Vec::from_raw_parts` would be unsound.
        let v_capacity = layout.size() / std::mem::size_of::<T>();
        match Layout::array::<T>(v_capacity) {
            Ok(expected) if layout == &expected => {}
            _ => return Err(self), // Incorrect layout
        }

        // Stash the view so the `Buffer` can be reconstructed on failure.
        let length = self.length;
        let ptr = self.ptr;
        let v_len = self.length / std::mem::size_of::<T>();

        Arc::try_unwrap(self.data)
            .map(|bytes| unsafe {
                let ptr = bytes.ptr().as_ptr() as _;
                // Transfer ownership of the allocation to the Vec; `forget`
                // prevents `Bytes` from freeing it on drop.
                std::mem::forget(bytes);
                // Safety
                // Verified that bytes layout matches that of Vec
                Vec::from_raw_parts(ptr, v_len, v_capacity)
            })
            .map_err(|bytes| Buffer {
                data: bytes,
                ptr,
                length,
            })
    }

    /// Returns true if this [`Buffer`] is equal to `other`, using pointer comparisons
    /// to determine buffer equality. This is cheaper than `PartialEq::eq` but may
    /// return false when the arrays are logically equal
    #[inline]
    pub fn ptr_eq(&self, other: &Self) -> bool {
        self.ptr == other.ptr && self.length == other.length
    }

    /// Register this [`Buffer`] with the provided [`MemoryPool`]
    ///
    /// This claims the memory used by this buffer in the pool, allowing for
    /// accurate accounting of memory usage. Any prior reservation will be
    /// released so this works well when the buffer is being shared among
    /// multiple arrays.
    #[cfg(feature = "pool")]
    pub fn claim(&self, pool: &dyn MemoryPool) {
        self.data.claim(pool)
    }
}
465
466/// Note that here we deliberately do not implement
467/// `impl<T: AsRef<[u8]>> From<T> for Buffer`
468/// As it would accept `Buffer::from(vec![...])` that would cause an unexpected copy.
469/// Instead, we ask user to be explicit when copying is occurring, e.g., `Buffer::from(vec![...].to_byte_slice())`.
470/// For zero-copy conversion, user should use `Buffer::from_vec(vec![...])`.
471///
472/// Since we removed impl for `AsRef<u8>`, we added the following three specific implementations to reduce API breakage.
473/// See <https://github.com/apache/arrow-rs/issues/6033> for more discussion on this.
474impl From<&[u8]> for Buffer {
475    fn from(p: &[u8]) -> Self {
476        Self::from_slice_ref(p)
477    }
478}
479
480impl<const N: usize> From<[u8; N]> for Buffer {
481    fn from(p: [u8; N]) -> Self {
482        Self::from_slice_ref(p)
483    }
484}
485
486impl<const N: usize> From<&[u8; N]> for Buffer {
487    fn from(p: &[u8; N]) -> Self {
488        Self::from_slice_ref(p)
489    }
490}
491
492impl<T: ArrowNativeType> From<Vec<T>> for Buffer {
493    fn from(value: Vec<T>) -> Self {
494        Self::from_vec(value)
495    }
496}
497
498impl<T: ArrowNativeType> From<ScalarBuffer<T>> for Buffer {
499    fn from(value: ScalarBuffer<T>) -> Self {
500        value.into_inner()
501    }
502}
503
504/// Convert from internal `Bytes` (not [`bytes::Bytes`]) to `Buffer`
505impl From<Bytes> for Buffer {
506    #[inline]
507    fn from(bytes: Bytes) -> Self {
508        let length = bytes.len();
509        let ptr = bytes.as_ptr();
510        Self {
511            data: Arc::new(bytes),
512            ptr,
513            length,
514        }
515    }
516}
517
518/// Convert from [`bytes::Bytes`], not internal `Bytes` to `Buffer`
519impl From<bytes::Bytes> for Buffer {
520    fn from(bytes: bytes::Bytes) -> Self {
521        let bytes: Bytes = bytes.into();
522        Self::from(bytes)
523    }
524}
525
526/// Create a `Buffer` instance by storing the boolean values into the buffer
527impl FromIterator<bool> for Buffer {
528    fn from_iter<I>(iter: I) -> Self
529    where
530        I: IntoIterator<Item = bool>,
531    {
532        MutableBuffer::from_iter(iter).into()
533    }
534}
535
536impl std::ops::Deref for Buffer {
537    type Target = [u8];
538
539    fn deref(&self) -> &[u8] {
540        unsafe { std::slice::from_raw_parts(self.as_ptr(), self.len()) }
541    }
542}
543
544impl AsRef<[u8]> for &Buffer {
545    fn as_ref(&self) -> &[u8] {
546        self.as_slice()
547    }
548}
549
550impl From<MutableBuffer> for Buffer {
551    #[inline]
552    fn from(buffer: MutableBuffer) -> Self {
553        buffer.into_buffer()
554    }
555}
556
557impl<T: ArrowNativeType> From<BufferBuilder<T>> for Buffer {
558    fn from(mut value: BufferBuilder<T>) -> Self {
559        value.finish()
560    }
561}
562
impl Buffer {
    /// Creates a [`Buffer`] from an [`Iterator`] with a trusted (upper) length.
    ///
    /// Prefer this to `collect` whenever possible, as it is ~60% faster.
    ///
    /// # Example
    /// ```
    /// # use arrow_buffer::buffer::Buffer;
    /// let v = vec![1u32];
    /// let iter = v.iter().map(|x| x * 2);
    /// let buffer = unsafe { Buffer::from_trusted_len_iter(iter) };
    /// assert_eq!(buffer.len(), 4) // u32 has 4 bytes
    /// ```
    /// # Safety
    /// This method assumes that the iterator's size is correct and is undefined behavior
    /// to use it on an iterator that reports an incorrect length.
    // This implementation is required for two reasons:
    // 1. there is no trait `TrustedLen` in stable rust and therefore
    //    we can't specialize `extend` for `TrustedLen` like `Vec` does.
    // 2. `from_trusted_len_iter` is faster.
    #[inline]
    pub unsafe fn from_trusted_len_iter<T: ArrowNativeType, I: Iterator<Item = T>>(
        iterator: I,
    ) -> Self {
        // Delegate to MutableBuffer; the caller's safety contract is forwarded.
        unsafe { MutableBuffer::from_trusted_len_iter(iterator).into() }
    }

    /// Creates a [`Buffer`] from an [`Iterator`] with a trusted (upper) length or errors
    /// if any of the items of the iterator is an error.
    /// Prefer this to `collect` whenever possible, as it is ~60% faster.
    /// # Safety
    /// This method assumes that the iterator's size is correct and is undefined behavior
    /// to use it on an iterator that reports an incorrect length.
    #[inline]
    pub unsafe fn try_from_trusted_len_iter<
        E,
        T: ArrowNativeType,
        I: Iterator<Item = Result<T, E>>,
    >(
        iterator: I,
    ) -> Result<Self, E> {
        // `?` propagates the first `Err` item; otherwise freeze into a Buffer.
        unsafe { Ok(MutableBuffer::try_from_trusted_len_iter(iterator)?.into()) }
    }
}
607
608impl<T: ArrowNativeType> FromIterator<T> for Buffer {
609    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
610        let vec = Vec::from_iter(iter);
611        Buffer::from_vec(vec)
612    }
613}
614
615#[cfg(test)]
616mod tests {
617    use crate::i256;
618    use std::panic::{RefUnwindSafe, UnwindSafe};
619    use std::thread;
620
621    use super::*;
622
623    #[test]
624    fn test_buffer_data_equality() {
625        let buf1 = Buffer::from(&[0, 1, 2, 3, 4]);
626        let buf2 = Buffer::from(&[0, 1, 2, 3, 4]);
627        assert_eq!(buf1, buf2);
628
629        // slice with same offset and same length should still preserve equality
630        let buf3 = buf1.slice(2);
631        assert_ne!(buf1, buf3);
632        let buf4 = buf2.slice_with_length(2, 3);
633        assert_eq!(buf3, buf4);
634
635        // Different capacities should still preserve equality
636        let mut buf2 = MutableBuffer::new(65);
637        buf2.extend_from_slice(&[0u8, 1, 2, 3, 4]);
638
639        let buf2 = buf2.into();
640        assert_eq!(buf1, buf2);
641
642        // unequal because of different elements
643        let buf2 = Buffer::from(&[0, 0, 2, 3, 4]);
644        assert_ne!(buf1, buf2);
645
646        // unequal because of different length
647        let buf2 = Buffer::from(&[0, 1, 2, 3]);
648        assert_ne!(buf1, buf2);
649    }
650
651    #[test]
652    fn test_from_raw_parts() {
653        let buf = Buffer::from(&[0, 1, 2, 3, 4]);
654        assert_eq!(5, buf.len());
655        assert!(!buf.as_ptr().is_null());
656        assert_eq!([0, 1, 2, 3, 4], buf.as_slice());
657    }
658
659    #[test]
660    fn test_from_vec() {
661        let buf = Buffer::from(&[0, 1, 2, 3, 4]);
662        assert_eq!(5, buf.len());
663        assert!(!buf.as_ptr().is_null());
664        assert_eq!([0, 1, 2, 3, 4], buf.as_slice());
665    }
666
667    #[test]
668    fn test_copy() {
669        let buf = Buffer::from(&[0, 1, 2, 3, 4]);
670        let buf2 = buf;
671        assert_eq!(5, buf2.len());
672        assert_eq!(64, buf2.capacity());
673        assert!(!buf2.as_ptr().is_null());
674        assert_eq!([0, 1, 2, 3, 4], buf2.as_slice());
675    }
676
677    #[test]
678    fn test_slice() {
679        let buf = Buffer::from(&[2, 4, 6, 8, 10]);
680        let buf2 = buf.slice(2);
681
682        assert_eq!([6, 8, 10], buf2.as_slice());
683        assert_eq!(3, buf2.len());
684        assert_eq!(unsafe { buf.as_ptr().offset(2) }, buf2.as_ptr());
685
686        let buf3 = buf2.slice_with_length(1, 2);
687        assert_eq!([8, 10], buf3.as_slice());
688        assert_eq!(2, buf3.len());
689        assert_eq!(unsafe { buf.as_ptr().offset(3) }, buf3.as_ptr());
690
691        let buf4 = buf.slice(5);
692        let empty_slice: [u8; 0] = [];
693        assert_eq!(empty_slice, buf4.as_slice());
694        assert_eq!(0, buf4.len());
695        assert!(buf4.is_empty());
696        assert_eq!(buf2.slice_with_length(2, 1).as_slice(), &[10]);
697    }
698
699    #[test]
700    fn test_shrink_to_fit() {
701        let original = Buffer::from(&[0, 1, 2, 3, 4, 5, 6, 7]);
702        assert_eq!(original.as_slice(), &[0, 1, 2, 3, 4, 5, 6, 7]);
703        assert_eq!(original.capacity(), 64);
704
705        let slice = original.slice_with_length(2, 3);
706        drop(original); // Make sure the buffer isn't shared (or shrink_to_fit won't work)
707        assert_eq!(slice.as_slice(), &[2, 3, 4]);
708        assert_eq!(slice.capacity(), 64);
709
710        let mut shrunk = slice;
711        shrunk.shrink_to_fit();
712        assert_eq!(shrunk.as_slice(), &[2, 3, 4]);
713        assert_eq!(shrunk.capacity(), 5); // shrink_to_fit is allowed to keep the elements before the offset
714
715        // Test that we can handle empty slices:
716        let empty_slice = shrunk.slice_with_length(1, 0);
717        drop(shrunk); // Make sure the buffer isn't shared (or shrink_to_fit won't work)
718        assert_eq!(empty_slice.as_slice(), &[]);
719        assert_eq!(empty_slice.capacity(), 5);
720
721        let mut shrunk_empty = empty_slice;
722        shrunk_empty.shrink_to_fit();
723        assert_eq!(shrunk_empty.as_slice(), &[]);
724        assert_eq!(shrunk_empty.capacity(), 0);
725    }
726
727    #[test]
728    #[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")]
729    fn test_slice_offset_out_of_bound() {
730        let buf = Buffer::from(&[2, 4, 6, 8, 10]);
731        buf.slice(6);
732    }
733
734    #[test]
735    fn test_access_concurrently() {
736        let buffer = Buffer::from([1, 2, 3, 4, 5]);
737        let buffer2 = buffer.clone();
738        assert_eq!([1, 2, 3, 4, 5], buffer.as_slice());
739
740        let buffer_copy = thread::spawn(move || {
741            // access buffer in another thread.
742            buffer
743        })
744        .join();
745
746        assert!(buffer_copy.is_ok());
747        assert_eq!(buffer2, buffer_copy.ok().unwrap());
748    }
749
750    macro_rules! check_as_typed_data {
751        ($input: expr, $native_t: ty) => {{
752            let buffer = Buffer::from_slice_ref($input);
753            let slice: &[$native_t] = buffer.typed_data::<$native_t>();
754            assert_eq!($input, slice);
755        }};
756    }
757
758    #[test]
759    #[allow(clippy::float_cmp)]
760    fn test_as_typed_data() {
761        check_as_typed_data!(&[1i8, 3i8, 6i8], i8);
762        check_as_typed_data!(&[1u8, 3u8, 6u8], u8);
763        check_as_typed_data!(&[1i16, 3i16, 6i16], i16);
764        check_as_typed_data!(&[1i32, 3i32, 6i32], i32);
765        check_as_typed_data!(&[1i64, 3i64, 6i64], i64);
766        check_as_typed_data!(&[1u16, 3u16, 6u16], u16);
767        check_as_typed_data!(&[1u32, 3u32, 6u32], u32);
768        check_as_typed_data!(&[1u64, 3u64, 6u64], u64);
769        check_as_typed_data!(&[1f32, 3f32, 6f32], f32);
770        check_as_typed_data!(&[1f64, 3f64, 6f64], f64);
771    }
772
773    #[test]
774    fn test_count_bits() {
775        assert_eq!(0, Buffer::from(&[0b00000000]).count_set_bits_offset(0, 8));
776        assert_eq!(8, Buffer::from(&[0b11111111]).count_set_bits_offset(0, 8));
777        assert_eq!(3, Buffer::from(&[0b00001101]).count_set_bits_offset(0, 8));
778        assert_eq!(
779            6,
780            Buffer::from(&[0b01001001, 0b01010010]).count_set_bits_offset(0, 16)
781        );
782        assert_eq!(
783            16,
784            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(0, 16)
785        );
786    }
787
788    #[test]
789    fn test_count_bits_slice() {
790        assert_eq!(
791            0,
792            Buffer::from(&[0b11111111, 0b00000000])
793                .slice(1)
794                .count_set_bits_offset(0, 8)
795        );
796        assert_eq!(
797            8,
798            Buffer::from(&[0b11111111, 0b11111111])
799                .slice_with_length(1, 1)
800                .count_set_bits_offset(0, 8)
801        );
802        assert_eq!(
803            3,
804            Buffer::from(&[0b11111111, 0b11111111, 0b00001101])
805                .slice(2)
806                .count_set_bits_offset(0, 8)
807        );
808        assert_eq!(
809            6,
810            Buffer::from(&[0b11111111, 0b01001001, 0b01010010])
811                .slice_with_length(1, 2)
812                .count_set_bits_offset(0, 16)
813        );
814        assert_eq!(
815            16,
816            Buffer::from(&[0b11111111, 0b11111111, 0b11111111, 0b11111111])
817                .slice(2)
818                .count_set_bits_offset(0, 16)
819        );
820    }
821
822    #[test]
823    fn test_count_bits_offset_slice() {
824        assert_eq!(8, Buffer::from(&[0b11111111]).count_set_bits_offset(0, 8));
825        assert_eq!(3, Buffer::from(&[0b11111111]).count_set_bits_offset(0, 3));
826        assert_eq!(5, Buffer::from(&[0b11111111]).count_set_bits_offset(3, 5));
827        assert_eq!(1, Buffer::from(&[0b11111111]).count_set_bits_offset(3, 1));
828        assert_eq!(0, Buffer::from(&[0b11111111]).count_set_bits_offset(8, 0));
829        assert_eq!(2, Buffer::from(&[0b01010101]).count_set_bits_offset(0, 3));
830        assert_eq!(
831            16,
832            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(0, 16)
833        );
834        assert_eq!(
835            10,
836            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(0, 10)
837        );
838        assert_eq!(
839            10,
840            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(3, 10)
841        );
842        assert_eq!(
843            8,
844            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(8, 8)
845        );
846        assert_eq!(
847            5,
848            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(11, 5)
849        );
850        assert_eq!(
851            0,
852            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(16, 0)
853        );
854        assert_eq!(
855            2,
856            Buffer::from(&[0b01101101, 0b10101010]).count_set_bits_offset(7, 5)
857        );
858        assert_eq!(
859            4,
860            Buffer::from(&[0b01101101, 0b10101010]).count_set_bits_offset(7, 9)
861        );
862    }
863
864    #[test]
865    fn test_unwind_safe() {
866        fn assert_unwind_safe<T: RefUnwindSafe + UnwindSafe>() {}
867        assert_unwind_safe::<Buffer>()
868    }
869
    #[test]
    fn test_from_foreign_vec() {
        // Wrap memory owned by a plain Vec (i.e. not allocated through the
        // arrow allocator) in a Buffer, then verify reads and slicing work.
        let mut vector = vec![1_i32, 2, 3, 4, 5];
        let buffer = unsafe {
            // SAFETY: the pointer and byte length describe exactly the Vec's
            // live allocation, and the Vec itself is moved into the Arc that
            // Buffer holds as its deallocation guard, so the memory stays
            // valid (and correctly freed) for the Buffer's entire lifetime.
            // Note the pointer/len are read from `vector` before it is moved.
            Buffer::from_custom_allocation(
                NonNull::new_unchecked(vector.as_mut_ptr() as *mut u8),
                vector.len() * std::mem::size_of::<i32>(),
                Arc::new(vector),
            )
        };

        let slice = buffer.typed_data::<i32>();
        assert_eq!(slice, &[1, 2, 3, 4, 5]);

        // Slicing by one i32 worth of bytes drops the first element without
        // copying the foreign allocation.
        let buffer = buffer.slice(std::mem::size_of::<i32>());

        let slice = buffer.typed_data::<i32>();
        assert_eq!(slice, &[2, 3, 4, 5]);
    }
889
890    #[test]
891    #[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")]
892    fn slice_overflow() {
893        let buffer = Buffer::from(MutableBuffer::from_len_zeroed(12));
894        buffer.slice_with_length(2, usize::MAX);
895    }
896
    #[test]
    fn test_vec_interop() {
        // Round-trips between Vec and Buffer via from_vec/into_vec, covering
        // capacity preservation, alignment/layout failures, slicing, shared
        // references, and the MutableBuffer path.

        // Test empty vec
        let a: Vec<i128> = Vec::new();
        let b = Buffer::from_vec(a);
        b.into_vec::<i128>().unwrap();

        // Test vec with capacity: spare capacity survives the round trip
        let a: Vec<i128> = Vec::with_capacity(20);
        let b = Buffer::from_vec(a);
        let back = b.into_vec::<i128>().unwrap();
        assert_eq!(back.len(), 0);
        assert_eq!(back.capacity(), 20);

        // Test vec with values
        let mut a: Vec<i128> = Vec::with_capacity(3);
        a.extend_from_slice(&[1, 2, 3]);
        let b = Buffer::from_vec(a);
        let back = b.into_vec::<i128>().unwrap();
        assert_eq!(back.len(), 3);
        assert_eq!(back.capacity(), 3);

        // Test vec with values and spare capacity
        let mut a: Vec<i128> = Vec::with_capacity(20);
        a.extend_from_slice(&[1, 4, 7, 8, 9, 3, 6]);
        let b = Buffer::from_vec(a);
        let back = b.into_vec::<i128>().unwrap();
        assert_eq!(back.len(), 7);
        assert_eq!(back.capacity(), 20);

        // Test incorrect alignment: into_vec must refuse types with a
        // different alignment than the original allocation (the Err carries
        // the buffer back, so it can be retried)
        let a: Vec<i128> = Vec::new();
        let b = Buffer::from_vec(a);
        let b = b.into_vec::<i32>().unwrap_err();
        b.into_vec::<i8>().unwrap_err();

        // Test convert between types with same alignment
        // This is an implementation quirk, but isn't harmful
        // as ArrowNativeType are trivially transmutable
        let a: Vec<i64> = vec![1, 2, 3, 4];
        let b = Buffer::from_vec(a);
        let back = b.into_vec::<u64>().unwrap();
        assert_eq!(back.len(), 4);
        assert_eq!(back.capacity(), 4);

        // i256 has the same layout as i128 so this is valid
        let mut b: Vec<i128> = Vec::with_capacity(4);
        b.extend_from_slice(&[1, 2, 3, 4]);
        let b = Buffer::from_vec(b);
        let back = b.into_vec::<i256>().unwrap();
        assert_eq!(back.len(), 2);
        assert_eq!(back.capacity(), 2);

        // Invalid layout: 3 i128s is not a whole number of i256s
        let b: Vec<i128> = vec![1, 2, 3];
        let b = Buffer::from_vec(b);
        b.into_vec::<i256>().unwrap_err();

        // Invalid layout: odd *capacity* also prevents conversion to i256
        let mut b: Vec<i128> = Vec::with_capacity(5);
        b.extend_from_slice(&[1, 2, 3, 4]);
        let b = Buffer::from_vec(b);
        b.into_vec::<i256>().unwrap_err();

        // Truncates length
        // This is an implementation quirk, but isn't harmful
        let mut b: Vec<i128> = Vec::with_capacity(4);
        b.extend_from_slice(&[1, 2, 3]);
        let b = Buffer::from_vec(b);
        let back = b.into_vec::<i256>().unwrap();
        assert_eq!(back.len(), 1);
        assert_eq!(back.capacity(), 2);

        // Cannot use aligned allocation: a MutableBuffer-backed Buffer was
        // not allocated by Vec, so it can never be handed back as one
        let b = Buffer::from(MutableBuffer::new(10));
        let b = b.into_vec::<u8>().unwrap_err();
        b.into_vec::<u64>().unwrap_err();

        // Test slicing
        let mut a: Vec<i128> = Vec::with_capacity(20);
        a.extend_from_slice(&[1, 4, 7, 8, 9, 3, 6]);
        let b = Buffer::from_vec(a);
        let slice = b.slice_with_length(0, 64);

        // Shared reference fails (the parent `b` still holds the allocation)
        let slice = slice.into_vec::<i128>().unwrap_err();
        drop(b);

        // Succeeds as no outstanding shared reference
        let back = slice.into_vec::<i128>().unwrap();
        assert_eq!(&back, &[1, 4, 7, 8]);
        assert_eq!(back.capacity(), 20);

        // Slicing by non-multiple length truncates
        let mut a: Vec<i128> = Vec::with_capacity(8);
        a.extend_from_slice(&[1, 4, 7, 3]);

        let b = Buffer::from_vec(a);
        let slice = b.slice_with_length(0, 34);
        drop(b);

        let back = slice.into_vec::<i128>().unwrap();
        assert_eq!(&back, &[1, 4]);
        assert_eq!(back.capacity(), 8);

        // Offset prevents conversion: a sliced-forward buffer no longer
        // starts at the allocation's base pointer
        let a: Vec<u32> = vec![1, 3, 4, 6];
        let b = Buffer::from_vec(a).slice(2);
        b.into_vec::<u32>().unwrap_err();

        // A MutableBuffer-backed Buffer can't become a Vec, but it can be
        // converted back into a MutableBuffer when uniquely owned
        let b = MutableBuffer::new(16).into_buffer();
        let b = b.into_vec::<u8>().unwrap_err(); // Invalid layout
        let b = b.into_vec::<u32>().unwrap_err(); // Invalid layout
        b.into_mutable().unwrap();

        // Vec -> Buffer -> MutableBuffer -> Buffer -> Vec round trip
        let b = Buffer::from_vec(vec![1_u32, 3, 5]);
        let b = b.into_mutable().unwrap();
        let b = Buffer::from(b);
        let b = b.into_vec::<u32>().unwrap();
        assert_eq!(b, &[1, 3, 5]);
    }
1018
1019    #[test]
1020    #[should_panic(expected = "capacity overflow")]
1021    fn test_from_iter_overflow() {
1022        let iter_len = usize::MAX / std::mem::size_of::<u64>() + 1;
1023        let _ = Buffer::from_iter(std::iter::repeat_n(0_u64, iter_len));
1024    }
1025
1026    #[test]
1027    fn bit_slice_length_preserved() {
1028        // Create a boring buffer
1029        let buf = Buffer::from_iter(std::iter::repeat_n(true, 64));
1030
1031        let assert_preserved = |offset: usize, len: usize| {
1032            let new_buf = buf.bit_slice(offset, len);
1033            assert_eq!(new_buf.len(), bit_util::ceil(len, 8));
1034
1035            // if the offset is not byte-aligned, we have to create a deep copy to a new buffer
1036            // (since the `offset` value inside a Buffer is byte-granular, not bit-granular), so
1037            // checking the offset should always return 0 if so. If the offset IS byte-aligned, we
1038            // want to make sure it doesn't unnecessarily create a deep copy.
1039            if offset % 8 == 0 {
1040                assert_eq!(new_buf.ptr_offset(), offset / 8);
1041            } else {
1042                assert_eq!(new_buf.ptr_offset(), 0);
1043            }
1044        };
1045
1046        // go through every available value for offset
1047        for o in 0..=64 {
1048            // and go through every length that could accompany that offset - we can't have a
1049            // situation where offset + len > 64, because that would go past the end of the buffer,
1050            // so we use the map to ensure it's in range.
1051            for l in (o..=64).map(|l| l - o) {
1052                // and we just want to make sure every one of these keeps its offset and length
1053                // when neeeded
1054                assert_preserved(o, l);
1055            }
1056        }
1057    }
1058
1059    #[test]
1060    fn test_strong_count() {
1061        let buffer = Buffer::from_iter(std::iter::repeat_n(0_u8, 100));
1062        assert_eq!(buffer.strong_count(), 1);
1063
1064        let buffer2 = buffer.clone();
1065        assert_eq!(buffer.strong_count(), 2);
1066
1067        let buffer3 = buffer2.clone();
1068        assert_eq!(buffer.strong_count(), 3);
1069
1070        drop(buffer);
1071        assert_eq!(buffer2.strong_count(), 2);
1072        assert_eq!(buffer3.strong_count(), 2);
1073
1074        // Strong count does not increase on move
1075        let capture = move || {
1076            assert_eq!(buffer3.strong_count(), 2);
1077        };
1078
1079        capture();
1080        assert_eq!(buffer2.strong_count(), 2);
1081
1082        drop(capture);
1083        assert_eq!(buffer2.strong_count(), 1);
1084    }
1085}