// arrow_buffer/buffer/immutable.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::alloc::Layout;
19use std::fmt::Debug;
20use std::ptr::NonNull;
21use std::sync::Arc;
22
23use crate::BufferBuilder;
24use crate::alloc::{Allocation, Deallocation};
25use crate::util::bit_chunk_iterator::{BitChunks, UnalignedBitChunk};
26use crate::{bit_util, bytes::Bytes, native::ArrowNativeType};
27
28#[cfg(feature = "pool")]
29use crate::pool::MemoryPool;
30
31use super::{MutableBuffer, ScalarBuffer};
32
/// A contiguous memory region that can be shared with other buffers and across
/// thread boundaries that stores Arrow data.
///
/// `Buffer`s can be sliced and cloned without copying the underlying data and can
/// be created from memory allocated by non-Rust sources such as C/C++.
///
/// # Example: Create a `Buffer` from a `Vec` (without copying)
/// ```
/// # use arrow_buffer::Buffer;
/// let vec: Vec<u32> = vec![1, 2, 3];
/// let buffer = Buffer::from(vec);
/// ```
///
/// # Example: Convert a `Buffer` to a `Vec` (without copying)
///
/// Use [`Self::into_vec`] to convert a `Buffer` back into a `Vec` if there are
/// no other references and the types are aligned correctly.
/// ```
/// # use arrow_buffer::Buffer;
/// # let vec: Vec<u32> = vec![1, 2, 3];
/// # let buffer = Buffer::from(vec);
/// // convert the buffer back into a Vec of u32
/// // note this will fail if the buffer is shared or not aligned correctly
/// let vec: Vec<u32> = buffer.into_vec().unwrap();
/// ```
///
/// # Example: Create a `Buffer` from a [`bytes::Bytes`] (without copying)
///
/// [`bytes::Bytes`] is a common type in the Rust ecosystem for shared memory
/// regions. You can create a buffer from a `Bytes` instance using the `From`
/// implementation, also without copying.
///
/// ```
/// # use arrow_buffer::Buffer;
/// let bytes = bytes::Bytes::from("hello");
/// let buffer = Buffer::from(bytes);
///```
#[derive(Clone, Debug)]
pub struct Buffer {
    /// the internal byte buffer.
    data: Arc<Bytes>,

    /// Pointer into `data`, valid for `length` bytes.
    ///
    /// We store a pointer instead of an offset to avoid pointer arithmetic
    /// which causes LLVM to fail to vectorise code correctly
    ptr: *const u8,

    /// Byte length of the buffer.
    ///
    /// Must be less than or equal to `data.len()`
    length: usize,
}
86
87impl Default for Buffer {
88    #[inline]
89    fn default() -> Self {
90        MutableBuffer::default().into()
91    }
92}
93
/// Buffers compare equal when their visible byte contents are equal,
/// regardless of how the underlying allocation is shared or offset.
impl PartialEq for Buffer {
    fn eq(&self, other: &Self) -> bool {
        self.as_slice().eq(other.as_slice())
    }
}

impl Eq for Buffer {}

// SAFETY: `Buffer` is an `Arc<Bytes>` plus a raw pointer into that same
// allocation; sending/sharing it across threads is sound whenever `Bytes`
// itself is Send/Sync.
unsafe impl Send for Buffer where Bytes: Send {}
unsafe impl Sync for Buffer where Bytes: Sync {}
104
105impl Buffer {
106    /// Create a new Buffer from a (internal) `Bytes`
107    ///
108    /// NOTE despite the same name, `Bytes` is an internal struct in arrow-rs
109    /// and is different than [`bytes::Bytes`].
110    ///
111    /// See examples on [`Buffer`] for ways to create a buffer from a [`bytes::Bytes`].
112    #[deprecated(since = "54.1.0", note = "Use Buffer::from instead")]
113    pub fn from_bytes(bytes: Bytes) -> Self {
114        Self::from(bytes)
115    }
116
    /// Returns the offset, in bytes, of `Self::ptr` to `Self::data`
    ///
    /// self.ptr and self.data can be different after slicing or advancing the buffer.
    pub fn ptr_offset(&self) -> usize {
        // Safety: `ptr` is always in bounds of `data`, so the distance is
        // non-negative and fits in `usize`.
        unsafe { self.ptr.offset_from(self.data.ptr().as_ptr()) as usize }
    }
124
    /// Returns the pointer to the start of the buffer without the offset.
    ///
    /// This is the base of the underlying allocation, not the (possibly
    /// advanced) start of this buffer's view; see [`Self::as_ptr`] for that.
    pub fn data_ptr(&self) -> NonNull<u8> {
        self.data.ptr()
    }

    /// Returns the number of strong references to the buffer.
    ///
    /// This method is safe but if the buffer is shared across multiple threads
    /// the underlying value could change between calling this method and using
    /// the result.
    pub fn strong_count(&self) -> usize {
        Arc::strong_count(&self.data)
    }
138
139    /// Create a [`Buffer`] from the provided [`Vec`] without copying
140    #[inline]
141    pub fn from_vec<T: ArrowNativeType>(vec: Vec<T>) -> Self {
142        MutableBuffer::from(vec).into()
143    }
144
145    /// Initializes a [Buffer] from a slice of items.
146    pub fn from_slice_ref<U: ArrowNativeType, T: AsRef<[U]>>(items: T) -> Self {
147        let slice = items.as_ref();
148        let capacity = std::mem::size_of_val(slice);
149        let mut buffer = MutableBuffer::with_capacity(capacity);
150        buffer.extend_from_slice(slice);
151        buffer.into()
152    }
153
    /// Creates a buffer from an existing memory region.
    ///
    /// Ownership of the memory is tracked via reference counting
    /// and the memory will be freed using the `drop` method of
    /// [crate::alloc::Allocation] when the reference count reaches zero.
    ///
    /// # Arguments
    ///
    /// * `ptr` - Pointer to raw parts
    /// * `len` - Length of raw parts in **bytes**
    /// * `owner` - A [crate::alloc::Allocation] which is responsible for freeing that data
    ///
    /// # Safety
    ///
    /// This function is unsafe as there is no guarantee that the given pointer is valid for `len` bytes
    pub unsafe fn from_custom_allocation(
        ptr: NonNull<u8>,
        len: usize,
        owner: Arc<dyn Allocation>,
    ) -> Self {
        // SAFETY: upheld by this function's own safety contract
        unsafe { Buffer::build_with_arguments(ptr, len, Deallocation::Custom(owner, len)) }
    }

    /// Auxiliary method to create a new Buffer
    ///
    /// # Safety
    ///
    /// `ptr` must be valid for reads of `len` bytes until the memory is
    /// released according to `deallocation`.
    unsafe fn build_with_arguments(
        ptr: NonNull<u8>,
        len: usize,
        deallocation: Deallocation,
    ) -> Self {
        // SAFETY: upheld by the caller per this function's contract
        let bytes = unsafe { Bytes::new(ptr, len, deallocation) };
        let ptr = bytes.as_ptr();
        Buffer {
            ptr,
            data: Arc::new(bytes),
            length: len,
        }
    }
191
    /// Returns the number of bytes in the buffer
    #[inline]
    pub fn len(&self) -> usize {
        self.length
    }

    /// Returns the capacity of this buffer.
    /// For externally owned buffers, this returns zero
    #[inline]
    pub fn capacity(&self) -> usize {
        // Delegates to the underlying allocation; may exceed `self.length`
        self.data.capacity()
    }
204
    /// Tries to shrink the capacity of the buffer as much as possible, freeing unused memory.
    ///
    /// If the buffer is shared, this is a no-op.
    ///
    /// If the memory was allocated with a custom allocator, this is a no-op.
    ///
    /// If the capacity is already less than or equal to the desired capacity, this is a no-op.
    ///
    /// The memory region will be reallocated using `std::alloc::realloc`.
    pub fn shrink_to_fit(&mut self) {
        let offset = self.ptr_offset();
        let is_empty = self.is_empty();
        let desired_capacity = if is_empty {
            0
        } else {
            // For realloc to work, we cannot free the elements before the offset
            offset + self.len()
        };
        if desired_capacity < self.capacity() {
            // Only possible when this is the sole reference to the allocation
            if let Some(bytes) = Arc::get_mut(&mut self.data) {
                if bytes.try_realloc(desired_capacity).is_ok() {
                    // Realloc complete - update our pointer into `bytes`:
                    self.ptr = if is_empty {
                        bytes.as_ptr()
                    } else {
                        // SAFETY: we kept all elements leading up to the offset
                        unsafe { bytes.as_ptr().add(offset) }
                    }
                } else {
                    // Failure to reallocate is fine; we just failed to free up memory.
                }
            }
        }
    }
239
240    /// Returns true if the buffer is empty.
241    #[inline]
242    pub fn is_empty(&self) -> bool {
243        self.length == 0
244    }
245
246    /// Returns the byte slice stored in this buffer
247    pub fn as_slice(&self) -> &[u8] {
248        unsafe { std::slice::from_raw_parts(self.ptr, self.length) }
249    }
250
    /// Returns how the underlying allocation will be released (crate-internal).
    pub(crate) fn deallocation(&self) -> &Deallocation {
        self.data.deallocation()
    }
254
255    /// Returns a new [Buffer] that is a slice of this buffer starting at `offset`.
256    ///
257    /// This function is `O(1)` and does not copy any data, allowing the
258    /// same memory region to be shared between buffers.
259    ///
260    /// # Panics
261    ///
262    /// Panics iff `offset` is larger than `len`.
263    pub fn slice(&self, offset: usize) -> Self {
264        let mut s = self.clone();
265        s.advance(offset);
266        s
267    }
268
    /// Increases the offset of this buffer by `offset`
    ///
    /// # Panics
    ///
    /// Panics iff `offset` is larger than `len`.
    #[inline]
    pub fn advance(&mut self, offset: usize) {
        assert!(
            offset <= self.length,
            "the offset of the new Buffer cannot exceed the existing length: offset={} length={}",
            offset,
            self.length
        );
        self.length -= offset;
        // Safety:
        // This cannot overflow as
        // `self.ptr + self.length <= self.data.ptr + self.data.len()`
        // and the assert above guarantees `offset <= self.length`
        self.ptr = unsafe { self.ptr.add(offset) };
    }
289
    /// Returns a new [Buffer] that is a slice of this buffer starting at `offset`,
    /// with `length` bytes.
    ///
    /// This function is `O(1)` and does not copy any data, allowing the same
    /// memory region to be shared between buffers.
    ///
    /// # Panics
    /// Panics iff `(offset + length)` is larger than the existing length.
    pub fn slice_with_length(&self, offset: usize, length: usize) -> Self {
        // `saturating_add` makes an overflowing `offset + length` compare as
        // `usize::MAX`, which also fails the assertion below
        assert!(
            offset.saturating_add(length) <= self.length,
            "the offset of the new Buffer cannot exceed the existing length: slice offset={offset} length={length} selflen={}",
            self.length
        );
        // Safety:
        // offset + length <= self.length
        let ptr = unsafe { self.ptr.add(offset) };
        Self {
            data: self.data.clone(),
            ptr,
            length,
        }
    }

    /// Returns a pointer to the start of this buffer.
    ///
    /// Note that this should be used cautiously, and the returned pointer should not be
    /// stored anywhere, to avoid dangling pointers.
    #[inline]
    pub fn as_ptr(&self) -> *const u8 {
        self.ptr
    }
322
    /// View buffer as a slice of a specific type.
    ///
    /// # Panics
    ///
    /// This function panics if the underlying buffer is not aligned
    /// correctly for type `T`.
    pub fn typed_data<T: ArrowNativeType>(&self) -> &[T] {
        // SAFETY
        // ArrowNativeType is trivially transmutable, is sealed to prevent potentially incorrect
        // implementation outside this crate, and this method checks alignment
        let (prefix, offsets, suffix) = unsafe { self.as_slice().align_to::<T>() };
        // Non-empty prefix/suffix means the data was misaligned or had a
        // trailing partial element for `T`
        assert!(prefix.is_empty() && suffix.is_empty());
        offsets
    }
337
338    /// Returns a slice of this buffer starting at a certain bit offset.
339    /// If the offset is byte-aligned the returned buffer is a shallow clone,
340    /// otherwise a new buffer is allocated and filled with a copy of the bits in the range.
341    pub fn bit_slice(&self, offset: usize, len: usize) -> Self {
342        if offset % 8 == 0 {
343            return self.slice_with_length(offset / 8, bit_util::ceil(len, 8));
344        }
345
346        let chunks = self.bit_chunks(offset, len);
347
348        let buffer: Vec<u64> = if chunks.remainder_len() > 0 {
349            chunks.iter().chain(Some(chunks.remainder_bits())).collect()
350        } else {
351            chunks.iter().collect()
352        };
353        let mut buffer = Buffer::from_vec(buffer);
354        // Update length to be byte-aligned
355        buffer.length = bit_util::ceil(len, 8);
356        buffer
357    }
358
    /// Returns a `BitChunks` instance which can be used to iterate over this buffers bits
    /// in larger chunks and starting at arbitrary bit offsets.
    /// Note that both `offset` and `length` are measured in bits.
    pub fn bit_chunks(&self, offset: usize, len: usize) -> BitChunks<'_> {
        BitChunks::new(self.as_slice(), offset, len)
    }

    /// Returns the number of 1-bits in this buffer, starting from `offset` with `length` bits
    /// inspected. Note that both `offset` and `length` are measured in bits.
    pub fn count_set_bits_offset(&self, offset: usize, len: usize) -> usize {
        UnalignedBitChunk::new(self.as_slice(), offset, len).count_ones()
    }
371
    /// Returns `MutableBuffer` for mutating the buffer if this buffer is not shared.
    /// Returns `Err` if this is shared or its allocation is from an external source or
    /// it is not allocated with alignment [`ALIGNMENT`]
    ///
    /// # Example: Creating a [`MutableBuffer`] from a [`Buffer`]
    /// ```
    /// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
    /// let buffer: Buffer = Buffer::from(&[1u8, 2, 3, 4][..]);
    /// // Only possible to convert a Buffer into a MutableBuffer if uniquely owned
    /// // (i.e., there are no other references to it).
    /// let mut mutable_buffer = match buffer.into_mutable() {
    ///    Ok(mutable) => mutable,
    ///    Err(orig_buffer) => {
    ///      panic!("buffer was not uniquely owned");
    ///    }
    /// };
    /// mutable_buffer.push(5u8);
    /// let buffer = Buffer::from(mutable_buffer);
    /// assert_eq!(buffer.as_slice(), &[1u8, 2, 3, 4, 5])
    /// ```
    ///
    /// [`ALIGNMENT`]: crate::alloc::ALIGNMENT
    pub fn into_mutable(self) -> Result<MutableBuffer, Self> {
        // Stash the raw parts so the buffer can be reconstructed on failure
        let ptr = self.ptr;
        let length = self.length;
        // `try_unwrap` succeeds only when this is the sole strong reference
        Arc::try_unwrap(self.data)
            .and_then(|bytes| {
                // The pointer of underlying buffer should not be offset.
                assert_eq!(ptr, bytes.ptr().as_ptr());
                MutableBuffer::from_bytes(bytes).map_err(Arc::new)
            })
            .map_err(|bytes| Buffer {
                data: bytes,
                ptr,
                length,
            })
    }
409
    /// Converts self into a `Vec`, if possible.
    ///
    /// This can be used to reuse / mutate the underlying data.
    ///
    /// # Errors
    ///
    /// Returns `Err(self)` if
    /// 1. The buffer does not have the same [`Layout`] as the destination Vec
    /// 2. The buffer contains a non-zero offset
    /// 3. The buffer is shared
    pub fn into_vec<T: ArrowNativeType>(self) -> Result<Vec<T>, Self> {
        let layout = match self.data.deallocation() {
            Deallocation::Standard(l) => l,
            _ => return Err(self), // Custom allocation
        };

        if self.ptr != self.data.as_ptr() {
            return Err(self); // Data is offset
        }

        // The allocation must match exactly what `Vec::from_raw_parts` expects
        let v_capacity = layout.size() / std::mem::size_of::<T>();
        match Layout::array::<T>(v_capacity) {
            Ok(expected) if layout == &expected => {}
            _ => return Err(self), // Incorrect layout
        }

        let length = self.length;
        let ptr = self.ptr;
        let v_len = self.length / std::mem::size_of::<T>();

        Arc::try_unwrap(self.data)
            .map(|bytes| unsafe {
                let ptr = bytes.ptr().as_ptr() as _;
                // Prevent `bytes` from freeing the allocation the Vec now owns
                std::mem::forget(bytes);
                // Safety
                // Verified that bytes layout matches that of Vec
                Vec::from_raw_parts(ptr, v_len, v_capacity)
            })
            .map_err(|bytes| Buffer {
                data: bytes,
                ptr,
                length,
            })
    }
454
455    /// Returns true if this [`Buffer`] is equal to `other`, using pointer comparisons
456    /// to determine buffer equality. This is cheaper than `PartialEq::eq` but may
457    /// return false when the arrays are logically equal
458    #[inline]
459    pub fn ptr_eq(&self, other: &Self) -> bool {
460        self.ptr == other.ptr && self.length == other.length
461    }
462
    /// Register this [`Buffer`] with the provided [`MemoryPool`]
    ///
    /// This claims the memory used by this buffer in the pool, allowing for
    /// accurate accounting of memory usage. Any prior reservation will be
    /// released so this works well when the buffer is being shared among
    /// multiple arrays.
    #[cfg(feature = "pool")]
    pub fn claim(&self, pool: &dyn MemoryPool) {
        self.data.claim(pool)
    }
473}
474
/// Note that here we deliberately do not implement
/// `impl<T: AsRef<[u8]>> From<T> for Buffer`
/// As it would accept `Buffer::from(vec![...])` that would cause an unexpected copy.
/// Instead, we ask user to be explicit when copying is occurring, e.g., `Buffer::from(vec![...].to_byte_slice())`.
/// For zero-copy conversion, user should use `Buffer::from_vec(vec![...])`.
///
/// Since we removed impl for `AsRef<u8>`, we added the following three specific implementations to reduce API breakage.
/// See <https://github.com/apache/arrow-rs/issues/6033> for more discussion on this.
impl From<&[u8]> for Buffer {
    fn from(p: &[u8]) -> Self {
        Self::from_slice_ref(p)
    }
}

impl<const N: usize> From<[u8; N]> for Buffer {
    fn from(p: [u8; N]) -> Self {
        Self::from_slice_ref(p)
    }
}

impl<const N: usize> From<&[u8; N]> for Buffer {
    fn from(p: &[u8; N]) -> Self {
        Self::from_slice_ref(p)
    }
}

/// Zero-copy conversion: the `Vec`'s allocation is taken over by the buffer.
impl<T: ArrowNativeType> From<Vec<T>> for Buffer {
    fn from(value: Vec<T>) -> Self {
        Self::from_vec(value)
    }
}

/// Unwraps the untyped [`Buffer`] backing a [`ScalarBuffer`] (zero-copy).
impl<T: ArrowNativeType> From<ScalarBuffer<T>> for Buffer {
    fn from(value: ScalarBuffer<T>) -> Self {
        value.into_inner()
    }
}
512
513/// Convert from internal `Bytes` (not [`bytes::Bytes`]) to `Buffer`
514impl From<Bytes> for Buffer {
515    #[inline]
516    fn from(bytes: Bytes) -> Self {
517        let length = bytes.len();
518        let ptr = bytes.as_ptr();
519        Self {
520            data: Arc::new(bytes),
521            ptr,
522            length,
523        }
524    }
525}
526
527/// Convert from [`bytes::Bytes`], not internal `Bytes` to `Buffer`
528impl From<bytes::Bytes> for Buffer {
529    fn from(bytes: bytes::Bytes) -> Self {
530        let bytes: Bytes = bytes.into();
531        Self::from(bytes)
532    }
533}
534
535/// Create a `Buffer` instance by storing the boolean values into the buffer
536impl FromIterator<bool> for Buffer {
537    fn from_iter<I>(iter: I) -> Self
538    where
539        I: IntoIterator<Item = bool>,
540    {
541        MutableBuffer::from_iter(iter).into()
542    }
543}
544
545impl std::ops::Deref for Buffer {
546    type Target = [u8];
547
548    fn deref(&self) -> &[u8] {
549        unsafe { std::slice::from_raw_parts(self.as_ptr(), self.len()) }
550    }
551}
552
/// Borrow the buffer contents as a byte slice.
impl AsRef<[u8]> for &Buffer {
    fn as_ref(&self) -> &[u8] {
        self.as_slice()
    }
}

/// Freezes a [`MutableBuffer`] into an immutable [`Buffer`] (zero-copy).
impl From<MutableBuffer> for Buffer {
    #[inline]
    fn from(buffer: MutableBuffer) -> Self {
        buffer.into_buffer()
    }
}

/// Finishes the builder and takes ownership of its contents (zero-copy).
impl<T: ArrowNativeType> From<BufferBuilder<T>> for Buffer {
    fn from(mut value: BufferBuilder<T>) -> Self {
        value.finish()
    }
}
571
impl Buffer {
    /// Creates a [`Buffer`] from an [`Iterator`] with a trusted (upper) length.
    ///
    /// Prefer this to `collect` whenever possible, as it is ~60% faster.
    ///
    /// # Example
    /// ```
    /// # use arrow_buffer::buffer::Buffer;
    /// let v = vec![1u32];
    /// let iter = v.iter().map(|x| x * 2);
    /// let buffer = unsafe { Buffer::from_trusted_len_iter(iter) };
    /// assert_eq!(buffer.len(), 4) // u32 has 4 bytes
    /// ```
    /// # Safety
    /// This method assumes that the iterator's size is correct and is undefined behavior
    /// to use it on an iterator that reports an incorrect length.
    // This implementation is required for two reasons:
    // 1. there is no trait `TrustedLen` in stable rust and therefore
    //    we can't specialize `extend` for `TrustedLen` like `Vec` does.
    // 2. `from_trusted_len_iter` is faster.
    #[inline]
    pub unsafe fn from_trusted_len_iter<T: ArrowNativeType, I: Iterator<Item = T>>(
        iterator: I,
    ) -> Self {
        // SAFETY: forwarded to the caller via this function's contract
        unsafe { MutableBuffer::from_trusted_len_iter(iterator).into() }
    }

    /// Creates a [`Buffer`] from an [`Iterator`] with a trusted (upper) length or errors
    /// if any of the items of the iterator is an error.
    /// Prefer this to `collect` whenever possible, as it is ~60% faster.
    /// # Safety
    /// This method assumes that the iterator's size is correct and is undefined behavior
    /// to use it on an iterator that reports an incorrect length.
    #[inline]
    pub unsafe fn try_from_trusted_len_iter<
        E,
        T: ArrowNativeType,
        I: Iterator<Item = Result<T, E>>,
    >(
        iterator: I,
    ) -> Result<Self, E> {
        // SAFETY: forwarded to the caller via this function's contract
        unsafe { Ok(MutableBuffer::try_from_trusted_len_iter(iterator)?.into()) }
    }
}
616
617impl<T: ArrowNativeType> FromIterator<T> for Buffer {
618    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
619        let vec = Vec::from_iter(iter);
620        Buffer::from_vec(vec)
621    }
622}
623
624#[cfg(test)]
625mod tests {
626    use crate::i256;
627    use std::panic::{RefUnwindSafe, UnwindSafe};
628    use std::thread;
629
630    use super::*;
631
    #[test]
    fn test_buffer_data_equality() {
        let buf1 = Buffer::from(&[0, 1, 2, 3, 4]);
        let buf2 = Buffer::from(&[0, 1, 2, 3, 4]);
        assert_eq!(buf1, buf2);

        // slice with same offset and same length should still preserve equality
        let buf3 = buf1.slice(2);
        assert_ne!(buf1, buf3);
        let buf4 = buf2.slice_with_length(2, 3);
        assert_eq!(buf3, buf4);

        // Different capacities should still preserve equality
        let mut buf2 = MutableBuffer::new(65);
        buf2.extend_from_slice(&[0u8, 1, 2, 3, 4]);

        let buf2 = buf2.into();
        assert_eq!(buf1, buf2);

        // unequal because of different elements
        let buf2 = Buffer::from(&[0, 0, 2, 3, 4]);
        assert_ne!(buf1, buf2);

        // unequal because of different length
        let buf2 = Buffer::from(&[0, 1, 2, 3]);
        assert_ne!(buf1, buf2);
    }

    // NOTE(review): despite the name, this constructs via `From<&[u8; N]>`;
    // the raw-parts path (`from_custom_allocation`) is covered by
    // `test_from_foreign_vec` below.
    #[test]
    fn test_from_raw_parts() {
        let buf = Buffer::from(&[0, 1, 2, 3, 4]);
        assert_eq!(5, buf.len());
        assert!(!buf.as_ptr().is_null());
        assert_eq!([0, 1, 2, 3, 4], buf.as_slice());
    }
667
668    #[test]
669    fn test_from_vec() {
670        let buf = Buffer::from(&[0, 1, 2, 3, 4]);
671        assert_eq!(5, buf.len());
672        assert!(!buf.as_ptr().is_null());
673        assert_eq!([0, 1, 2, 3, 4], buf.as_slice());
674    }
675
    // Moving a Buffer transfers the Arc without copying the underlying data.
    #[test]
    fn test_copy() {
        let buf = Buffer::from(&[0, 1, 2, 3, 4]);
        let buf2 = buf;
        assert_eq!(5, buf2.len());
        assert_eq!(64, buf2.capacity());
        assert!(!buf2.as_ptr().is_null());
        assert_eq!([0, 1, 2, 3, 4], buf2.as_slice());
    }

    #[test]
    fn test_slice() {
        let buf = Buffer::from(&[2, 4, 6, 8, 10]);
        let buf2 = buf.slice(2);

        assert_eq!([6, 8, 10], buf2.as_slice());
        assert_eq!(3, buf2.len());
        assert_eq!(unsafe { buf.as_ptr().offset(2) }, buf2.as_ptr());

        let buf3 = buf2.slice_with_length(1, 2);
        assert_eq!([8, 10], buf3.as_slice());
        assert_eq!(2, buf3.len());
        assert_eq!(unsafe { buf.as_ptr().offset(3) }, buf3.as_ptr());

        // Slicing at exactly `len` yields an empty buffer rather than panicking
        let buf4 = buf.slice(5);
        let empty_slice: [u8; 0] = [];
        assert_eq!(empty_slice, buf4.as_slice());
        assert_eq!(0, buf4.len());
        assert!(buf4.is_empty());
        assert_eq!(buf2.slice_with_length(2, 1).as_slice(), &[10]);
    }
707
    #[test]
    fn test_shrink_to_fit() {
        let original = Buffer::from(&[0, 1, 2, 3, 4, 5, 6, 7]);
        assert_eq!(original.as_slice(), &[0, 1, 2, 3, 4, 5, 6, 7]);
        assert_eq!(original.capacity(), 64);

        let slice = original.slice_with_length(2, 3);
        drop(original); // Make sure the buffer isn't shared (or shrink_to_fit won't work)
        assert_eq!(slice.as_slice(), &[2, 3, 4]);
        assert_eq!(slice.capacity(), 64);

        let mut shrunk = slice;
        shrunk.shrink_to_fit();
        assert_eq!(shrunk.as_slice(), &[2, 3, 4]);
        assert_eq!(shrunk.capacity(), 5); // shrink_to_fit is allowed to keep the elements before the offset

        // Test that we can handle empty slices:
        let empty_slice = shrunk.slice_with_length(1, 0);
        drop(shrunk); // Make sure the buffer isn't shared (or shrink_to_fit won't work)
        assert_eq!(empty_slice.as_slice(), &[]);
        assert_eq!(empty_slice.capacity(), 5);

        let mut shrunk_empty = empty_slice;
        shrunk_empty.shrink_to_fit();
        assert_eq!(shrunk_empty.as_slice(), &[]);
        assert_eq!(shrunk_empty.capacity(), 0);
    }

    #[test]
    #[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")]
    fn test_slice_offset_out_of_bound() {
        let buf = Buffer::from(&[2, 4, 6, 8, 10]);
        buf.slice(6);
    }

    // Buffers can be moved across threads and compared against clones afterwards.
    #[test]
    fn test_access_concurrently() {
        let buffer = Buffer::from([1, 2, 3, 4, 5]);
        let buffer2 = buffer.clone();
        assert_eq!([1, 2, 3, 4, 5], buffer.as_slice());

        let buffer_copy = thread::spawn(move || {
            // access buffer in another thread.
            buffer
        })
        .join();

        assert!(buffer_copy.is_ok());
        assert_eq!(buffer2, buffer_copy.ok().unwrap());
    }
758
    // Round-trips a typed slice through a Buffer and back via `typed_data`.
    macro_rules! check_as_typed_data {
        ($input: expr, $native_t: ty) => {{
            let buffer = Buffer::from_slice_ref($input);
            let slice: &[$native_t] = buffer.typed_data::<$native_t>();
            assert_eq!($input, slice);
        }};
    }

    #[test]
    #[allow(clippy::float_cmp)]
    fn test_as_typed_data() {
        check_as_typed_data!(&[1i8, 3i8, 6i8], i8);
        check_as_typed_data!(&[1u8, 3u8, 6u8], u8);
        check_as_typed_data!(&[1i16, 3i16, 6i16], i16);
        check_as_typed_data!(&[1i32, 3i32, 6i32], i32);
        check_as_typed_data!(&[1i64, 3i64, 6i64], i64);
        check_as_typed_data!(&[1u16, 3u16, 6u16], u16);
        check_as_typed_data!(&[1u32, 3u32, 6u32], u32);
        check_as_typed_data!(&[1u64, 3u64, 6u64], u64);
        check_as_typed_data!(&[1f32, 3f32, 6f32], f32);
        check_as_typed_data!(&[1f64, 3f64, 6f64], f64);
    }

    #[test]
    fn test_count_bits() {
        assert_eq!(0, Buffer::from(&[0b00000000]).count_set_bits_offset(0, 8));
        assert_eq!(8, Buffer::from(&[0b11111111]).count_set_bits_offset(0, 8));
        assert_eq!(3, Buffer::from(&[0b00001101]).count_set_bits_offset(0, 8));
        assert_eq!(
            6,
            Buffer::from(&[0b01001001, 0b01010010]).count_set_bits_offset(0, 16)
        );
        assert_eq!(
            16,
            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(0, 16)
        );
    }
796
    // Counting set bits must respect the slice's byte offset into the allocation.
    #[test]
    fn test_count_bits_slice() {
        assert_eq!(
            0,
            Buffer::from(&[0b11111111, 0b00000000])
                .slice(1)
                .count_set_bits_offset(0, 8)
        );
        assert_eq!(
            8,
            Buffer::from(&[0b11111111, 0b11111111])
                .slice_with_length(1, 1)
                .count_set_bits_offset(0, 8)
        );
        assert_eq!(
            3,
            Buffer::from(&[0b11111111, 0b11111111, 0b00001101])
                .slice(2)
                .count_set_bits_offset(0, 8)
        );
        assert_eq!(
            6,
            Buffer::from(&[0b11111111, 0b01001001, 0b01010010])
                .slice_with_length(1, 2)
                .count_set_bits_offset(0, 16)
        );
        assert_eq!(
            16,
            Buffer::from(&[0b11111111, 0b11111111, 0b11111111, 0b11111111])
                .slice(2)
                .count_set_bits_offset(0, 16)
        );
    }

    // Exercises bit offsets that are not byte-aligned, including zero-length ranges.
    #[test]
    fn test_count_bits_offset_slice() {
        assert_eq!(8, Buffer::from(&[0b11111111]).count_set_bits_offset(0, 8));
        assert_eq!(3, Buffer::from(&[0b11111111]).count_set_bits_offset(0, 3));
        assert_eq!(5, Buffer::from(&[0b11111111]).count_set_bits_offset(3, 5));
        assert_eq!(1, Buffer::from(&[0b11111111]).count_set_bits_offset(3, 1));
        assert_eq!(0, Buffer::from(&[0b11111111]).count_set_bits_offset(8, 0));
        assert_eq!(2, Buffer::from(&[0b01010101]).count_set_bits_offset(0, 3));
        assert_eq!(
            16,
            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(0, 16)
        );
        assert_eq!(
            10,
            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(0, 10)
        );
        assert_eq!(
            10,
            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(3, 10)
        );
        assert_eq!(
            8,
            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(8, 8)
        );
        assert_eq!(
            5,
            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(11, 5)
        );
        assert_eq!(
            0,
            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(16, 0)
        );
        assert_eq!(
            2,
            Buffer::from(&[0b01101101, 0b10101010]).count_set_bits_offset(7, 5)
        );
        assert_eq!(
            4,
            Buffer::from(&[0b01101101, 0b10101010]).count_set_bits_offset(7, 9)
        );
    }
872
873    #[test]
874    fn test_unwind_safe() {
875        fn assert_unwind_safe<T: RefUnwindSafe + UnwindSafe>() {}
876        assert_unwind_safe::<Buffer>()
877    }
878
879    #[test]
880    fn test_from_foreign_vec() {
881        let mut vector = vec![1_i32, 2, 3, 4, 5];
882        let buffer = unsafe {
883            Buffer::from_custom_allocation(
884                NonNull::new_unchecked(vector.as_mut_ptr() as *mut u8),
885                vector.len() * std::mem::size_of::<i32>(),
886                Arc::new(vector),
887            )
888        };
889
890        let slice = buffer.typed_data::<i32>();
891        assert_eq!(slice, &[1, 2, 3, 4, 5]);
892
893        let buffer = buffer.slice(std::mem::size_of::<i32>());
894
895        let slice = buffer.typed_data::<i32>();
896        assert_eq!(slice, &[2, 3, 4, 5]);
897    }
898
899    #[test]
900    #[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")]
901    fn slice_overflow() {
902        let buffer = Buffer::from(MutableBuffer::from_len_zeroed(12));
903        buffer.slice_with_length(2, usize::MAX);
904    }
905
    #[test]
    fn test_vec_interop() {
        // Round-trips Vec<T> -> Buffer -> Vec<T> across every supported and
        // unsupported configuration. `into_vec` succeeds only when the buffer
        // uniquely owns a Vec-compatible allocation matching the requested
        // element type; on failure it returns the Buffer back as the error
        // value, which several cases below reuse for a follow-up attempt.

        // Test empty vec
        let a: Vec<i128> = Vec::new();
        let b = Buffer::from_vec(a);
        b.into_vec::<i128>().unwrap();

        // Test vec with capacity: spare capacity survives the round trip
        let a: Vec<i128> = Vec::with_capacity(20);
        let b = Buffer::from_vec(a);
        let back = b.into_vec::<i128>().unwrap();
        assert_eq!(back.len(), 0);
        assert_eq!(back.capacity(), 20);

        // Test vec with values
        let mut a: Vec<i128> = Vec::with_capacity(3);
        a.extend_from_slice(&[1, 2, 3]);
        let b = Buffer::from_vec(a);
        let back = b.into_vec::<i128>().unwrap();
        assert_eq!(back.len(), 3);
        assert_eq!(back.capacity(), 3);

        // Test vec with values and spare capacity
        let mut a: Vec<i128> = Vec::with_capacity(20);
        a.extend_from_slice(&[1, 4, 7, 8, 9, 3, 6]);
        let b = Buffer::from_vec(a);
        let back = b.into_vec::<i128>().unwrap();
        assert_eq!(back.len(), 7);
        assert_eq!(back.capacity(), 20);

        // Test incorrect alignment: an i128-backed allocation is rejected for
        // element types with a different layout; the error returns the buffer
        // so a second conversion attempt can be made on it
        let a: Vec<i128> = Vec::new();
        let b = Buffer::from_vec(a);
        let b = b.into_vec::<i32>().unwrap_err();
        b.into_vec::<i8>().unwrap_err();

        // Test convert between types with same alignment
        // This is an implementation quirk, but isn't harmful
        // as ArrowNativeType are trivially transmutable
        let a: Vec<i64> = vec![1, 2, 3, 4];
        let b = Buffer::from_vec(a);
        let back = b.into_vec::<u64>().unwrap();
        assert_eq!(back.len(), 4);
        assert_eq!(back.capacity(), 4);

        // i256 has the same layout as i128 so this is valid:
        // 4 i128s (len and capacity both even) become 2 i256s
        let mut b: Vec<i128> = Vec::with_capacity(4);
        b.extend_from_slice(&[1, 2, 3, 4]);
        let b = Buffer::from_vec(b);
        let back = b.into_vec::<i256>().unwrap();
        assert_eq!(back.len(), 2);
        assert_eq!(back.capacity(), 2);

        // Invalid layout: 3 i128s are not a whole number of i256s
        let b: Vec<i128> = vec![1, 2, 3];
        let b = Buffer::from_vec(b);
        b.into_vec::<i256>().unwrap_err();

        // Invalid layout: an odd *capacity* (5 i128s) also blocks conversion,
        // even though the 4 initialized values would fit
        let mut b: Vec<i128> = Vec::with_capacity(5);
        b.extend_from_slice(&[1, 2, 3, 4]);
        let b = Buffer::from_vec(b);
        b.into_vec::<i256>().unwrap_err();

        // Truncates length: 3 i128s in an even capacity of 4 yield 1 i256
        // This is an implementation quirk, but isn't harmful
        let mut b: Vec<i128> = Vec::with_capacity(4);
        b.extend_from_slice(&[1, 2, 3]);
        let b = Buffer::from_vec(b);
        let back = b.into_vec::<i256>().unwrap();
        assert_eq!(back.len(), 1);
        assert_eq!(back.capacity(), 2);

        // Cannot use aligned allocation: a MutableBuffer-backed Buffer cannot
        // be converted into a Vec of any element type
        let b = Buffer::from(MutableBuffer::new(10));
        let b = b.into_vec::<u8>().unwrap_err();
        b.into_vec::<u64>().unwrap_err();

        // Test slicing (full-length slice keeps a second reference alive)
        let mut a: Vec<i128> = Vec::with_capacity(20);
        a.extend_from_slice(&[1, 4, 7, 8, 9, 3, 6]);
        let b = Buffer::from_vec(a);
        let slice = b.slice_with_length(0, 64);

        // Shared reference fails (`b` still holds the allocation)
        let slice = slice.into_vec::<i128>().unwrap_err();
        drop(b);

        // Succeeds as no outstanding shared reference; 64 bytes = 4 i128s
        let back = slice.into_vec::<i128>().unwrap();
        assert_eq!(&back, &[1, 4, 7, 8]);
        assert_eq!(back.capacity(), 20);

        // Slicing by non-multiple length truncates: 34 bytes -> 2 whole i128s
        let mut a: Vec<i128> = Vec::with_capacity(8);
        a.extend_from_slice(&[1, 4, 7, 3]);

        let b = Buffer::from_vec(a);
        let slice = b.slice_with_length(0, 34);
        drop(b);

        let back = slice.into_vec::<i128>().unwrap();
        assert_eq!(&back, &[1, 4]);
        assert_eq!(back.capacity(), 8);

        // Offset prevents conversion: a sliced buffer's data pointer no longer
        // matches the start of the allocation
        let a: Vec<u32> = vec![1, 3, 4, 6];
        let b = Buffer::from_vec(a).slice(2);
        b.into_vec::<u32>().unwrap_err();

        // An aligned allocation cannot become a Vec, but can be reclaimed as
        // a MutableBuffer when uniquely owned
        let b = MutableBuffer::new(16).into_buffer();
        let b = b.into_vec::<u8>().unwrap_err(); // Invalid layout
        let b = b.into_vec::<u32>().unwrap_err(); // Invalid layout
        b.into_mutable().unwrap();

        // A Vec-backed Buffer round-trips through MutableBuffer and back
        let b = Buffer::from_vec(vec![1_u32, 3, 5]);
        let b = b.into_mutable().unwrap();
        let b = Buffer::from(b);
        let b = b.into_vec::<u32>().unwrap();
        assert_eq!(b, &[1, 3, 5]);
    }
1027
1028    #[test]
1029    #[should_panic(expected = "capacity overflow")]
1030    fn test_from_iter_overflow() {
1031        let iter_len = usize::MAX / std::mem::size_of::<u64>() + 1;
1032        let _ = Buffer::from_iter(std::iter::repeat_n(0_u64, iter_len));
1033    }
1034
1035    #[test]
1036    fn bit_slice_length_preserved() {
1037        // Create a boring buffer
1038        let buf = Buffer::from_iter(std::iter::repeat_n(true, 64));
1039
1040        let assert_preserved = |offset: usize, len: usize| {
1041            let new_buf = buf.bit_slice(offset, len);
1042            assert_eq!(new_buf.len(), bit_util::ceil(len, 8));
1043
1044            // if the offset is not byte-aligned, we have to create a deep copy to a new buffer
1045            // (since the `offset` value inside a Buffer is byte-granular, not bit-granular), so
1046            // checking the offset should always return 0 if so. If the offset IS byte-aligned, we
1047            // want to make sure it doesn't unnecessarily create a deep copy.
1048            if offset % 8 == 0 {
1049                assert_eq!(new_buf.ptr_offset(), offset / 8);
1050            } else {
1051                assert_eq!(new_buf.ptr_offset(), 0);
1052            }
1053        };
1054
1055        // go through every available value for offset
1056        for o in 0..=64 {
1057            // and go through every length that could accompany that offset - we can't have a
1058            // situation where offset + len > 64, because that would go past the end of the buffer,
1059            // so we use the map to ensure it's in range.
1060            for l in (o..=64).map(|l| l - o) {
1061                // and we just want to make sure every one of these keeps its offset and length
1062                // when neeeded
1063                assert_preserved(o, l);
1064            }
1065        }
1066    }
1067
1068    #[test]
1069    fn test_strong_count() {
1070        let buffer = Buffer::from_iter(std::iter::repeat_n(0_u8, 100));
1071        assert_eq!(buffer.strong_count(), 1);
1072
1073        let buffer2 = buffer.clone();
1074        assert_eq!(buffer.strong_count(), 2);
1075
1076        let buffer3 = buffer2.clone();
1077        assert_eq!(buffer.strong_count(), 3);
1078
1079        drop(buffer);
1080        assert_eq!(buffer2.strong_count(), 2);
1081        assert_eq!(buffer3.strong_count(), 2);
1082
1083        // Strong count does not increase on move
1084        let capture = move || {
1085            assert_eq!(buffer3.strong_count(), 2);
1086        };
1087
1088        capture();
1089        assert_eq!(buffer2.strong_count(), 2);
1090
1091        drop(capture);
1092        assert_eq!(buffer2.strong_count(), 1);
1093    }
1094}