arrow_buffer/buffer/mutable.rs

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

use std::alloc::{Layout, handle_alloc_error};
use std::mem;
use std::ptr::NonNull;

use crate::alloc::{ALIGNMENT, Deallocation};
use crate::{
    bytes::Bytes,
    native::{ArrowNativeType, ToByteSlice},
    util::bit_util,
};

#[cfg(feature = "pool")]
use crate::pool::{MemoryPool, MemoryReservation};
#[cfg(feature = "pool")]
use std::sync::Mutex;

use super::Buffer;

/// A [`MutableBuffer`] is a wrapper over memory regions, used to build
/// [`Buffer`]s out of items or slices of items.
///
/// [`Buffer`]s created from [`MutableBuffer`] (via `into`) are guaranteed to be
/// aligned along cache lines and in multiples of 64 bytes.
///
/// Use [MutableBuffer::push] to insert an item, [MutableBuffer::extend_from_slice]
/// to insert many items, and `into` to convert it to [`Buffer`]. For typed data,
/// it is often more efficient to use [`Vec`] and convert it to [`Buffer`] rather
/// than using [`MutableBuffer`] (see examples below).
///
/// # See Also
/// * For a safe, strongly typed API consider using [`Vec`] and [`ScalarBuffer`](crate::ScalarBuffer)
/// * To apply bitwise operations, see [`apply_bitwise_binary_op`] and [`apply_bitwise_unary_op`]
///
/// [`apply_bitwise_binary_op`]: crate::bit_util::apply_bitwise_binary_op
/// [`apply_bitwise_unary_op`]: crate::bit_util::apply_bitwise_unary_op
///
/// # Example: Creating a [`Buffer`] from a [`MutableBuffer`]
/// ```
/// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
/// let mut buffer = MutableBuffer::new(0);
/// buffer.push(256u32);
/// buffer.extend_from_slice(&[1u32]);
/// let buffer = Buffer::from(buffer);
/// assert_eq!(buffer.as_slice(), &[0u8, 1, 0, 0, 1, 0, 0, 0])
/// ```
///
/// The same can be achieved more efficiently by using a `Vec<u32>`
/// ```
/// # use arrow_buffer::buffer::Buffer;
/// let mut vec = Vec::new();
/// vec.push(256u32);
/// vec.extend_from_slice(&[1u32]);
/// let buffer = Buffer::from(vec);
/// assert_eq!(buffer.as_slice(), &[0u8, 1, 0, 0, 1, 0, 0, 0]);
/// ```
///
/// # Example: Creating a [`MutableBuffer`] from a `Vec<T>`
/// ```
/// # use arrow_buffer::buffer::MutableBuffer;
/// let vec = vec![1u32, 2, 3];
/// let mutable_buffer = MutableBuffer::from(vec); // reuses the allocation from vec
/// assert_eq!(mutable_buffer.len(), 12); // 3 * 4 bytes
/// ```
///
/// # Example: Creating a [`MutableBuffer`] from a [`Buffer`]
/// ```
/// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
/// let buffer: Buffer = Buffer::from(&[1u8, 2, 3, 4][..]);
/// // Only possible to convert a Buffer into a MutableBuffer if uniquely owned
/// // (i.e., there are no other references to it).
/// let mut mutable_buffer = match buffer.into_mutable() {
///    Ok(mutable) => mutable,
///    Err(orig_buffer) => {
///      panic!("buffer was not uniquely owned");
///    }
/// };
/// mutable_buffer.push(5u8);
/// let buffer = Buffer::from(mutable_buffer);
/// assert_eq!(buffer.as_slice(), &[1u8, 2, 3, 4, 5])
/// ```
#[derive(Debug)]
pub struct MutableBuffer {
    // dangling iff capacity = 0
    data: NonNull<u8>,
    // invariant: len <= capacity
    len: usize,
    layout: Layout,

    /// Memory reservation for tracking memory usage
    #[cfg(feature = "pool")]
    reservation: Mutex<Option<Box<dyn MemoryReservation>>>,
}

impl MutableBuffer {
    /// Allocate a new [MutableBuffer] with an initial capacity of at least `capacity`.
    ///
    /// See [`MutableBuffer::with_capacity`].
    #[inline]
    pub fn new(capacity: usize) -> Self {
        Self::with_capacity(capacity)
    }

    /// Allocate a new [MutableBuffer] with an initial capacity of at least `capacity`.
    ///
    /// # Panics
    ///
    /// If `capacity`, when rounded up to the nearest multiple of [`ALIGNMENT`], is greater
    /// than `isize::MAX`, then this function will panic.
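    ///
    /// # Example
    /// A minimal example; the requested capacity is rounded up to a multiple of 64:
    /// ```
    /// # use arrow_buffer::buffer::MutableBuffer;
    /// let buffer = MutableBuffer::with_capacity(100);
    /// assert_eq!(buffer.capacity(), 128);
    /// assert_eq!(buffer.len(), 0);
    /// ```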
    #[inline]
    pub fn with_capacity(capacity: usize) -> Self {
        let capacity = bit_util::round_upto_multiple_of_64(capacity);
        let layout = Layout::from_size_align(capacity, ALIGNMENT)
            .expect("failed to create layout for MutableBuffer");
        let data = match layout.size() {
            0 => dangling_ptr(),
            _ => {
                // Safety: Verified size != 0
                let raw_ptr = unsafe { std::alloc::alloc(layout) };
                NonNull::new(raw_ptr).unwrap_or_else(|| handle_alloc_error(layout))
            }
        };
        Self {
            data,
            len: 0,
            layout,
            #[cfg(feature = "pool")]
            reservation: std::sync::Mutex::new(None),
        }
    }

    /// Allocates a new [MutableBuffer] with length `len` and a capacity of at least `len`,
    /// where all bytes are guaranteed to be `0u8`.
    /// # Example
    /// ```
    /// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
    /// let mut buffer = MutableBuffer::from_len_zeroed(127);
    /// assert_eq!(buffer.len(), 127);
    /// assert!(buffer.capacity() >= 127);
    /// let data = buffer.as_slice_mut();
    /// assert_eq!(data[126], 0u8);
    /// ```
    pub fn from_len_zeroed(len: usize) -> Self {
        let layout = Layout::from_size_align(len, ALIGNMENT).unwrap();
        let data = match layout.size() {
            0 => dangling_ptr(),
            _ => {
                // Safety: Verified size != 0
                let raw_ptr = unsafe { std::alloc::alloc_zeroed(layout) };
                NonNull::new(raw_ptr).unwrap_or_else(|| handle_alloc_error(layout))
            }
        };
        Self {
            data,
            len,
            layout,
            #[cfg(feature = "pool")]
            reservation: std::sync::Mutex::new(None),
        }
    }

    /// Allocates a new [MutableBuffer] from the given `Bytes`.
    pub(crate) fn from_bytes(bytes: Bytes) -> Result<Self, Bytes> {
        let layout = match bytes.deallocation() {
            Deallocation::Standard(layout) => *layout,
            _ => return Err(bytes),
        };

        let len = bytes.len();
        let data = bytes.ptr();
        #[cfg(feature = "pool")]
        let reservation = bytes.reservation.lock().unwrap().take();
        mem::forget(bytes);

        Ok(Self {
            data,
            len,
            layout,
            #[cfg(feature = "pool")]
            reservation: Mutex::new(reservation),
        })
    }

    /// Creates a new [MutableBuffer] with capacity and length capable of holding `len` bits.
    /// This is useful to create a buffer for packed bitmaps.
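    ///
    /// # Example
    /// For instance, a bitmap for 10 values needs `ceil(10 / 8) = 2` bytes:
    /// ```
    /// # use arrow_buffer::buffer::MutableBuffer;
    /// let buffer = MutableBuffer::new_null(10);
    /// assert_eq!(buffer.len(), 2);
    /// assert_eq!(buffer.as_slice(), &[0u8, 0]);
    /// ```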
    pub fn new_null(len: usize) -> Self {
        let num_bytes = bit_util::ceil(len, 8);
        MutableBuffer::from_len_zeroed(num_bytes)
    }

    /// Set all bits in the first `end` bytes to 0 (if `val` is false), or 1 (if `val`
    /// is true). Also extend the length of this buffer to be `end`.
    ///
    /// This is useful when one wants to clear (or set) the bits and then manipulate
    /// the buffer directly (e.g., modifying the buffer by holding a mutable reference
    /// from `data_mut()`).
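    ///
    /// # Example
    /// A small example: set the first 8 bytes to all-ones bits, then mutate in place:
    /// ```
    /// # use arrow_buffer::buffer::MutableBuffer;
    /// let mut buffer = MutableBuffer::new(8).with_bitset(8, true);
    /// assert_eq!(buffer.as_slice(), &[255u8; 8]);
    /// buffer.as_slice_mut()[0] = 0;
    /// assert_eq!(buffer.as_slice()[0], 0);
    /// ```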
    pub fn with_bitset(mut self, end: usize, val: bool) -> Self {
        assert!(end <= self.layout.size());
        let v = if val { 255 } else { 0 };
        unsafe {
            std::ptr::write_bytes(self.data.as_ptr(), v, end);
            self.len = end;
        }
        self
    }

    /// Ensure that `count` bytes from `start` contain zero bits
    ///
    /// This is used to initialize the bits in a buffer; however, it has no impact on the
    /// `len` of the buffer, and so it can be used to initialize the memory region from
    /// `len` to `capacity`.
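    ///
    /// # Example
    /// A small example: clear four bytes in the middle of an all-ones buffer:
    /// ```
    /// # use arrow_buffer::buffer::MutableBuffer;
    /// let mut buffer = MutableBuffer::new(8).with_bitset(8, true);
    /// buffer.set_null_bits(2, 4);
    /// assert_eq!(buffer.as_slice(), &[255, 255, 0, 0, 0, 0, 255, 255]);
    /// ```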
    pub fn set_null_bits(&mut self, start: usize, count: usize) {
        assert!(
            start.saturating_add(count) <= self.layout.size(),
            "range start index {start} and count {count} out of bounds for \
            buffer of length {}",
            self.layout.size(),
        );

        // Safety: `self.data[start..][..count]` is in-bounds and well-aligned for `u8`
        unsafe {
            std::ptr::write_bytes(self.data.as_ptr().add(start), 0, count);
        }
    }

    /// Ensures that this buffer has at least `self.len + additional` bytes. This re-allocates iff
    /// `self.len + additional > capacity`.
    /// # Example
    /// ```
    /// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
    /// let mut buffer = MutableBuffer::new(0);
    /// buffer.reserve(253); // allocates for the first time
    /// (0..253u8).for_each(|i| buffer.push(i)); // no reallocation
    /// let buffer: Buffer = buffer.into();
    /// assert_eq!(buffer.len(), 253);
    /// ```
    // For performance reasons, this must be inlined so that the `if` is executed inside the caller, and not as an extra call that just
    // exits.
    #[inline(always)]
    pub fn reserve(&mut self, additional: usize) {
        let required_cap = self.len + additional;
        if required_cap > self.layout.size() {
            let new_capacity = bit_util::round_upto_multiple_of_64(required_cap);
            let new_capacity = std::cmp::max(new_capacity, self.layout.size() * 2);
            self.reallocate(new_capacity)
        }
    }

    /// Appends `slice_to_repeat` to this buffer, repeated `repeat_count` times.
    ///
    /// # Example
    ///
    /// ## Repeat the same string bytes multiple times
    /// ```
    /// # use arrow_buffer::buffer::MutableBuffer;
    /// let mut buffer = MutableBuffer::new(0);
    /// let bytes_to_repeat = b"ab";
    /// buffer.repeat_slice_n_times(bytes_to_repeat, 3);
    /// assert_eq!(buffer.as_slice(), b"ababab");
    /// ```
    pub fn repeat_slice_n_times<T: ArrowNativeType>(
        &mut self,
        slice_to_repeat: &[T],
        repeat_count: usize,
    ) {
        if repeat_count == 0 || slice_to_repeat.is_empty() {
            return;
        }

        let bytes_to_repeat = size_of_val(slice_to_repeat);

        // Ensure capacity
        self.reserve(repeat_count * bytes_to_repeat);

        // Save the length before we do all the copies to know where to start from
        let length_before = self.len;

        // Copy the initial slice once so we can use doubling strategy on it
        self.extend_from_slice(slice_to_repeat);

        // This tracks how many bytes we have added by repeating so far
        let added_repeats_length = bytes_to_repeat;
        assert_eq!(
            self.len - length_before,
            added_repeats_length,
            "should copy exactly the same number of bytes"
        );

        // Number of times the slice was repeated
        let mut already_repeated_times = 1;

        // We will use doubling strategy to fill the buffer in log(repeat_count) steps
        while already_repeated_times < repeat_count {
            // How many slices can we copy in this iteration
            // (either double what we have, or just the remaining ones)
            let number_of_slices_to_copy =
                already_repeated_times.min(repeat_count - already_repeated_times);
            let number_of_bytes_to_copy = number_of_slices_to_copy * bytes_to_repeat;

            unsafe {
                // Get to the start of the data before we started copying anything
                let src = self.data.as_ptr().add(length_before) as *const u8;

                // Go to the current location to copy to (end of current data)
                let dst = self.data.as_ptr().add(self.len);

                // SAFETY: the regions do not overlap, as `dst` starts at least
                // `number_of_bytes_to_copy` bytes past `src`
                std::ptr::copy_nonoverlapping(src, dst, number_of_bytes_to_copy)
            }

            // Advance the length by the amount of data we just copied (doubled)
            self.len += number_of_bytes_to_copy;

            already_repeated_times += number_of_slices_to_copy;
        }
    }

    #[cold]
    fn reallocate(&mut self, capacity: usize) {
        let new_layout = Layout::from_size_align(capacity, self.layout.align()).unwrap();
        if new_layout.size() == 0 {
            if self.layout.size() != 0 {
                // Safety: data was allocated with layout
                unsafe { std::alloc::dealloc(self.as_mut_ptr(), self.layout) };
                self.layout = new_layout
            }
            return;
        }

        let data = match self.layout.size() {
            // Safety: new_layout is not empty
            0 => unsafe { std::alloc::alloc(new_layout) },
            // Safety: verified new layout is valid and not empty
            _ => unsafe { std::alloc::realloc(self.as_mut_ptr(), self.layout, capacity) },
        };
        self.data = NonNull::new(data).unwrap_or_else(|| handle_alloc_error(new_layout));
        self.layout = new_layout;
        #[cfg(feature = "pool")]
        {
            if let Some(reservation) = self.reservation.lock().unwrap().as_mut() {
                reservation.resize(self.layout.size());
            }
        }
    }

    /// Truncates this buffer to `len` bytes
    ///
    /// If `len` is greater than the buffer's current length, this has no effect
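    ///
    /// # Example
    /// ```
    /// # use arrow_buffer::buffer::MutableBuffer;
    /// let mut buffer = MutableBuffer::new(0);
    /// buffer.extend_from_slice(&[1u8, 2, 3, 4]);
    /// buffer.truncate(2);
    /// assert_eq!(buffer.as_slice(), &[1u8, 2]);
    /// // truncating beyond the current length is a no-op
    /// buffer.truncate(100);
    /// assert_eq!(buffer.len(), 2);
    /// ```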
    #[inline(always)]
    pub fn truncate(&mut self, len: usize) {
        if len > self.len {
            return;
        }
        self.len = len;
        #[cfg(feature = "pool")]
        {
            if let Some(reservation) = self.reservation.lock().unwrap().as_mut() {
                reservation.resize(self.len);
            }
        }
    }

    /// Resizes the buffer, either truncating its contents (with no change in capacity), or
    /// growing it (potentially reallocating it) and writing `value` in the newly available bytes.
    /// # Example
    /// ```
    /// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
    /// let mut buffer = MutableBuffer::new(0);
    /// buffer.resize(253, 2); // allocates for the first time
    /// assert_eq!(buffer.as_slice()[252], 2u8);
    /// ```
    // For performance reasons, this must be inlined so that the `if` is executed inside the caller, and not as an extra call that just
    // exits.
    #[inline(always)]
    pub fn resize(&mut self, new_len: usize, value: u8) {
        if new_len > self.len {
            let diff = new_len - self.len;
            self.reserve(diff);
            // write the value
            unsafe { self.data.as_ptr().add(self.len).write_bytes(value, diff) };
        }
        // this truncates the buffer when new_len < self.len
        self.len = new_len;
        #[cfg(feature = "pool")]
        {
            if let Some(reservation) = self.reservation.lock().unwrap().as_mut() {
                reservation.resize(self.len);
            }
        }
    }

    /// Shrinks the capacity of the buffer as much as possible.
    /// The new capacity will be rounded up to the nearest multiple of 64 bytes.
    ///
    /// # Example
    /// ```
    /// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
    /// // 2 cache lines
    /// let mut buffer = MutableBuffer::new(128);
    /// assert_eq!(buffer.capacity(), 128);
    /// buffer.push(1);
    /// buffer.push(2);
    ///
    /// buffer.shrink_to_fit();
    /// assert!(buffer.capacity() >= 64 && buffer.capacity() < 128);
    /// ```
    pub fn shrink_to_fit(&mut self) {
        let new_capacity = bit_util::round_upto_multiple_of_64(self.len);
        if new_capacity < self.layout.size() {
            self.reallocate(new_capacity)
        }
    }

    /// Returns whether this buffer is empty or not.
    #[inline]
    pub const fn is_empty(&self) -> bool {
        self.len == 0
    }

    /// Returns the length (the number of bytes written) in this buffer.
    /// The invariant `buffer.len() <= buffer.capacity()` is always upheld.
    #[inline]
    pub const fn len(&self) -> usize {
        self.len
    }

    /// Returns the total capacity in this buffer, in bytes.
    ///
    /// The invariant `buffer.len() <= buffer.capacity()` is always upheld.
    #[inline]
    pub const fn capacity(&self) -> usize {
        self.layout.size()
    }

    /// Clear all existing data from this buffer.
    pub fn clear(&mut self) {
        self.len = 0
    }

    /// Returns the data stored in this buffer as a slice.
    pub fn as_slice(&self) -> &[u8] {
        self
    }

    /// Returns the data stored in this buffer as a mutable slice.
    pub fn as_slice_mut(&mut self) -> &mut [u8] {
        self
    }

    /// Returns a raw pointer to this buffer's internal memory
    /// This pointer is guaranteed to be aligned along cache-lines.
    #[inline]
    pub const fn as_ptr(&self) -> *const u8 {
        self.data.as_ptr()
    }

    /// Returns a mutable raw pointer to this buffer's internal memory
    /// This pointer is guaranteed to be aligned along cache-lines.
    #[inline]
    pub fn as_mut_ptr(&mut self) -> *mut u8 {
        self.data.as_ptr()
    }

    #[inline]
    pub(super) fn into_buffer(self) -> Buffer {
        let bytes = unsafe { Bytes::new(self.data, self.len, Deallocation::Standard(self.layout)) };
        #[cfg(feature = "pool")]
        {
            let reservation = self.reservation.lock().unwrap().take();
            *bytes.reservation.lock().unwrap() = reservation;
        }
        std::mem::forget(self);
        Buffer::from(bytes)
    }

    /// View this buffer as a mutable slice of a specific type.
    ///
    /// # Panics
    ///
    /// This function panics if the underlying buffer is not aligned
    /// correctly for type `T`.
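    ///
    /// # Example
    /// A small example, reusing a `Vec<u32>` allocation and mutating it in place:
    /// ```
    /// # use arrow_buffer::buffer::MutableBuffer;
    /// let mut buffer = MutableBuffer::from(vec![1u32, 2, 3]);
    /// buffer.typed_data_mut::<u32>()[0] = 42;
    /// assert_eq!(buffer.typed_data::<u32>(), &[42, 2, 3]);
    /// ```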
    pub fn typed_data_mut<T: ArrowNativeType>(&mut self) -> &mut [T] {
        // SAFETY
        // ArrowNativeType is trivially transmutable, is sealed to prevent potentially incorrect
        // implementation outside this crate, and this method checks alignment
        let (prefix, offsets, suffix) = unsafe { self.as_slice_mut().align_to_mut::<T>() };
        assert!(prefix.is_empty() && suffix.is_empty());
        offsets
    }

    /// View this buffer as an immutable slice of a specific type.
    ///
    /// # Panics
    ///
    /// This function panics if the underlying buffer is not aligned
    /// correctly for type `T`.
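    ///
    /// # Example
    /// ```
    /// # use arrow_buffer::buffer::MutableBuffer;
    /// let mut buffer = MutableBuffer::new(0);
    /// buffer.extend_from_slice(&[1u16, 2, 3]);
    /// assert_eq!(buffer.typed_data::<u16>(), &[1u16, 2, 3]);
    /// ```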
    pub fn typed_data<T: ArrowNativeType>(&self) -> &[T] {
        // SAFETY
        // ArrowNativeType is trivially transmutable, is sealed to prevent potentially incorrect
        // implementation outside this crate, and this method checks alignment
        let (prefix, offsets, suffix) = unsafe { self.as_slice().align_to::<T>() };
        assert!(prefix.is_empty() && suffix.is_empty());
        offsets
    }

    /// Extends this buffer from a slice of items that can be represented in bytes, increasing its capacity if needed.
    /// # Example
    /// ```
    /// # use arrow_buffer::buffer::MutableBuffer;
    /// let mut buffer = MutableBuffer::new(0);
    /// buffer.extend_from_slice(&[2u32, 0]);
    /// assert_eq!(buffer.len(), 8) // u32 has 4 bytes
    /// ```
    #[inline]
    pub fn extend_from_slice<T: ArrowNativeType>(&mut self, items: &[T]) {
        let additional = mem::size_of_val(items);
        self.reserve(additional);
        unsafe {
            // this assumes that `[ToByteSlice]` can be copied directly
            // without calling `to_byte_slice` for each element,
            // which is correct for all ArrowNativeType implementations.
            let src = items.as_ptr() as *const u8;
            let dst = self.data.as_ptr().add(self.len);
            std::ptr::copy_nonoverlapping(src, dst, additional)
        }
        self.len += additional;
    }

    /// Extends the buffer with a new item, increasing its capacity if needed.
    /// # Example
    /// ```
    /// # use arrow_buffer::buffer::MutableBuffer;
    /// let mut buffer = MutableBuffer::new(0);
    /// buffer.push(256u32);
    /// assert_eq!(buffer.len(), 4) // u32 has 4 bytes
    /// ```
    #[inline]
    pub fn push<T: ToByteSlice>(&mut self, item: T) {
        let additional = std::mem::size_of::<T>();
        self.reserve(additional);
        unsafe {
            let src = item.to_byte_slice().as_ptr();
            let dst = self.data.as_ptr().add(self.len);
            std::ptr::copy_nonoverlapping(src, dst, additional);
        }
        self.len += additional;
    }

    /// Extends the buffer with a new item, without checking for sufficient capacity
    /// # Safety
    /// The caller must ensure that `self.capacity() - self.len() >= size_of::<T>()`
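    ///
    /// # Example
    /// ```
    /// # use arrow_buffer::buffer::MutableBuffer;
    /// let mut buffer = MutableBuffer::new(0);
    /// buffer.reserve(4);
    /// // Safety: 4 bytes of capacity were reserved above
    /// unsafe { buffer.push_unchecked(1u32) };
    /// assert_eq!(buffer.as_slice(), &[1u8, 0, 0, 0]);
    /// ```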
    #[inline]
    pub unsafe fn push_unchecked<T: ToByteSlice>(&mut self, item: T) {
        let additional = std::mem::size_of::<T>();
        let src = item.to_byte_slice().as_ptr();
        let dst = unsafe { self.data.as_ptr().add(self.len) };
        unsafe { std::ptr::copy_nonoverlapping(src, dst, additional) };
        self.len += additional;
    }

    /// Extends the buffer by `additional` bytes equal to `0u8`, incrementing its capacity if needed.
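    ///
    /// # Example
    /// ```
    /// # use arrow_buffer::buffer::MutableBuffer;
    /// let mut buffer = MutableBuffer::new(0);
    /// buffer.push(1u8);
    /// buffer.extend_zeros(3);
    /// assert_eq!(buffer.as_slice(), &[1u8, 0, 0, 0]);
    /// ```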
    #[inline]
    pub fn extend_zeros(&mut self, additional: usize) {
        self.resize(self.len + additional, 0);
    }

    /// # Safety
    /// The caller must ensure that the buffer was properly initialized up to `len`.
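    ///
    /// # Example
    /// ```
    /// # use arrow_buffer::buffer::MutableBuffer;
    /// let mut buffer = MutableBuffer::from_len_zeroed(8);
    /// // Safety: the first 8 bytes were zero-initialized by `from_len_zeroed`
    /// unsafe { buffer.set_len(4) };
    /// assert_eq!(buffer.len(), 4);
    /// ```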
    #[inline]
    pub unsafe fn set_len(&mut self, len: usize) {
        assert!(len <= self.capacity());
        self.len = len;
    }

    /// Invokes `f` with values `0..len`, collecting the boolean results into a new `MutableBuffer`
    ///
    /// This is similar to `from_trusted_len_iter_bool`; however, it can be significantly faster
    /// as it eliminates the conditional `Iterator::next`
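    ///
    /// # Example
    /// A small example packing the bits `i % 2 == 0` for `i` in `0..10`:
    /// ```
    /// # use arrow_buffer::buffer::MutableBuffer;
    /// let buffer = MutableBuffer::collect_bool(10, |i| i % 2 == 0);
    /// // bits are packed LSB-first: 0b01010101, then the 2 remaining bits
    /// assert_eq!(buffer.as_slice(), &[0b01010101, 0b00000001]);
    /// ```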
    #[inline]
    pub fn collect_bool<F: FnMut(usize) -> bool>(len: usize, mut f: F) -> Self {
        let mut buffer: Vec<u64> = Vec::with_capacity(bit_util::ceil(len, 64));

        let chunks = len / 64;
        let remainder = len % 64;
        buffer.extend((0..chunks).map(|chunk| {
            let mut packed = 0;
            for bit_idx in 0..64 {
                let i = bit_idx + chunk * 64;
                packed |= (f(i) as u64) << bit_idx;
            }

            packed
        }));

        if remainder != 0 {
            let mut packed = 0;
            for bit_idx in 0..remainder {
                let i = bit_idx + chunks * 64;
                packed |= (f(i) as u64) << bit_idx;
            }

            buffer.push(packed)
        }

        let mut buffer: MutableBuffer = buffer.into();
        buffer.truncate(bit_util::ceil(len, 8));
        buffer
    }

    /// Register this [`MutableBuffer`] with the provided [`MemoryPool`]
    ///
    /// This claims the memory used by this buffer in the pool, allowing for
    /// accurate accounting of memory usage. Any prior reservation will be
    /// released so this works well when the buffer is being shared among
    /// multiple arrays.
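    ///
    /// # Example
    /// A minimal sketch, assuming the `pool` feature is enabled and using the
    /// crate's `TrackingMemoryPool`:
    /// ```
    /// # use arrow_buffer::buffer::MutableBuffer;
    /// # use arrow_buffer::pool::{MemoryPool, TrackingMemoryPool};
    /// let pool = TrackingMemoryPool::default();
    /// let buffer = MutableBuffer::with_capacity(64);
    /// buffer.claim(&pool);
    /// assert_eq!(pool.used(), 64);
    /// ```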
    #[cfg(feature = "pool")]
    pub fn claim(&self, pool: &dyn MemoryPool) {
        *self.reservation.lock().unwrap() = Some(pool.reserve(self.capacity()));
    }
}

/// Creates a non-null pointer with alignment of [`ALIGNMENT`]
///
/// This is similar to [`NonNull::dangling`]
#[inline]
pub(crate) fn dangling_ptr() -> NonNull<u8> {
    // SAFETY: ALIGNMENT is a non-zero usize which is then cast
    // to a *mut u8. Therefore, `ptr` is not null and the conditions for
    // calling new_unchecked() are respected.
    #[cfg(miri)]
    {
        // Since miri implies a nightly rust version we can use the unstable strict_provenance feature
        unsafe { NonNull::new_unchecked(std::ptr::without_provenance_mut(ALIGNMENT)) }
    }
    #[cfg(not(miri))]
    {
        unsafe { NonNull::new_unchecked(ALIGNMENT as *mut u8) }
    }
}

impl<A: ArrowNativeType> Extend<A> for MutableBuffer {
    #[inline]
    fn extend<T: IntoIterator<Item = A>>(&mut self, iter: T) {
        let iterator = iter.into_iter();
        self.extend_from_iter(iterator)
    }
}

impl<T: ArrowNativeType> From<Vec<T>> for MutableBuffer {
    fn from(value: Vec<T>) -> Self {
        // Safety
        // Vec::as_ptr guaranteed to not be null and ArrowNativeType are trivially transmutable
        let data = unsafe { NonNull::new_unchecked(value.as_ptr() as _) };
        let len = value.len() * mem::size_of::<T>();
        // Safety
        // Vec guaranteed to have a valid layout matching that of `Layout::array`
        // This is based on `RawVec::current_memory`
        let layout = unsafe { Layout::array::<T>(value.capacity()).unwrap_unchecked() };
        mem::forget(value);
        Self {
            data,
            len,
            layout,
            #[cfg(feature = "pool")]
            reservation: std::sync::Mutex::new(None),
        }
    }
}

impl MutableBuffer {
    #[inline]
    pub(super) fn extend_from_iter<T: ArrowNativeType, I: Iterator<Item = T>>(
        &mut self,
        mut iterator: I,
    ) {
        let item_size = std::mem::size_of::<T>();
        let (lower, _) = iterator.size_hint();
        let additional = lower * item_size;
        self.reserve(additional);

        // this is necessary because of https://github.com/rust-lang/rust/issues/32155
        let mut len = SetLenOnDrop::new(&mut self.len);
        let mut dst = unsafe { self.data.as_ptr().add(len.local_len) };
        let capacity = self.layout.size();

        while len.local_len + item_size <= capacity {
            if let Some(item) = iterator.next() {
                unsafe {
                    let src = item.to_byte_slice().as_ptr();
                    std::ptr::copy_nonoverlapping(src, dst, item_size);
                    dst = dst.add(item_size);
                }
                len.local_len += item_size;
            } else {
                break;
            }
        }
        drop(len);

        iterator.for_each(|item| self.push(item));
    }

    /// Creates a [`MutableBuffer`] from an [`Iterator`] with a trusted (upper) length.
    /// Prefer this to `collect` whenever possible, as it is ~60% faster.
    /// # Example
    /// ```
    /// # use arrow_buffer::buffer::MutableBuffer;
    /// let v = vec![1u32];
    /// let iter = v.iter().map(|x| x * 2);
    /// let buffer = unsafe { MutableBuffer::from_trusted_len_iter(iter) };
    /// assert_eq!(buffer.len(), 4) // u32 has 4 bytes
    /// ```
    /// # Safety
    /// This method assumes that the iterator's size is correct and it is undefined behavior
    /// to use it on an iterator that reports an incorrect length.
    // This implementation is required for two reasons:
    // 1. there is no trait `TrustedLen` in stable rust and therefore
    //    we can't specialize `extend` for `TrustedLen` like `Vec` does.
    // 2. `from_trusted_len_iter` is faster.
    #[inline]
    pub unsafe fn from_trusted_len_iter<T: ArrowNativeType, I: Iterator<Item = T>>(
        iterator: I,
    ) -> Self {
        let item_size = std::mem::size_of::<T>();
        let (_, upper) = iterator.size_hint();
        let upper = upper.expect("from_trusted_len_iter requires an upper limit");
        let len = upper * item_size;

        let mut buffer = MutableBuffer::new(len);

        let mut dst = buffer.data.as_ptr();
        for item in iterator {
            // note how there is no reserve here (compared with `extend_from_iter`)
            let src = item.to_byte_slice().as_ptr();
            unsafe { std::ptr::copy_nonoverlapping(src, dst, item_size) };
            dst = unsafe { dst.add(item_size) };
        }
        assert_eq!(
            unsafe { dst.offset_from(buffer.data.as_ptr()) } as usize,
            len,
            "Trusted iterator length was not accurately reported"
        );
        buffer.len = len;
        buffer
    }

    /// Creates a [`MutableBuffer`] from a boolean [`Iterator`] with a trusted (upper) length.
    /// # Example
    /// ```
    /// # use arrow_buffer::buffer::MutableBuffer;
    /// let v = vec![false, true, false];
    /// let iter = v.iter().map(|x| *x || true);
    /// let buffer = unsafe { MutableBuffer::from_trusted_len_iter_bool(iter) };
    /// assert_eq!(buffer.len(), 1) // 3 booleans have 1 byte
    /// ```
    /// # Safety
    /// This method assumes that the iterator's size is correct and it is undefined behavior
    /// to use it on an iterator that reports an incorrect length.
    // This implementation is required for two reasons:
    // 1. there is no trait `TrustedLen` in stable rust and therefore
    //    we can't specialize `extend` for `TrustedLen` like `Vec` does.
    // 2. `from_trusted_len_iter_bool` is faster.
    #[inline]
    pub unsafe fn from_trusted_len_iter_bool<I: Iterator<Item = bool>>(mut iterator: I) -> Self {
        let (_, upper) = iterator.size_hint();
        let len = upper.expect("from_trusted_len_iter_bool requires an upper limit");

        Self::collect_bool(len, |_| iterator.next().unwrap())
    }

    /// Creates a [`MutableBuffer`] from an [`Iterator`] with a trusted (upper) length, or errors
    /// if any of the items of the iterator is an error.
    /// Prefer this to `collect` whenever possible, as it is ~60% faster.
    /// # Safety
    /// This method assumes that the iterator's size is correct and it is undefined behavior
    /// to use it on an iterator that reports an incorrect length.
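    ///
    /// # Example
    /// A small example with an infallible item type:
    /// ```
    /// # use arrow_buffer::buffer::MutableBuffer;
    /// let iter = vec![1u32, 2].into_iter().map(Ok::<u32, ()>);
    /// let buffer = unsafe { MutableBuffer::try_from_trusted_len_iter(iter).unwrap() };
    /// assert_eq!(buffer.len(), 8); // 2 items * 4 bytes
    /// ```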
    #[inline]
    pub unsafe fn try_from_trusted_len_iter<
        E,
        T: ArrowNativeType,
        I: Iterator<Item = Result<T, E>>,
    >(
        iterator: I,
    ) -> Result<Self, E> {
        let item_size = std::mem::size_of::<T>();
        let (_, upper) = iterator.size_hint();
        let upper = upper.expect("try_from_trusted_len_iter requires an upper limit");
        let len = upper * item_size;

        let mut buffer = MutableBuffer::new(len);

        let mut dst = buffer.data.as_ptr();
        for item in iterator {
            let item = item?;
            // note how there is no reserve here (compared with `extend_from_iter`)
            let src = item.to_byte_slice().as_ptr();
            unsafe { std::ptr::copy_nonoverlapping(src, dst, item_size) };
            dst = unsafe { dst.add(item_size) };
        }
        // try_from_trusted_len_iter is instantiated a lot, so we extract part of it into a less
        // generic method to reduce compile time
        unsafe fn finalize_buffer(dst: *mut u8, buffer: &mut MutableBuffer, len: usize) {
            unsafe {
                assert_eq!(
                    dst.offset_from(buffer.data.as_ptr()) as usize,
                    len,
                    "Trusted iterator length was not accurately reported"
                );
                buffer.len = len;
            }
        }
        unsafe { finalize_buffer(dst, &mut buffer, len) };
        Ok(buffer)
    }
}

impl Default for MutableBuffer {
    fn default() -> Self {
        Self::with_capacity(0)
    }
}

impl std::ops::Deref for MutableBuffer {
    type Target = [u8];

    fn deref(&self) -> &[u8] {
        unsafe { std::slice::from_raw_parts(self.as_ptr(), self.len) }
    }
}

impl std::ops::DerefMut for MutableBuffer {
    fn deref_mut(&mut self) -> &mut [u8] {
        unsafe { std::slice::from_raw_parts_mut(self.as_mut_ptr(), self.len) }
    }
}

impl AsRef<[u8]> for &MutableBuffer {
    fn as_ref(&self) -> &[u8] {
        self.as_slice()
    }
}

impl Drop for MutableBuffer {
    fn drop(&mut self) {
        if self.layout.size() != 0 {
            // Safety: data was allocated with standard allocator with given layout
            unsafe { std::alloc::dealloc(self.data.as_ptr() as _, self.layout) };
        }
    }
}

impl PartialEq for MutableBuffer {
    fn eq(&self, other: &MutableBuffer) -> bool {
        if self.len != other.len {
            return false;
        }
        if self.layout != other.layout {
            return false;
        }
        self.as_slice() == other.as_slice()
    }
}

unsafe impl Sync for MutableBuffer {}
unsafe impl Send for MutableBuffer {}

struct SetLenOnDrop<'a> {
    len: &'a mut usize,
    local_len: usize,
}

impl<'a> SetLenOnDrop<'a> {
    #[inline]
    fn new(len: &'a mut usize) -> Self {
        SetLenOnDrop {
            local_len: *len,
            len,
        }
    }
}

impl Drop for SetLenOnDrop<'_> {
    #[inline]
    fn drop(&mut self) {
        *self.len = self.local_len;
    }
}

/// Creates a `MutableBuffer` by setting bits according to the supplied boolean values
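///
/// A small example; bits are packed LSB-first into bytes:
/// ```
/// # use arrow_buffer::buffer::MutableBuffer;
/// let buffer: MutableBuffer = [true, false, true].into_iter().collect();
/// assert_eq!(buffer.as_slice(), &[0b0000_0101]);
/// ```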
impl std::iter::FromIterator<bool> for MutableBuffer {
    fn from_iter<I>(iter: I) -> Self
    where
        I: IntoIterator<Item = bool>,
    {
        let mut iterator = iter.into_iter();
        let mut result = {
            let byte_capacity: usize = iterator.size_hint().0.saturating_add(7) / 8;
            MutableBuffer::new(byte_capacity)
        };

        loop {
            let mut exhausted = false;
            let mut byte_accum: u8 = 0;
            let mut mask: u8 = 1;

            //collect (up to) 8 bits into a byte
            while mask != 0 {
                if let Some(value) = iterator.next() {
                    byte_accum |= match value {
                        true => mask,
                        false => 0,
                    };
                    mask <<= 1;
                } else {
                    exhausted = true;
                    break;
                }
            }

            // break if the iterator was exhausted before it provided a bool for this byte
            if exhausted && mask == 1 {
                break;
            }

            //ensure we have capacity to write the byte
            if result.len() == result.capacity() {
                //no capacity for new byte, allocate 1 byte more (plus however many more the iterator advertises)
                let additional_byte_capacity = 1usize.saturating_add(
                    iterator.size_hint().0.saturating_add(7) / 8, //convert bit count to byte count, rounding up
                );
                result.reserve(additional_byte_capacity)
            }

            // Soundness: capacity was allocated above
            unsafe { result.push_unchecked(byte_accum) };
            if exhausted {
                break;
            }
        }
        result
    }
}

impl<T: ArrowNativeType> std::iter::FromIterator<T> for MutableBuffer {
    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
        let mut buffer = Self::default();
        buffer.extend_from_iter(iter.into_iter());
        buffer
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_mutable_new() {
        let buf = MutableBuffer::new(63);
        assert_eq!(64, buf.capacity());
        assert_eq!(0, buf.len());
        assert!(buf.is_empty());
    }

    #[test]
    fn test_mutable_default() {
        let buf = MutableBuffer::default();
        assert_eq!(0, buf.capacity());
        assert_eq!(0, buf.len());
        assert!(buf.is_empty());

        let mut buf = MutableBuffer::default();
        buf.extend_from_slice(b"hello");
        assert_eq!(5, buf.len());
        assert_eq!(b"hello", buf.as_slice());
    }

    #[test]
    fn test_mutable_extend_from_slice() {
        let mut buf = MutableBuffer::new(100);
        buf.extend_from_slice(b"hello");
        assert_eq!(5, buf.len());
        assert_eq!(b"hello", buf.as_slice());

        buf.extend_from_slice(b" world");
        assert_eq!(11, buf.len());
        assert_eq!(b"hello world", buf.as_slice());

        buf.clear();
        assert_eq!(0, buf.len());
        buf.extend_from_slice(b"hello arrow");
        assert_eq!(11, buf.len());
        assert_eq!(b"hello arrow", buf.as_slice());
    }

    #[test]
    fn mutable_extend_from_iter() {
        let mut buf = MutableBuffer::new(0);
        buf.extend(vec![1u32, 2]);
        assert_eq!(8, buf.len());
        assert_eq!(&[1u8, 0, 0, 0, 2, 0, 0, 0], buf.as_slice());

        buf.extend(vec![3u32, 4]);
        assert_eq!(16, buf.len());
        assert_eq!(
            &[1u8, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0],
            buf.as_slice()
        );
    }

    #[test]
    fn mutable_extend_from_iter_unaligned_u64() {
        let mut buf = MutableBuffer::new(16);
        buf.push(1_u8);
        buf.extend([1_u64]);
        assert_eq!(9, buf.len());
        assert_eq!(&[1u8, 1u8, 0, 0, 0, 0, 0, 0, 0], buf.as_slice());
    }

    #[test]
    fn mutable_extend_from_slice_unaligned_u64() {
        let mut buf = MutableBuffer::new(16);
        buf.extend_from_slice(&[1_u8]);
        buf.extend_from_slice(&[1_u64]);
        assert_eq!(9, buf.len());
        assert_eq!(&[1u8, 1u8, 0, 0, 0, 0, 0, 0, 0], buf.as_slice());
    }

    #[test]
    fn mutable_push_unaligned_u64() {
        let mut buf = MutableBuffer::new(16);
        buf.push(1_u8);
        buf.push(1_u64);
        assert_eq!(9, buf.len());
        assert_eq!(&[1u8, 1u8, 0, 0, 0, 0, 0, 0, 0], buf.as_slice());
    }

    #[test]
    fn mutable_push_unchecked_unaligned_u64() {
        let mut buf = MutableBuffer::new(16);
        unsafe {
            buf.push_unchecked(1_u8);
            buf.push_unchecked(1_u64);
        }
        assert_eq!(9, buf.len());
        assert_eq!(&[1u8, 1u8, 0, 0, 0, 0, 0, 0, 0], buf.as_slice());
    }

    #[test]
    fn test_from_trusted_len_iter() {
        let iter = vec![1u32, 2].into_iter();
        let buf = unsafe { MutableBuffer::from_trusted_len_iter(iter) };
        assert_eq!(8, buf.len());
        assert_eq!(&[1u8, 0, 0, 0, 2, 0, 0, 0], buf.as_slice());
    }

    #[test]
    fn test_mutable_reserve() {
        let mut buf = MutableBuffer::new(1);
        assert_eq!(64, buf.capacity());

        // Reserving a smaller capacity should have no effect.
        buf.reserve(10);
        assert_eq!(64, buf.capacity());

        buf.reserve(80);
        assert_eq!(128, buf.capacity());

        buf.reserve(129);
        assert_eq!(256, buf.capacity());
    }

    #[test]
    fn test_mutable_resize() {
        let mut buf = MutableBuffer::new(1);
        assert_eq!(64, buf.capacity());
        assert_eq!(0, buf.len());

        buf.resize(20, 0);
        assert_eq!(64, buf.capacity());
        assert_eq!(20, buf.len());

        buf.resize(10, 0);
        assert_eq!(64, buf.capacity());
        assert_eq!(10, buf.len());

        buf.resize(100, 0);
        assert_eq!(128, buf.capacity());
        assert_eq!(100, buf.len());

        buf.resize(30, 0);
        assert_eq!(128, buf.capacity());
        assert_eq!(30, buf.len());

        buf.resize(0, 0);
        assert_eq!(128, buf.capacity());
        assert_eq!(0, buf.len());
    }

    #[test]
    fn test_mutable_into() {
        let mut buf = MutableBuffer::new(1);
        buf.extend_from_slice(b"aaaa bbbb cccc dddd");
        assert_eq!(19, buf.len());
        assert_eq!(64, buf.capacity());
        assert_eq!(b"aaaa bbbb cccc dddd", buf.as_slice());

        let immutable_buf: Buffer = buf.into();
        assert_eq!(19, immutable_buf.len());
        assert_eq!(64, immutable_buf.capacity());
        assert_eq!(b"aaaa bbbb cccc dddd", immutable_buf.as_slice());
    }

    #[test]
    fn test_mutable_equal() {
        let mut buf = MutableBuffer::new(1);
        let mut buf2 = MutableBuffer::new(1);

        buf.extend_from_slice(&[0xaa]);
        buf2.extend_from_slice(&[0xaa, 0xbb]);
        assert!(buf != buf2);

        buf.extend_from_slice(&[0xbb]);
        assert_eq!(buf, buf2);

        buf2.reserve(65);
        assert!(buf != buf2);
    }

    #[test]
    fn test_mutable_shrink_to_fit() {
        let mut buffer = MutableBuffer::new(128);
        assert_eq!(buffer.capacity(), 128);
        buffer.push(1);
        buffer.push(2);

        buffer.shrink_to_fit();
        assert!(buffer.capacity() >= 64 && buffer.capacity() < 128);
    }

    #[test]
    fn test_mutable_set_null_bits() {
        let mut buffer = MutableBuffer::new(8).with_bitset(8, true);

        for i in 0..=buffer.capacity() {
            buffer.set_null_bits(i, 0);
            assert_eq!(buffer[..8], [255; 8][..]);
        }

        buffer.set_null_bits(1, 4);
        assert_eq!(buffer[..8], [255, 0, 0, 0, 0, 255, 255, 255][..]);
    }

    #[test]
    #[should_panic = "out of bounds for buffer of length"]
    fn test_mutable_set_null_bits_oob() {
        let mut buffer = MutableBuffer::new(64);
        buffer.set_null_bits(1, buffer.capacity());
    }

    #[test]
    #[should_panic = "out of bounds for buffer of length"]
    fn test_mutable_set_null_bits_oob_by_overflow() {
        let mut buffer = MutableBuffer::new(0);
        buffer.set_null_bits(1, usize::MAX);
    }

    #[test]
    fn from_iter() {
        let buffer = [1u16, 2, 3, 4].into_iter().collect::<MutableBuffer>();
        assert_eq!(buffer.len(), 4 * mem::size_of::<u16>());
        assert_eq!(buffer.as_slice(), &[1, 0, 2, 0, 3, 0, 4, 0]);
    }

    #[test]
    #[should_panic(expected = "failed to create layout for MutableBuffer: LayoutError")]
    fn test_with_capacity_panics_above_max_capacity() {
        let max_capacity = isize::MAX as usize - (isize::MAX as usize % ALIGNMENT);
        let _ = MutableBuffer::with_capacity(max_capacity + 1);
    }

    #[cfg(feature = "pool")]
    mod pool_tests {
        use super::*;
        use crate::pool::{MemoryPool, TrackingMemoryPool};

        #[test]
        fn test_reallocate_with_pool() {
            let pool = TrackingMemoryPool::default();
            let mut buffer = MutableBuffer::with_capacity(100);
            buffer.claim(&pool);

            // Initial capacity should be 128 (multiple of 64)
            assert_eq!(buffer.capacity(), 128);
            assert_eq!(pool.used(), 128);

            // Reallocate to a larger size
            buffer.reallocate(200);

            // The capacity is exactly the requested size, not rounded up
            assert_eq!(buffer.capacity(), 200);
            assert_eq!(pool.used(), 200);

            // Reallocate to a smaller size
            buffer.reallocate(50);

            // The capacity is exactly the requested size, not rounded up
            assert_eq!(buffer.capacity(), 50);
            assert_eq!(pool.used(), 50);
        }

        #[test]
        fn test_truncate_with_pool() {
            let pool = TrackingMemoryPool::default();
            let mut buffer = MutableBuffer::with_capacity(100);

            // Fill buffer with some data
            buffer.resize(80, 1);
            assert_eq!(buffer.len(), 80);

            buffer.claim(&pool);
            assert_eq!(pool.used(), 128);

            // Truncate buffer
            buffer.truncate(40);
            assert_eq!(buffer.len(), 40);
            assert_eq!(pool.used(), 40);

            // Truncate to zero
            buffer.truncate(0);
            assert_eq!(buffer.len(), 0);
            assert_eq!(pool.used(), 0);
        }

        #[test]
        fn test_resize_with_pool() {
            let pool = TrackingMemoryPool::default();
            let mut buffer = MutableBuffer::with_capacity(100);
            buffer.claim(&pool);

            // Initial state
            assert_eq!(buffer.len(), 0);
            assert_eq!(pool.used(), 128);

            // Resize to increase length
            buffer.resize(50, 1);
            assert_eq!(buffer.len(), 50);
            assert_eq!(pool.used(), 50);

            // Resize to increase length beyond capacity
            buffer.resize(150, 1);
            assert_eq!(buffer.len(), 150);
            assert_eq!(buffer.capacity(), 256);
            assert_eq!(pool.used(), 150);

            // Resize to decrease length
            buffer.resize(30, 1);
            assert_eq!(buffer.len(), 30);
            assert_eq!(pool.used(), 30);
        }

        #[test]
        fn test_buffer_lifecycle_with_pool() {
            let pool = TrackingMemoryPool::default();

            // Create a buffer with memory reservation
            let mut mutable = MutableBuffer::with_capacity(100);
            mutable.resize(80, 1);
            mutable.claim(&pool);

            // Memory reservation is based on capacity when using claim()
            assert_eq!(pool.used(), 128);

            // Convert to immutable Buffer
            let buffer = mutable.into_buffer();

            // Memory reservation should be preserved
            assert_eq!(pool.used(), 128);

            // Drop the buffer and the reservation should be released
            drop(buffer);
            assert_eq!(pool.used(), 0);
        }
    }

    fn create_expected_repeated_slice<T: ArrowNativeType>(
        slice_to_repeat: &[T],
        repeat_count: usize,
    ) -> Buffer {
        let mut expected = MutableBuffer::new(size_of_val(slice_to_repeat) * repeat_count);
        for _ in 0..repeat_count {
            // Not using repeat_slice_n_times as it is the function under test
            expected.extend_from_slice(slice_to_repeat);
        }
        expected.into()
    }

    // Helper to test a specific repeat count with various slice sizes
    fn test_repeat_count<T: ArrowNativeType + PartialEq + std::fmt::Debug>(
        repeat_count: usize,
        test_data: &[T],
    ) {
        let mut buffer = MutableBuffer::new(0);
        buffer.repeat_slice_n_times(test_data, repeat_count);

        let expected = create_expected_repeated_slice(test_data, repeat_count);
        let result: Buffer = buffer.into();

        assert_eq!(
            result,
            expected,
            "Failed for repeat_count={}, slice_len={}",
            repeat_count,
            test_data.len()
        );
    }

    #[test]
    fn test_repeat_slice_count_edge_cases() {
        // Empty slice
        test_repeat_count(100, &[] as &[i32]);

        // Zero repeats
        test_repeat_count(0, &[1i32, 2, 3]);
    }

    #[test]
    fn test_small_repeats_counts() {
        // exercise each small repeat count, including any specialized implementation
        let data = &[1u8, 2, 3, 4, 5];

        for count in 1..=10 {
            test_repeat_count(count, data);
        }
    }

    #[test]
    fn test_different_size_of_i32_repeat_slice() {
        let data: &[i32] = &[1, 2, 3];
        let data_with_single_item: &[i32] = &[42];

        for data in &[data, data_with_single_item] {
            for item in 1..=9 {
                let base_repeat_count = 2_usize.pow(item);
                test_repeat_count(base_repeat_count - 1, data);
                test_repeat_count(base_repeat_count, data);
                test_repeat_count(base_repeat_count + 1, data);
            }
        }
    }

    #[test]
    fn test_different_size_of_u8_repeat_slice() {
        let data: &[u8] = &[1, 2, 3];
        let data_with_single_item: &[u8] = &[10];

        for data in &[data, data_with_single_item] {
            for item in 1..=9 {
                let base_repeat_count = 2_usize.pow(item);
                test_repeat_count(base_repeat_count - 1, data);
                test_repeat_count(base_repeat_count, data);
                test_repeat_count(base_repeat_count + 1, data);
            }
        }
    }

    #[test]
    fn test_different_size_of_u16_repeat_slice() {
        let data: &[u16] = &[1, 2, 3];
        let data_with_single_item: &[u16] = &[10];

        for data in &[data, data_with_single_item] {
            for item in 1..=9 {
                let base_repeat_count = 2_usize.pow(item);
                test_repeat_count(base_repeat_count - 1, data);
                test_repeat_count(base_repeat_count, data);
                test_repeat_count(base_repeat_count + 1, data);
            }
        }
    }

    #[test]
    fn test_various_slice_lengths() {
        // Test different slice lengths with same repeat pattern
        let repeat_count = 37; // Arbitrary non-power-of-2

        // Single element
        test_repeat_count(repeat_count, &[42i32]);

        // Small slices
        test_repeat_count(repeat_count, &[1i32, 2]);
        test_repeat_count(repeat_count, &[1i32, 2, 3]);
        test_repeat_count(repeat_count, &[1i32, 2, 3, 4]);
        test_repeat_count(repeat_count, &[1i32, 2, 3, 4, 5]);

        // Larger slices
        let data_10: Vec<i32> = (0..10).collect();
        test_repeat_count(repeat_count, &data_10);

        let data_100: Vec<i32> = (0..100).collect();
        test_repeat_count(repeat_count, &data_100);

        let data_1000: Vec<i32> = (0..1000).collect();
        test_repeat_count(repeat_count, &data_1000);
    }
}