arrow_buffer/buffer/
mutable.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::alloc::{handle_alloc_error, Layout};
19use std::mem;
20use std::ptr::NonNull;
21
22use crate::alloc::{Deallocation, ALIGNMENT};
23use crate::{
24    bytes::Bytes,
25    native::{ArrowNativeType, ToByteSlice},
26    util::bit_util,
27};
28
29use super::Buffer;
30
31/// A [`MutableBuffer`] is Arrow's interface to build a [`Buffer`] out of items or slices of items.
32///
33/// [`Buffer`]s created from [`MutableBuffer`] (via `into`) are guaranteed to have its pointer aligned
34/// along cache lines and in multiple of 64 bytes.
35///
36/// Use [MutableBuffer::push] to insert an item, [MutableBuffer::extend_from_slice]
37/// to insert many items, and `into` to convert it to [`Buffer`].
38///
39/// For a safe, strongly typed API consider using [`Vec`] and [`ScalarBuffer`](crate::ScalarBuffer)
40///
41/// Note: this may be deprecated in a future release ([#1176](https://github.com/apache/arrow-rs/issues/1176))
42///
43/// # Example
44///
45/// ```
46/// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
47/// let mut buffer = MutableBuffer::new(0);
48/// buffer.push(256u32);
49/// buffer.extend_from_slice(&[1u32]);
50/// let buffer: Buffer = buffer.into();
51/// assert_eq!(buffer.as_slice(), &[0u8, 1, 0, 0, 1, 0, 0, 0])
52/// ```
53#[derive(Debug)]
54pub struct MutableBuffer {
55    // dangling iff capacity = 0
56    data: NonNull<u8>,
57    // invariant: len <= capacity
58    len: usize,
59    layout: Layout,
60}
61
62impl MutableBuffer {
63    /// Allocate a new [MutableBuffer] with initial capacity to be at least `capacity`.
64    ///
65    /// See [`MutableBuffer::with_capacity`].
66    #[inline]
67    pub fn new(capacity: usize) -> Self {
68        Self::with_capacity(capacity)
69    }
70
71    /// Allocate a new [MutableBuffer] with initial capacity to be at least `capacity`.
72    ///
73    /// # Panics
74    ///
75    /// If `capacity`, when rounded up to the nearest multiple of [`ALIGNMENT`], is greater
76    /// then `isize::MAX`, then this function will panic.
77    #[inline]
78    pub fn with_capacity(capacity: usize) -> Self {
79        let capacity = bit_util::round_upto_multiple_of_64(capacity);
80        let layout = Layout::from_size_align(capacity, ALIGNMENT)
81            .expect("failed to create layout for MutableBuffer");
82        let data = match layout.size() {
83            0 => dangling_ptr(),
84            _ => {
85                // Safety: Verified size != 0
86                let raw_ptr = unsafe { std::alloc::alloc(layout) };
87                NonNull::new(raw_ptr).unwrap_or_else(|| handle_alloc_error(layout))
88            }
89        };
90        Self {
91            data,
92            len: 0,
93            layout,
94        }
95    }
96
97    /// Allocates a new [MutableBuffer] with `len` and capacity to be at least `len` where
98    /// all bytes are guaranteed to be `0u8`.
99    /// # Example
100    /// ```
101    /// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
102    /// let mut buffer = MutableBuffer::from_len_zeroed(127);
103    /// assert_eq!(buffer.len(), 127);
104    /// assert!(buffer.capacity() >= 127);
105    /// let data = buffer.as_slice_mut();
106    /// assert_eq!(data[126], 0u8);
107    /// ```
108    pub fn from_len_zeroed(len: usize) -> Self {
109        let layout = Layout::from_size_align(len, ALIGNMENT).unwrap();
110        let data = match layout.size() {
111            0 => dangling_ptr(),
112            _ => {
113                // Safety: Verified size != 0
114                let raw_ptr = unsafe { std::alloc::alloc_zeroed(layout) };
115                NonNull::new(raw_ptr).unwrap_or_else(|| handle_alloc_error(layout))
116            }
117        };
118        Self { data, len, layout }
119    }
120
121    /// Create a [`MutableBuffer`] from the provided [`Vec`] without copying
122    #[inline]
123    #[deprecated(note = "Use From<Vec<T>>")]
124    pub fn from_vec<T: ArrowNativeType>(vec: Vec<T>) -> Self {
125        Self::from(vec)
126    }
127
128    /// Allocates a new [MutableBuffer] from given `Bytes`.
129    pub(crate) fn from_bytes(bytes: Bytes) -> Result<Self, Bytes> {
130        let layout = match bytes.deallocation() {
131            Deallocation::Standard(layout) => *layout,
132            _ => return Err(bytes),
133        };
134
135        let len = bytes.len();
136        let data = bytes.ptr();
137        mem::forget(bytes);
138
139        Ok(Self { data, len, layout })
140    }
141
142    /// creates a new [MutableBuffer] with capacity and length capable of holding `len` bits.
143    /// This is useful to create a buffer for packed bitmaps.
144    pub fn new_null(len: usize) -> Self {
145        let num_bytes = bit_util::ceil(len, 8);
146        MutableBuffer::from_len_zeroed(num_bytes)
147    }
148
149    /// Set the bits in the range of `[0, end)` to 0 (if `val` is false), or 1 (if `val`
150    /// is true). Also extend the length of this buffer to be `end`.
151    ///
152    /// This is useful when one wants to clear (or set) the bits and then manipulate
153    /// the buffer directly (e.g., modifying the buffer by holding a mutable reference
154    /// from `data_mut()`).
155    pub fn with_bitset(mut self, end: usize, val: bool) -> Self {
156        assert!(end <= self.layout.size());
157        let v = if val { 255 } else { 0 };
158        unsafe {
159            std::ptr::write_bytes(self.data.as_ptr(), v, end);
160            self.len = end;
161        }
162        self
163    }
164
165    /// Ensure that `count` bytes from `start` contain zero bits
166    ///
167    /// This is used to initialize the bits in a buffer, however, it has no impact on the
168    /// `len` of the buffer and so can be used to initialize the memory region from
169    /// `len` to `capacity`.
170    pub fn set_null_bits(&mut self, start: usize, count: usize) {
171        assert!(
172            start.saturating_add(count) <= self.layout.size(),
173            "range start index {start} and count {count} out of bounds for \
174            buffer of length {}",
175            self.layout.size(),
176        );
177
178        // Safety: `self.data[start..][..count]` is in-bounds and well-aligned for `u8`
179        unsafe {
180            std::ptr::write_bytes(self.data.as_ptr().add(start), 0, count);
181        }
182    }
183
184    /// Ensures that this buffer has at least `self.len + additional` bytes. This re-allocates iff
185    /// `self.len + additional > capacity`.
186    /// # Example
187    /// ```
188    /// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
189    /// let mut buffer = MutableBuffer::new(0);
190    /// buffer.reserve(253); // allocates for the first time
191    /// (0..253u8).for_each(|i| buffer.push(i)); // no reallocation
192    /// let buffer: Buffer = buffer.into();
193    /// assert_eq!(buffer.len(), 253);
194    /// ```
195    // For performance reasons, this must be inlined so that the `if` is executed inside the caller, and not as an extra call that just
196    // exits.
197    #[inline(always)]
198    pub fn reserve(&mut self, additional: usize) {
199        let required_cap = self.len + additional;
200        if required_cap > self.layout.size() {
201            let new_capacity = bit_util::round_upto_multiple_of_64(required_cap);
202            let new_capacity = std::cmp::max(new_capacity, self.layout.size() * 2);
203            self.reallocate(new_capacity)
204        }
205    }
206
207    #[cold]
208    fn reallocate(&mut self, capacity: usize) {
209        let new_layout = Layout::from_size_align(capacity, self.layout.align()).unwrap();
210        if new_layout.size() == 0 {
211            if self.layout.size() != 0 {
212                // Safety: data was allocated with layout
213                unsafe { std::alloc::dealloc(self.as_mut_ptr(), self.layout) };
214                self.layout = new_layout
215            }
216            return;
217        }
218
219        let data = match self.layout.size() {
220            // Safety: new_layout is not empty
221            0 => unsafe { std::alloc::alloc(new_layout) },
222            // Safety: verified new layout is valid and not empty
223            _ => unsafe { std::alloc::realloc(self.as_mut_ptr(), self.layout, capacity) },
224        };
225        self.data = NonNull::new(data).unwrap_or_else(|| handle_alloc_error(new_layout));
226        self.layout = new_layout;
227    }
228
229    /// Truncates this buffer to `len` bytes
230    ///
231    /// If `len` is greater than the buffer's current length, this has no effect
232    #[inline(always)]
233    pub fn truncate(&mut self, len: usize) {
234        if len > self.len {
235            return;
236        }
237        self.len = len;
238    }
239
240    /// Resizes the buffer, either truncating its contents (with no change in capacity), or
241    /// growing it (potentially reallocating it) and writing `value` in the newly available bytes.
242    /// # Example
243    /// ```
244    /// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
245    /// let mut buffer = MutableBuffer::new(0);
246    /// buffer.resize(253, 2); // allocates for the first time
247    /// assert_eq!(buffer.as_slice()[252], 2u8);
248    /// ```
249    // For performance reasons, this must be inlined so that the `if` is executed inside the caller, and not as an extra call that just
250    // exits.
251    #[inline(always)]
252    pub fn resize(&mut self, new_len: usize, value: u8) {
253        if new_len > self.len {
254            let diff = new_len - self.len;
255            self.reserve(diff);
256            // write the value
257            unsafe { self.data.as_ptr().add(self.len).write_bytes(value, diff) };
258        }
259        // this truncates the buffer when new_len < self.len
260        self.len = new_len;
261    }
262
263    /// Shrinks the capacity of the buffer as much as possible.
264    /// The new capacity will aligned to the nearest 64 bit alignment.
265    ///
266    /// # Example
267    /// ```
268    /// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
269    /// // 2 cache lines
270    /// let mut buffer = MutableBuffer::new(128);
271    /// assert_eq!(buffer.capacity(), 128);
272    /// buffer.push(1);
273    /// buffer.push(2);
274    ///
275    /// buffer.shrink_to_fit();
276    /// assert!(buffer.capacity() >= 64 && buffer.capacity() < 128);
277    /// ```
278    pub fn shrink_to_fit(&mut self) {
279        let new_capacity = bit_util::round_upto_multiple_of_64(self.len);
280        if new_capacity < self.layout.size() {
281            self.reallocate(new_capacity)
282        }
283    }
284
285    /// Returns whether this buffer is empty or not.
286    #[inline]
287    pub const fn is_empty(&self) -> bool {
288        self.len == 0
289    }
290
291    /// Returns the length (the number of bytes written) in this buffer.
292    /// The invariant `buffer.len() <= buffer.capacity()` is always upheld.
293    #[inline]
294    pub const fn len(&self) -> usize {
295        self.len
296    }
297
298    /// Returns the total capacity in this buffer.
299    /// The invariant `buffer.len() <= buffer.capacity()` is always upheld.
300    #[inline]
301    pub const fn capacity(&self) -> usize {
302        self.layout.size()
303    }
304
305    /// Clear all existing data from this buffer.
306    pub fn clear(&mut self) {
307        self.len = 0
308    }
309
310    /// Returns the data stored in this buffer as a slice.
311    pub fn as_slice(&self) -> &[u8] {
312        self
313    }
314
315    /// Returns the data stored in this buffer as a mutable slice.
316    pub fn as_slice_mut(&mut self) -> &mut [u8] {
317        self
318    }
319
320    /// Returns a raw pointer to this buffer's internal memory
321    /// This pointer is guaranteed to be aligned along cache-lines.
322    #[inline]
323    pub const fn as_ptr(&self) -> *const u8 {
324        self.data.as_ptr()
325    }
326
327    /// Returns a mutable raw pointer to this buffer's internal memory
328    /// This pointer is guaranteed to be aligned along cache-lines.
329    #[inline]
330    pub fn as_mut_ptr(&mut self) -> *mut u8 {
331        self.data.as_ptr()
332    }
333
334    #[deprecated(
335        since = "2.0.0",
336        note = "This method is deprecated in favour of `into` from the trait `Into`."
337    )]
338    /// Freezes this buffer and return an immutable version of it.
339    pub fn freeze(self) -> Buffer {
340        self.into_buffer()
341    }
342
343    #[inline]
344    pub(super) fn into_buffer(self) -> Buffer {
345        let bytes = unsafe { Bytes::new(self.data, self.len, Deallocation::Standard(self.layout)) };
346        std::mem::forget(self);
347        Buffer::from_bytes(bytes)
348    }
349
350    /// View this buffer as a mutable slice of a specific type.
351    ///
352    /// # Panics
353    ///
354    /// This function panics if the underlying buffer is not aligned
355    /// correctly for type `T`.
356    pub fn typed_data_mut<T: ArrowNativeType>(&mut self) -> &mut [T] {
357        // SAFETY
358        // ArrowNativeType is trivially transmutable, is sealed to prevent potentially incorrect
359        // implementation outside this crate, and this method checks alignment
360        let (prefix, offsets, suffix) = unsafe { self.as_slice_mut().align_to_mut::<T>() };
361        assert!(prefix.is_empty() && suffix.is_empty());
362        offsets
363    }
364
365    /// View buffer as a immutable slice of a specific type.
366    ///
367    /// # Panics
368    ///
369    /// This function panics if the underlying buffer is not aligned
370    /// correctly for type `T`.
371    pub fn typed_data<T: ArrowNativeType>(&self) -> &[T] {
372        // SAFETY
373        // ArrowNativeType is trivially transmutable, is sealed to prevent potentially incorrect
374        // implementation outside this crate, and this method checks alignment
375        let (prefix, offsets, suffix) = unsafe { self.as_slice().align_to::<T>() };
376        assert!(prefix.is_empty() && suffix.is_empty());
377        offsets
378    }
379
380    /// Extends this buffer from a slice of items that can be represented in bytes, increasing its capacity if needed.
381    /// # Example
382    /// ```
383    /// # use arrow_buffer::buffer::MutableBuffer;
384    /// let mut buffer = MutableBuffer::new(0);
385    /// buffer.extend_from_slice(&[2u32, 0]);
386    /// assert_eq!(buffer.len(), 8) // u32 has 4 bytes
387    /// ```
388    #[inline]
389    pub fn extend_from_slice<T: ArrowNativeType>(&mut self, items: &[T]) {
390        let additional = mem::size_of_val(items);
391        self.reserve(additional);
392        unsafe {
393            // this assumes that `[ToByteSlice]` can be copied directly
394            // without calling `to_byte_slice` for each element,
395            // which is correct for all ArrowNativeType implementations.
396            let src = items.as_ptr() as *const u8;
397            let dst = self.data.as_ptr().add(self.len);
398            std::ptr::copy_nonoverlapping(src, dst, additional)
399        }
400        self.len += additional;
401    }
402
403    /// Extends the buffer with a new item, increasing its capacity if needed.
404    /// # Example
405    /// ```
406    /// # use arrow_buffer::buffer::MutableBuffer;
407    /// let mut buffer = MutableBuffer::new(0);
408    /// buffer.push(256u32);
409    /// assert_eq!(buffer.len(), 4) // u32 has 4 bytes
410    /// ```
411    #[inline]
412    pub fn push<T: ToByteSlice>(&mut self, item: T) {
413        let additional = std::mem::size_of::<T>();
414        self.reserve(additional);
415        unsafe {
416            let src = item.to_byte_slice().as_ptr();
417            let dst = self.data.as_ptr().add(self.len);
418            std::ptr::copy_nonoverlapping(src, dst, additional);
419        }
420        self.len += additional;
421    }
422
423    /// Extends the buffer with a new item, without checking for sufficient capacity
424    /// # Safety
425    /// Caller must ensure that the capacity()-len()>=`size_of<T>`()
426    #[inline]
427    pub unsafe fn push_unchecked<T: ToByteSlice>(&mut self, item: T) {
428        let additional = std::mem::size_of::<T>();
429        let src = item.to_byte_slice().as_ptr();
430        let dst = self.data.as_ptr().add(self.len);
431        std::ptr::copy_nonoverlapping(src, dst, additional);
432        self.len += additional;
433    }
434
435    /// Extends the buffer by `additional` bytes equal to `0u8`, incrementing its capacity if needed.
436    #[inline]
437    pub fn extend_zeros(&mut self, additional: usize) {
438        self.resize(self.len + additional, 0);
439    }
440
441    /// # Safety
442    /// The caller must ensure that the buffer was properly initialized up to `len`.
443    #[inline]
444    pub unsafe fn set_len(&mut self, len: usize) {
445        assert!(len <= self.capacity());
446        self.len = len;
447    }
448
449    /// Invokes `f` with values `0..len` collecting the boolean results into a new `MutableBuffer`
450    ///
451    /// This is similar to `from_trusted_len_iter_bool`, however, can be significantly faster
452    /// as it eliminates the conditional `Iterator::next`
453    #[inline]
454    pub fn collect_bool<F: FnMut(usize) -> bool>(len: usize, mut f: F) -> Self {
455        let mut buffer = Self::new(bit_util::ceil(len, 64) * 8);
456
457        let chunks = len / 64;
458        let remainder = len % 64;
459        for chunk in 0..chunks {
460            let mut packed = 0;
461            for bit_idx in 0..64 {
462                let i = bit_idx + chunk * 64;
463                packed |= (f(i) as u64) << bit_idx;
464            }
465
466            // SAFETY: Already allocated sufficient capacity
467            unsafe { buffer.push_unchecked(packed) }
468        }
469
470        if remainder != 0 {
471            let mut packed = 0;
472            for bit_idx in 0..remainder {
473                let i = bit_idx + chunks * 64;
474                packed |= (f(i) as u64) << bit_idx;
475            }
476
477            // SAFETY: Already allocated sufficient capacity
478            unsafe { buffer.push_unchecked(packed) }
479        }
480
481        buffer.truncate(bit_util::ceil(len, 8));
482        buffer
483    }
484}
485
486/// Creates a non-null pointer with alignment of [`ALIGNMENT`]
487///
488/// This is similar to [`NonNull::dangling`]
489#[inline]
490pub(crate) fn dangling_ptr() -> NonNull<u8> {
491    // SAFETY: ALIGNMENT is a non-zero usize which is then cast
492    // to a *mut u8. Therefore, `ptr` is not null and the conditions for
493    // calling new_unchecked() are respected.
494    #[cfg(miri)]
495    {
496        // Since miri implies a nightly rust version we can use the unstable strict_provenance feature
497        unsafe { NonNull::new_unchecked(std::ptr::without_provenance_mut(ALIGNMENT)) }
498    }
499    #[cfg(not(miri))]
500    {
501        unsafe { NonNull::new_unchecked(ALIGNMENT as *mut u8) }
502    }
503}
504
505impl<A: ArrowNativeType> Extend<A> for MutableBuffer {
506    #[inline]
507    fn extend<T: IntoIterator<Item = A>>(&mut self, iter: T) {
508        let iterator = iter.into_iter();
509        self.extend_from_iter(iterator)
510    }
511}
512
513impl<T: ArrowNativeType> From<Vec<T>> for MutableBuffer {
514    fn from(value: Vec<T>) -> Self {
515        // Safety
516        // Vec::as_ptr guaranteed to not be null and ArrowNativeType are trivially transmutable
517        let data = unsafe { NonNull::new_unchecked(value.as_ptr() as _) };
518        let len = value.len() * mem::size_of::<T>();
519        // Safety
520        // Vec guaranteed to have a valid layout matching that of `Layout::array`
521        // This is based on `RawVec::current_memory`
522        let layout = unsafe { Layout::array::<T>(value.capacity()).unwrap_unchecked() };
523        mem::forget(value);
524        Self { data, len, layout }
525    }
526}
527
528impl MutableBuffer {
529    #[inline]
530    pub(super) fn extend_from_iter<T: ArrowNativeType, I: Iterator<Item = T>>(
531        &mut self,
532        mut iterator: I,
533    ) {
534        let item_size = std::mem::size_of::<T>();
535        let (lower, _) = iterator.size_hint();
536        let additional = lower * item_size;
537        self.reserve(additional);
538
539        // this is necessary because of https://github.com/rust-lang/rust/issues/32155
540        let mut len = SetLenOnDrop::new(&mut self.len);
541        let mut dst = unsafe { self.data.as_ptr().add(len.local_len) };
542        let capacity = self.layout.size();
543
544        while len.local_len + item_size <= capacity {
545            if let Some(item) = iterator.next() {
546                unsafe {
547                    let src = item.to_byte_slice().as_ptr();
548                    std::ptr::copy_nonoverlapping(src, dst, item_size);
549                    dst = dst.add(item_size);
550                }
551                len.local_len += item_size;
552            } else {
553                break;
554            }
555        }
556        drop(len);
557
558        iterator.for_each(|item| self.push(item));
559    }
560
561    /// Creates a [`MutableBuffer`] from an [`Iterator`] with a trusted (upper) length.
562    /// Prefer this to `collect` whenever possible, as it is faster ~60% faster.
563    /// # Example
564    /// ```
565    /// # use arrow_buffer::buffer::MutableBuffer;
566    /// let v = vec![1u32];
567    /// let iter = v.iter().map(|x| x * 2);
568    /// let buffer = unsafe { MutableBuffer::from_trusted_len_iter(iter) };
569    /// assert_eq!(buffer.len(), 4) // u32 has 4 bytes
570    /// ```
571    /// # Safety
572    /// This method assumes that the iterator's size is correct and is undefined behavior
573    /// to use it on an iterator that reports an incorrect length.
574    // This implementation is required for two reasons:
575    // 1. there is no trait `TrustedLen` in stable rust and therefore
576    //    we can't specialize `extend` for `TrustedLen` like `Vec` does.
577    // 2. `from_trusted_len_iter` is faster.
578    #[inline]
579    pub unsafe fn from_trusted_len_iter<T: ArrowNativeType, I: Iterator<Item = T>>(
580        iterator: I,
581    ) -> Self {
582        let item_size = std::mem::size_of::<T>();
583        let (_, upper) = iterator.size_hint();
584        let upper = upper.expect("from_trusted_len_iter requires an upper limit");
585        let len = upper * item_size;
586
587        let mut buffer = MutableBuffer::new(len);
588
589        let mut dst = buffer.data.as_ptr();
590        for item in iterator {
591            // note how there is no reserve here (compared with `extend_from_iter`)
592            let src = item.to_byte_slice().as_ptr();
593            std::ptr::copy_nonoverlapping(src, dst, item_size);
594            dst = dst.add(item_size);
595        }
596        assert_eq!(
597            dst.offset_from(buffer.data.as_ptr()) as usize,
598            len,
599            "Trusted iterator length was not accurately reported"
600        );
601        buffer.len = len;
602        buffer
603    }
604
605    /// Creates a [`MutableBuffer`] from a boolean [`Iterator`] with a trusted (upper) length.
606    /// # use arrow_buffer::buffer::MutableBuffer;
607    /// # Example
608    /// ```
609    /// # use arrow_buffer::buffer::MutableBuffer;
610    /// let v = vec![false, true, false];
611    /// let iter = v.iter().map(|x| *x || true);
612    /// let buffer = unsafe { MutableBuffer::from_trusted_len_iter_bool(iter) };
613    /// assert_eq!(buffer.len(), 1) // 3 booleans have 1 byte
614    /// ```
615    /// # Safety
616    /// This method assumes that the iterator's size is correct and is undefined behavior
617    /// to use it on an iterator that reports an incorrect length.
618    // This implementation is required for two reasons:
619    // 1. there is no trait `TrustedLen` in stable rust and therefore
620    //    we can't specialize `extend` for `TrustedLen` like `Vec` does.
621    // 2. `from_trusted_len_iter_bool` is faster.
622    #[inline]
623    pub unsafe fn from_trusted_len_iter_bool<I: Iterator<Item = bool>>(mut iterator: I) -> Self {
624        let (_, upper) = iterator.size_hint();
625        let len = upper.expect("from_trusted_len_iter requires an upper limit");
626
627        Self::collect_bool(len, |_| iterator.next().unwrap())
628    }
629
630    /// Creates a [`MutableBuffer`] from an [`Iterator`] with a trusted (upper) length or errors
631    /// if any of the items of the iterator is an error.
632    /// Prefer this to `collect` whenever possible, as it is faster ~60% faster.
633    /// # Safety
634    /// This method assumes that the iterator's size is correct and is undefined behavior
635    /// to use it on an iterator that reports an incorrect length.
636    #[inline]
637    pub unsafe fn try_from_trusted_len_iter<
638        E,
639        T: ArrowNativeType,
640        I: Iterator<Item = Result<T, E>>,
641    >(
642        iterator: I,
643    ) -> Result<Self, E> {
644        let item_size = std::mem::size_of::<T>();
645        let (_, upper) = iterator.size_hint();
646        let upper = upper.expect("try_from_trusted_len_iter requires an upper limit");
647        let len = upper * item_size;
648
649        let mut buffer = MutableBuffer::new(len);
650
651        let mut dst = buffer.data.as_ptr();
652        for item in iterator {
653            let item = item?;
654            // note how there is no reserve here (compared with `extend_from_iter`)
655            let src = item.to_byte_slice().as_ptr();
656            std::ptr::copy_nonoverlapping(src, dst, item_size);
657            dst = dst.add(item_size);
658        }
659        // try_from_trusted_len_iter is instantiated a lot, so we extract part of it into a less
660        // generic method to reduce compile time
661        unsafe fn finalize_buffer(dst: *mut u8, buffer: &mut MutableBuffer, len: usize) {
662            assert_eq!(
663                dst.offset_from(buffer.data.as_ptr()) as usize,
664                len,
665                "Trusted iterator length was not accurately reported"
666            );
667            buffer.len = len;
668        }
669        finalize_buffer(dst, &mut buffer, len);
670        Ok(buffer)
671    }
672}
673
674impl Default for MutableBuffer {
675    fn default() -> Self {
676        Self::with_capacity(0)
677    }
678}
679
680impl std::ops::Deref for MutableBuffer {
681    type Target = [u8];
682
683    fn deref(&self) -> &[u8] {
684        unsafe { std::slice::from_raw_parts(self.as_ptr(), self.len) }
685    }
686}
687
688impl std::ops::DerefMut for MutableBuffer {
689    fn deref_mut(&mut self) -> &mut [u8] {
690        unsafe { std::slice::from_raw_parts_mut(self.as_mut_ptr(), self.len) }
691    }
692}
693
694impl Drop for MutableBuffer {
695    fn drop(&mut self) {
696        if self.layout.size() != 0 {
697            // Safety: data was allocated with standard allocator with given layout
698            unsafe { std::alloc::dealloc(self.data.as_ptr() as _, self.layout) };
699        }
700    }
701}
702
703impl PartialEq for MutableBuffer {
704    fn eq(&self, other: &MutableBuffer) -> bool {
705        if self.len != other.len {
706            return false;
707        }
708        if self.layout != other.layout {
709            return false;
710        }
711        self.as_slice() == other.as_slice()
712    }
713}
714
715unsafe impl Sync for MutableBuffer {}
716unsafe impl Send for MutableBuffer {}
717
718struct SetLenOnDrop<'a> {
719    len: &'a mut usize,
720    local_len: usize,
721}
722
723impl<'a> SetLenOnDrop<'a> {
724    #[inline]
725    fn new(len: &'a mut usize) -> Self {
726        SetLenOnDrop {
727            local_len: *len,
728            len,
729        }
730    }
731}
732
733impl Drop for SetLenOnDrop<'_> {
734    #[inline]
735    fn drop(&mut self) {
736        *self.len = self.local_len;
737    }
738}
739
740/// Creating a `MutableBuffer` instance by setting bits according to the boolean values
741impl std::iter::FromIterator<bool> for MutableBuffer {
742    fn from_iter<I>(iter: I) -> Self
743    where
744        I: IntoIterator<Item = bool>,
745    {
746        let mut iterator = iter.into_iter();
747        let mut result = {
748            let byte_capacity: usize = iterator.size_hint().0.saturating_add(7) / 8;
749            MutableBuffer::new(byte_capacity)
750        };
751
752        loop {
753            let mut exhausted = false;
754            let mut byte_accum: u8 = 0;
755            let mut mask: u8 = 1;
756
757            //collect (up to) 8 bits into a byte
758            while mask != 0 {
759                if let Some(value) = iterator.next() {
760                    byte_accum |= match value {
761                        true => mask,
762                        false => 0,
763                    };
764                    mask <<= 1;
765                } else {
766                    exhausted = true;
767                    break;
768                }
769            }
770
771            // break if the iterator was exhausted before it provided a bool for this byte
772            if exhausted && mask == 1 {
773                break;
774            }
775
776            //ensure we have capacity to write the byte
777            if result.len() == result.capacity() {
778                //no capacity for new byte, allocate 1 byte more (plus however many more the iterator advertises)
779                let additional_byte_capacity = 1usize.saturating_add(
780                    iterator.size_hint().0.saturating_add(7) / 8, //convert bit count to byte count, rounding up
781                );
782                result.reserve(additional_byte_capacity)
783            }
784
785            // Soundness: capacity was allocated above
786            unsafe { result.push_unchecked(byte_accum) };
787            if exhausted {
788                break;
789            }
790        }
791        result
792    }
793}
794
795impl<T: ArrowNativeType> std::iter::FromIterator<T> for MutableBuffer {
796    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
797        let mut buffer = Self::default();
798        buffer.extend_from_iter(iter.into_iter());
799        buffer
800    }
801}
802
803#[cfg(test)]
804mod tests {
805    use super::*;
806
807    #[test]
808    fn test_mutable_new() {
809        let buf = MutableBuffer::new(63);
810        assert_eq!(64, buf.capacity());
811        assert_eq!(0, buf.len());
812        assert!(buf.is_empty());
813    }
814
815    #[test]
816    fn test_mutable_default() {
817        let buf = MutableBuffer::default();
818        assert_eq!(0, buf.capacity());
819        assert_eq!(0, buf.len());
820        assert!(buf.is_empty());
821
822        let mut buf = MutableBuffer::default();
823        buf.extend_from_slice(b"hello");
824        assert_eq!(5, buf.len());
825        assert_eq!(b"hello", buf.as_slice());
826    }
827
828    #[test]
829    fn test_mutable_extend_from_slice() {
830        let mut buf = MutableBuffer::new(100);
831        buf.extend_from_slice(b"hello");
832        assert_eq!(5, buf.len());
833        assert_eq!(b"hello", buf.as_slice());
834
835        buf.extend_from_slice(b" world");
836        assert_eq!(11, buf.len());
837        assert_eq!(b"hello world", buf.as_slice());
838
839        buf.clear();
840        assert_eq!(0, buf.len());
841        buf.extend_from_slice(b"hello arrow");
842        assert_eq!(11, buf.len());
843        assert_eq!(b"hello arrow", buf.as_slice());
844    }
845
846    #[test]
847    fn mutable_extend_from_iter() {
848        let mut buf = MutableBuffer::new(0);
849        buf.extend(vec![1u32, 2]);
850        assert_eq!(8, buf.len());
851        assert_eq!(&[1u8, 0, 0, 0, 2, 0, 0, 0], buf.as_slice());
852
853        buf.extend(vec![3u32, 4]);
854        assert_eq!(16, buf.len());
855        assert_eq!(
856            &[1u8, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0],
857            buf.as_slice()
858        );
859    }
860
861    #[test]
862    fn mutable_extend_from_iter_unaligned_u64() {
863        let mut buf = MutableBuffer::new(16);
864        buf.push(1_u8);
865        buf.extend([1_u64]);
866        assert_eq!(9, buf.len());
867        assert_eq!(&[1u8, 1u8, 0, 0, 0, 0, 0, 0, 0], buf.as_slice());
868    }
869
870    #[test]
871    fn mutable_extend_from_slice_unaligned_u64() {
872        let mut buf = MutableBuffer::new(16);
873        buf.extend_from_slice(&[1_u8]);
874        buf.extend_from_slice(&[1_u64]);
875        assert_eq!(9, buf.len());
876        assert_eq!(&[1u8, 1u8, 0, 0, 0, 0, 0, 0, 0], buf.as_slice());
877    }
878
879    #[test]
880    fn mutable_push_unaligned_u64() {
881        let mut buf = MutableBuffer::new(16);
882        buf.push(1_u8);
883        buf.push(1_u64);
884        assert_eq!(9, buf.len());
885        assert_eq!(&[1u8, 1u8, 0, 0, 0, 0, 0, 0, 0], buf.as_slice());
886    }
887
888    #[test]
889    fn mutable_push_unchecked_unaligned_u64() {
890        let mut buf = MutableBuffer::new(16);
891        unsafe {
892            buf.push_unchecked(1_u8);
893            buf.push_unchecked(1_u64);
894        }
895        assert_eq!(9, buf.len());
896        assert_eq!(&[1u8, 1u8, 0, 0, 0, 0, 0, 0, 0], buf.as_slice());
897    }
898
899    #[test]
900    fn test_from_trusted_len_iter() {
901        let iter = vec![1u32, 2].into_iter();
902        let buf = unsafe { MutableBuffer::from_trusted_len_iter(iter) };
903        assert_eq!(8, buf.len());
904        assert_eq!(&[1u8, 0, 0, 0, 2, 0, 0, 0], buf.as_slice());
905    }
906
907    #[test]
908    fn test_mutable_reserve() {
909        let mut buf = MutableBuffer::new(1);
910        assert_eq!(64, buf.capacity());
911
912        // Reserving a smaller capacity should have no effect.
913        buf.reserve(10);
914        assert_eq!(64, buf.capacity());
915
916        buf.reserve(80);
917        assert_eq!(128, buf.capacity());
918
919        buf.reserve(129);
920        assert_eq!(256, buf.capacity());
921    }
922
923    #[test]
924    fn test_mutable_resize() {
925        let mut buf = MutableBuffer::new(1);
926        assert_eq!(64, buf.capacity());
927        assert_eq!(0, buf.len());
928
929        buf.resize(20, 0);
930        assert_eq!(64, buf.capacity());
931        assert_eq!(20, buf.len());
932
933        buf.resize(10, 0);
934        assert_eq!(64, buf.capacity());
935        assert_eq!(10, buf.len());
936
937        buf.resize(100, 0);
938        assert_eq!(128, buf.capacity());
939        assert_eq!(100, buf.len());
940
941        buf.resize(30, 0);
942        assert_eq!(128, buf.capacity());
943        assert_eq!(30, buf.len());
944
945        buf.resize(0, 0);
946        assert_eq!(128, buf.capacity());
947        assert_eq!(0, buf.len());
948    }
949
950    #[test]
951    fn test_mutable_into() {
952        let mut buf = MutableBuffer::new(1);
953        buf.extend_from_slice(b"aaaa bbbb cccc dddd");
954        assert_eq!(19, buf.len());
955        assert_eq!(64, buf.capacity());
956        assert_eq!(b"aaaa bbbb cccc dddd", buf.as_slice());
957
958        let immutable_buf: Buffer = buf.into();
959        assert_eq!(19, immutable_buf.len());
960        assert_eq!(64, immutable_buf.capacity());
961        assert_eq!(b"aaaa bbbb cccc dddd", immutable_buf.as_slice());
962    }
963
964    #[test]
965    fn test_mutable_equal() {
966        let mut buf = MutableBuffer::new(1);
967        let mut buf2 = MutableBuffer::new(1);
968
969        buf.extend_from_slice(&[0xaa]);
970        buf2.extend_from_slice(&[0xaa, 0xbb]);
971        assert!(buf != buf2);
972
973        buf.extend_from_slice(&[0xbb]);
974        assert_eq!(buf, buf2);
975
976        buf2.reserve(65);
977        assert!(buf != buf2);
978    }
979
980    #[test]
981    fn test_mutable_shrink_to_fit() {
982        let mut buffer = MutableBuffer::new(128);
983        assert_eq!(buffer.capacity(), 128);
984        buffer.push(1);
985        buffer.push(2);
986
987        buffer.shrink_to_fit();
988        assert!(buffer.capacity() >= 64 && buffer.capacity() < 128);
989    }
990
991    #[test]
992    fn test_mutable_set_null_bits() {
993        let mut buffer = MutableBuffer::new(8).with_bitset(8, true);
994
995        for i in 0..=buffer.capacity() {
996            buffer.set_null_bits(i, 0);
997            assert_eq!(buffer[..8], [255; 8][..]);
998        }
999
1000        buffer.set_null_bits(1, 4);
1001        assert_eq!(buffer[..8], [255, 0, 0, 0, 0, 255, 255, 255][..]);
1002    }
1003
1004    #[test]
1005    #[should_panic = "out of bounds for buffer of length"]
1006    fn test_mutable_set_null_bits_oob() {
1007        let mut buffer = MutableBuffer::new(64);
1008        buffer.set_null_bits(1, buffer.capacity());
1009    }
1010
1011    #[test]
1012    #[should_panic = "out of bounds for buffer of length"]
1013    fn test_mutable_set_null_bits_oob_by_overflow() {
1014        let mut buffer = MutableBuffer::new(0);
1015        buffer.set_null_bits(1, usize::MAX);
1016    }
1017
1018    #[test]
1019    fn from_iter() {
1020        let buffer = [1u16, 2, 3, 4].into_iter().collect::<MutableBuffer>();
1021        assert_eq!(buffer.len(), 4 * mem::size_of::<u16>());
1022        assert_eq!(buffer.as_slice(), &[1, 0, 2, 0, 3, 0, 4, 0]);
1023    }
1024
1025    #[test]
1026    #[should_panic(expected = "failed to create layout for MutableBuffer: LayoutError")]
1027    fn test_with_capacity_panics_above_max_capacity() {
1028        let max_capacity = isize::MAX as usize - (isize::MAX as usize % ALIGNMENT);
1029        let _ = MutableBuffer::with_capacity(max_capacity + 1);
1030    }
1031}