vortex_buffer/
buffer.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::type_name;
5use std::cmp::Ordering;
6use std::collections::Bound;
7use std::fmt::{Debug, Formatter};
8use std::hash::{Hash, Hasher};
9use std::marker::PhantomData;
10use std::ops::{Deref, RangeBounds};
11
12use bytes::{Buf, Bytes};
13use vortex_error::{VortexExpect, vortex_panic};
14
15use crate::debug::TruncatedDebug;
16use crate::trusted_len::TrustedLen;
17use crate::{Alignment, BufferMut, ByteBuffer};
18
19/// An immutable buffer of items of `T`.
20pub struct Buffer<T> {
21    pub(crate) bytes: Bytes,
22    pub(crate) length: usize,
23    pub(crate) alignment: Alignment,
24    pub(crate) _marker: PhantomData<T>,
25}
26
27impl<T> Clone for Buffer<T> {
28    #[inline]
29    fn clone(&self) -> Self {
30        Self {
31            bytes: self.bytes.clone(),
32            length: self.length,
33            alignment: self.alignment,
34            _marker: PhantomData,
35        }
36    }
37}
38
39impl<T> Default for Buffer<T> {
40    fn default() -> Self {
41        Self {
42            bytes: Default::default(),
43            length: 0,
44            alignment: Alignment::of::<T>(),
45            _marker: PhantomData,
46        }
47    }
48}
49
50impl<T> PartialEq for Buffer<T> {
51    #[inline]
52    fn eq(&self, other: &Self) -> bool {
53        self.bytes == other.bytes
54    }
55}
56
57impl<T: PartialEq> PartialEq<Vec<T>> for Buffer<T> {
58    fn eq(&self, other: &Vec<T>) -> bool {
59        self.as_ref() == other.as_slice()
60    }
61}
62
63impl<T: PartialEq> PartialEq<Buffer<T>> for Vec<T> {
64    fn eq(&self, other: &Buffer<T>) -> bool {
65        self.as_slice() == other.as_ref()
66    }
67}
68
69impl<T> Eq for Buffer<T> {}
70
71impl<T> Ord for Buffer<T> {
72    #[inline]
73    fn cmp(&self, other: &Self) -> Ordering {
74        self.bytes.cmp(&other.bytes)
75    }
76}
77
78impl<T> PartialOrd for Buffer<T> {
79    #[inline]
80    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
81        Some(self.cmp(other))
82    }
83}
84
85impl<T> Hash for Buffer<T> {
86    #[inline]
87    fn hash<H: Hasher>(&self, state: &mut H) {
88        self.bytes.as_ref().hash(state)
89    }
90}
91
92impl<T> Buffer<T> {
93    /// Returns a new `Buffer<T>` copied from the provided `Vec<T>`, `&[T]`, etc.
94    ///
95    /// Due to our underlying usage of `bytes::Bytes`, we are unable to take zero-copy ownership
96    /// of the provided `Vec<T>` while maintaining the ability to convert it back into a mutable
97    /// buffer. We could fix this by forking `Bytes`, or in many other complex ways, but for now
98    /// callers should prefer to construct `Buffer<T>` from a `BufferMut<T>`.
99    pub fn copy_from(values: impl AsRef<[T]>) -> Self {
100        BufferMut::copy_from(values).freeze()
101    }
102
103    /// Returns a new `Buffer<T>` copied from the provided slice and with the requested alignment.
104    pub fn copy_from_aligned(values: impl AsRef<[T]>, alignment: Alignment) -> Self {
105        BufferMut::copy_from_aligned(values, alignment).freeze()
106    }
107
108    /// Create a new zeroed `Buffer` with the given value.
109    pub fn zeroed(len: usize) -> Self {
110        Self::zeroed_aligned(len, Alignment::of::<T>())
111    }
112
113    /// Create a new zeroed `Buffer` with the given value.
114    pub fn zeroed_aligned(len: usize, alignment: Alignment) -> Self {
115        BufferMut::zeroed_aligned(len, alignment).freeze()
116    }
117
118    /// Create a new empty `ByteBuffer` with the provided alignment.
119    pub fn empty() -> Self {
120        BufferMut::empty().freeze()
121    }
122
123    /// Create a new empty `ByteBuffer` with the provided alignment.
124    pub fn empty_aligned(alignment: Alignment) -> Self {
125        BufferMut::empty_aligned(alignment).freeze()
126    }
127
128    /// Create a new full `ByteBuffer` with the given value.
129    pub fn full(item: T, len: usize) -> Self
130    where
131        T: Copy,
132    {
133        BufferMut::full(item, len).freeze()
134    }
135
136    /// Create a `Buffer<T>` zero-copy from a `ByteBuffer`.
137    ///
138    /// ## Panics
139    ///
140    /// Panics if the buffer is not aligned to the size of `T`, or the length is not a multiple of
141    /// the size of `T`.
142    pub fn from_byte_buffer(buffer: ByteBuffer) -> Self {
143        // TODO(ngates): should this preserve the current alignment of the buffer?
144        Self::from_byte_buffer_aligned(buffer, Alignment::of::<T>())
145    }
146
147    /// Create a `Buffer<T>` zero-copy from a `ByteBuffer`.
148    ///
149    /// ## Panics
150    ///
151    /// Panics if the buffer is not aligned to the given alignment, if the length is not a multiple
152    /// of the size of `T`, or if the given alignment is not aligned to that of `T`.
153    pub fn from_byte_buffer_aligned(buffer: ByteBuffer, alignment: Alignment) -> Self {
154        Self::from_bytes_aligned(buffer.into_inner(), alignment)
155    }
156
157    /// Create a `Buffer<T>` zero-copy from a `Bytes`.
158    ///
159    /// ## Panics
160    ///
161    /// Panics if the buffer is not aligned to the size of `T`, or the length is not a multiple of
162    /// the size of `T`.
163    pub fn from_bytes_aligned(bytes: Bytes, alignment: Alignment) -> Self {
164        if !alignment.is_aligned_to(Alignment::of::<T>()) {
165            vortex_panic!(
166                "Alignment {} must be compatible with the scalar type's alignment {}",
167                alignment,
168                Alignment::of::<T>(),
169            );
170        }
171        if bytes.as_ptr().align_offset(*alignment) != 0 {
172            vortex_panic!(
173                "Bytes alignment must align to the requested alignment {}",
174                alignment,
175            );
176        }
177        if bytes.len() % size_of::<T>() != 0 {
178            vortex_panic!(
179                "Bytes length {} must be a multiple of the scalar type's size {}",
180                bytes.len(),
181                size_of::<T>()
182            );
183        }
184        let length = bytes.len() / size_of::<T>();
185        Self {
186            bytes,
187            length,
188            alignment,
189            _marker: Default::default(),
190        }
191    }
192
193    /// Create a buffer with values from the TrustedLen iterator.
194    /// Should be preferred over `from_iter` when the iterator is known to be `TrustedLen`.
195    pub fn from_trusted_len_iter<I: TrustedLen<Item = T>>(iter: I) -> Self {
196        let (_, upper_bound) = iter.size_hint();
197        let mut buffer = BufferMut::with_capacity(
198            upper_bound.vortex_expect("TrustedLen iterator has no upper bound"),
199        );
200        buffer.extend_trusted(iter);
201        buffer.freeze()
202    }
203
204    /// Returns the length of the buffer in elements of type T.
205    #[inline(always)]
206    pub fn len(&self) -> usize {
207        self.length
208    }
209
210    /// Returns whether the buffer is empty.
211    #[inline(always)]
212    pub fn is_empty(&self) -> bool {
213        self.length == 0
214    }
215
216    /// Returns the alignment of the buffer.
217    #[inline(always)]
218    pub fn alignment(&self) -> Alignment {
219        self.alignment
220    }
221
222    /// Returns a slice over the buffer of elements of type T.
223    #[inline(always)]
224    pub fn as_slice(&self) -> &[T] {
225        // SAFETY: alignment of Buffer is checked on construction
226        unsafe { std::slice::from_raw_parts(self.bytes.as_ptr().cast(), self.length) }
227    }
228
229    /// Return a view over the buffer as an opaque byte slice.
230    #[inline(always)]
231    pub fn as_bytes(&self) -> &[u8] {
232        self.bytes.as_ref()
233    }
234
235    /// Returns an iterator over the buffer of elements of type T.
236    pub fn iter(&self) -> Iter<'_, T> {
237        Iter {
238            inner: self.as_slice().iter(),
239        }
240    }
241
242    /// Returns a slice of self for the provided range.
243    ///
244    /// # Panics
245    ///
246    /// Requires that `begin <= end` and `end <= self.len()`.
247    /// Also requires that both `begin` and `end` are aligned to the buffer's required alignment.
248    #[inline(always)]
249    pub fn slice(&self, range: impl RangeBounds<usize>) -> Self {
250        self.slice_with_alignment(range, self.alignment)
251    }
252
253    /// Returns a slice of self for the provided range, with no guarantees about the resulting
254    /// alignment.
255    ///
256    /// # Panics
257    ///
258    /// Requires that `begin <= end` and `end <= self.len()`.
259    #[inline(always)]
260    pub fn slice_unaligned(&self, range: impl RangeBounds<usize>) -> Self {
261        self.slice_with_alignment(range, Alignment::of::<u8>())
262    }
263
264    /// Returns a slice of self for the provided range, ensuring the resulting slice has the
265    /// given alignment.
266    ///
267    /// # Panics
268    ///
269    /// Requires that `begin <= end` and `end <= self.len()`.
270    /// Also requires that both `begin` and `end` are aligned to the given alignment.
271    pub fn slice_with_alignment(
272        &self,
273        range: impl RangeBounds<usize>,
274        alignment: Alignment,
275    ) -> Self {
276        let len = self.len();
277        let begin = match range.start_bound() {
278            Bound::Included(&n) => n,
279            Bound::Excluded(&n) => n.checked_add(1).vortex_expect("out of range"),
280            Bound::Unbounded => 0,
281        };
282        let end = match range.end_bound() {
283            Bound::Included(&n) => n.checked_add(1).vortex_expect("out of range"),
284            Bound::Excluded(&n) => n,
285            Bound::Unbounded => len,
286        };
287
288        if begin > end {
289            vortex_panic!(
290                "range start must not be greater than end: {:?} <= {:?}",
291                begin,
292                end
293            );
294        }
295        if end > len {
296            vortex_panic!("range end out of bounds: {:?} > {:?}", end, len);
297        }
298
299        if end == begin {
300            // We prefer to return a new empty buffer instead of sharing this one and creating a
301            // strong reference just to hold an empty slice.
302            return Self::empty_aligned(alignment);
303        }
304
305        let begin_byte = begin * size_of::<T>();
306        let end_byte = end * size_of::<T>();
307
308        if !begin_byte.is_multiple_of(*alignment) {
309            vortex_panic!("range start must be aligned to {alignment:?}");
310        }
311        if !end_byte.is_multiple_of(*alignment) {
312            vortex_panic!("range end must be aligned to {alignment:?}");
313        }
314        if !alignment.is_aligned_to(Alignment::of::<T>()) {
315            vortex_panic!("Slice alignment must at least align to type T")
316        }
317
318        Self {
319            bytes: self.bytes.slice(begin_byte..end_byte),
320            length: end - begin,
321            alignment,
322            _marker: Default::default(),
323        }
324    }
325
326    /// Returns a slice of self that is equivalent to the given subset.
327    ///
328    /// When processing the buffer you will often end up with `&[T]` that is a subset
329    /// of the underlying buffer. This function turns the slice into a slice of the buffer
330    /// it has been taken from.
331    ///
332    /// # Panics:
333    /// Requires that the given sub slice is in fact contained within the Bytes buffer; otherwise this function will panic.
334    #[inline(always)]
335    pub fn slice_ref(&self, subset: &[T]) -> Self {
336        self.slice_ref_with_alignment(subset, Alignment::of::<T>())
337    }
338
339    /// Returns a slice of self that is equivalent to the given subset.
340    ///
341    /// When processing the buffer you will often end up with `&[T]` that is a subset
342    /// of the underlying buffer. This function turns the slice into a slice of the buffer
343    /// it has been taken from.
344    ///
345    /// # Panics:
346    /// Requires that the given sub slice is in fact contained within the Bytes buffer; otherwise this function will panic.
347    /// Also requires that the given alignment aligns to the type of slice and is smaller or equal to the buffers alignment
348    pub fn slice_ref_with_alignment(&self, subset: &[T], alignment: Alignment) -> Self {
349        if !alignment.is_aligned_to(Alignment::of::<T>()) {
350            vortex_panic!("slice_ref alignment must at least align to type T")
351        }
352
353        if !self.alignment.is_aligned_to(alignment) {
354            vortex_panic!("slice_ref subset alignment must at least align to the buffer alignment")
355        }
356
357        if subset.as_ptr().align_offset(*alignment) != 0 {
358            vortex_panic!("slice_ref subset must be aligned to {:?}", alignment);
359        }
360
361        let subset_u8 =
362            unsafe { std::slice::from_raw_parts(subset.as_ptr().cast(), size_of_val(subset)) };
363
364        Self {
365            bytes: self.bytes.slice_ref(subset_u8),
366            length: subset.len(),
367            alignment,
368            _marker: Default::default(),
369        }
370    }
371
372    /// Returns the underlying aligned buffer.
373    pub fn inner(&self) -> &Bytes {
374        debug_assert_eq!(
375            self.length * size_of::<T>(),
376            self.bytes.len(),
377            "Own length has to be the same as the underlying bytes length"
378        );
379        &self.bytes
380    }
381
382    /// Returns the underlying aligned buffer.
383    pub fn into_inner(self) -> Bytes {
384        debug_assert_eq!(
385            self.length * size_of::<T>(),
386            self.bytes.len(),
387            "Own length has to be the same as the underlying bytes length"
388        );
389        self.bytes
390    }
391
392    /// Return the ByteBuffer for this `Buffer<T>`.
393    pub fn into_byte_buffer(self) -> ByteBuffer {
394        ByteBuffer {
395            bytes: self.bytes,
396            length: self.length * size_of::<T>(),
397            alignment: self.alignment,
398            _marker: Default::default(),
399        }
400    }
401
402    /// Try to convert self into `BufferMut<T>` if there is only a single strong reference.
403    pub fn try_into_mut(self) -> Result<BufferMut<T>, Self> {
404        self.bytes
405            .try_into_mut()
406            .map(|bytes| BufferMut {
407                bytes,
408                length: self.length,
409                alignment: self.alignment,
410                _marker: Default::default(),
411            })
412            .map_err(|bytes| Self {
413                bytes,
414                length: self.length,
415                alignment: self.alignment,
416                _marker: Default::default(),
417            })
418    }
419
420    /// Convert self into `BufferMut<T>`, cloning the data if there are multiple strong references.
421    pub fn into_mut(self) -> BufferMut<T> {
422        self.try_into_mut()
423            .unwrap_or_else(|buffer| BufferMut::<T>::copy_from(&buffer))
424    }
425
426    /// Returns whether a `Buffer<T>` is aligned to the given alignment.
427    pub fn is_aligned(&self, alignment: Alignment) -> bool {
428        self.bytes.as_ptr().align_offset(*alignment) == 0
429    }
430
431    /// Return a `Buffer<T>` with the given alignment. Where possible, this will be zero-copy.
432    pub fn aligned(mut self, alignment: Alignment) -> Self {
433        if self.as_ptr().align_offset(*alignment) == 0 {
434            self.alignment = alignment;
435            self
436        } else {
437            #[cfg(feature = "warn-copy")]
438            {
439                let bt = std::backtrace::Backtrace::capture();
440                log::warn!(
441                    "Buffer is not aligned to requested alignment {alignment}, copying: {bt}"
442                )
443            }
444            Self::copy_from_aligned(self, alignment)
445        }
446    }
447
448    /// Return a `Buffer<T>` with the given alignment. Panics if the buffer is not aligned.
449    pub fn ensure_aligned(mut self, alignment: Alignment) -> Self {
450        if self.as_ptr().align_offset(*alignment) == 0 {
451            self.alignment = alignment;
452            self
453        } else {
454            vortex_panic!("Buffer is not aligned to requested alignment {}", alignment)
455        }
456    }
457}
458
459/// An iterator over Buffer elements.
460///
461/// This is an analog to the `std::slice::Iter` type.
462pub struct Iter<'a, T> {
463    inner: std::slice::Iter<'a, T>,
464}
465
466impl<'a, T> Iterator for Iter<'a, T> {
467    type Item = &'a T;
468
469    #[inline]
470    fn next(&mut self) -> Option<Self::Item> {
471        self.inner.next()
472    }
473
474    #[inline]
475    fn size_hint(&self) -> (usize, Option<usize>) {
476        self.inner.size_hint()
477    }
478
479    #[inline]
480    fn count(self) -> usize {
481        self.inner.count()
482    }
483
484    #[inline]
485    fn last(self) -> Option<Self::Item> {
486        self.inner.last()
487    }
488
489    #[inline]
490    fn nth(&mut self, n: usize) -> Option<Self::Item> {
491        self.inner.nth(n)
492    }
493}
494
495impl<T> ExactSizeIterator for Iter<'_, T> {
496    #[inline]
497    fn len(&self) -> usize {
498        self.inner.len()
499    }
500}
501
502impl<T: Debug> Debug for Buffer<T> {
503    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
504        f.debug_struct(&format!("Buffer<{}>", type_name::<T>()))
505            .field("length", &self.length)
506            .field("alignment", &self.alignment)
507            .field("as_slice", &TruncatedDebug(self.as_slice()))
508            .finish()
509    }
510}
511
512impl<T> Deref for Buffer<T> {
513    type Target = [T];
514
515    #[inline]
516    fn deref(&self) -> &Self::Target {
517        self.as_slice()
518    }
519}
520
521impl<T> AsRef<[T]> for Buffer<T> {
522    #[inline]
523    fn as_ref(&self) -> &[T] {
524        self.as_slice()
525    }
526}
527
528impl<T> FromIterator<T> for Buffer<T> {
529    #[inline]
530    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
531        BufferMut::from_iter(iter).freeze()
532    }
533}
534
535// Helper struct to allow us to zero-copy any vec into a buffer
536#[repr(transparent)]
537struct Wrapper<T>(Vec<T>);
538
539impl<T> AsRef<[u8]> for Wrapper<T> {
540    fn as_ref(&self) -> &[u8] {
541        let data = self.0.as_ptr().cast::<u8>();
542        let len = self.0.len() * size_of::<T>();
543        unsafe { std::slice::from_raw_parts(data, len) }
544    }
545}
546
547impl<T> From<Vec<T>> for Buffer<T>
548where
549    T: Send + 'static,
550{
551    fn from(value: Vec<T>) -> Self {
552        let original_len = value.len();
553        let wrapped_vec = Wrapper(value);
554
555        let bytes = Bytes::from_owner(wrapped_vec);
556
557        assert_eq!(bytes.as_ptr().align_offset(align_of::<T>()), 0);
558
559        Self {
560            bytes,
561            length: original_len,
562            alignment: Alignment::of::<T>(),
563            _marker: PhantomData,
564        }
565    }
566}
567
568impl From<Bytes> for ByteBuffer {
569    fn from(bytes: Bytes) -> Self {
570        let length = bytes.len();
571        Self {
572            bytes,
573            length,
574            alignment: Alignment::of::<u8>(),
575            _marker: Default::default(),
576        }
577    }
578}
579
580impl Buf for ByteBuffer {
581    #[inline]
582    fn remaining(&self) -> usize {
583        self.len()
584    }
585
586    #[inline]
587    fn chunk(&self) -> &[u8] {
588        self.as_slice()
589    }
590
591    #[inline]
592    fn advance(&mut self, cnt: usize) {
593        if !cnt.is_multiple_of(*self.alignment) {
594            vortex_panic!(
595                "Cannot advance buffer by {} items, resulting alignment is not {}",
596                cnt,
597                self.alignment
598            );
599        }
600        self.bytes.advance(cnt);
601        self.length -= cnt;
602    }
603}
604
605/// Owned iterator over a [`Buffer`].
606pub struct BufferIterator<T> {
607    buffer: Buffer<T>,
608    index: usize,
609}
610
611impl<T: Copy> Iterator for BufferIterator<T> {
612    type Item = T;
613
614    #[inline]
615    fn next(&mut self) -> Option<Self::Item> {
616        (self.index < self.buffer.len()).then(move || {
617            let value = self.buffer[self.index];
618            self.index += 1;
619            value
620        })
621    }
622
623    #[inline]
624    fn size_hint(&self) -> (usize, Option<usize>) {
625        let remaining = self.buffer.len() - self.index;
626        (remaining, Some(remaining))
627    }
628}
629
630impl<T: Copy> IntoIterator for Buffer<T> {
631    type Item = T;
632    type IntoIter = BufferIterator<T>;
633
634    #[inline]
635    fn into_iter(self) -> Self::IntoIter {
636        BufferIterator {
637            buffer: self,
638            index: 0,
639        }
640    }
641}
642
643impl<T> From<BufferMut<T>> for Buffer<T> {
644    #[inline]
645    fn from(value: BufferMut<T>) -> Self {
646        value.freeze()
647    }
648}
649
650#[cfg(test)]
651mod test {
652    use bytes::Buf;
653
654    use crate::{Alignment, Buffer, ByteBuffer, buffer};
655
656    #[test]
657    fn align() {
658        let buf = buffer![0u8, 1, 2];
659        let aligned = buf.aligned(Alignment::new(32));
660        assert_eq!(aligned.alignment(), Alignment::new(32));
661        assert_eq!(aligned.as_slice(), &[0, 1, 2]);
662    }
663
664    #[test]
665    fn slice() {
666        let buf = buffer![0, 1, 2, 3, 4];
667        assert_eq!(buf.slice(1..3).as_slice(), &[1, 2]);
668        assert_eq!(buf.slice(1..=3).as_slice(), &[1, 2, 3]);
669    }
670
671    #[test]
672    fn slice_unaligned() {
673        let buf = buffer![0i32, 1, 2, 3, 4].into_byte_buffer();
674        // With a regular slice, this would panic. See [`slice_bad_alignment`].
675        let sliced = buf.slice_unaligned(1..2);
676        // Verify the slice has the expected length (1 byte from index 1 to 2).
677        assert_eq!(sliced.len(), 1);
678        // The original buffer has i32 values [0, 1, 2, 3, 4].
679        // In little-endian bytes, 0i32 = [0, 0, 0, 0], so byte at index 1 is 0.
680        assert_eq!(sliced.as_slice(), &[0]);
681    }
682
683    #[test]
684    #[should_panic]
685    fn slice_bad_alignment() {
686        let buf = buffer![0i32, 1, 2, 3, 4].into_byte_buffer();
687        // We should only be able to slice this buffer on 4-byte (i32) boundaries.
688        buf.slice(1..2);
689    }
690
691    #[test]
692    fn bytes_buf() {
693        let mut buf = ByteBuffer::copy_from("helloworld".as_bytes());
694        assert_eq!(buf.remaining(), 10);
695        assert_eq!(buf.chunk(), b"helloworld");
696
697        Buf::advance(&mut buf, 5);
698        assert_eq!(buf.remaining(), 5);
699        assert_eq!(buf.as_slice(), b"world");
700        assert_eq!(buf.chunk(), b"world");
701    }
702
703    #[test]
704    fn from_vec() {
705        let vec = vec![1, 2, 3, 4, 5];
706        let buff = Buffer::from(vec.clone());
707        assert!(buff.is_aligned(Alignment::of::<i32>()));
708        assert_eq!(vec, buff);
709    }
710}