vortex_buffer/
buffer.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::type_name;
5use std::cmp::Ordering;
6use std::collections::Bound;
7use std::fmt::Debug;
8use std::fmt::Formatter;
9use std::hash::Hash;
10use std::hash::Hasher;
11use std::marker::PhantomData;
12use std::ops::Deref;
13use std::ops::RangeBounds;
14
15use bytes::Buf;
16use bytes::Bytes;
17use vortex_error::VortexExpect;
18use vortex_error::vortex_panic;
19
20use crate::Alignment;
21use crate::BufferMut;
22use crate::ByteBuffer;
23use crate::debug::TruncatedDebug;
24use crate::trusted_len::TrustedLen;
25
26/// An immutable buffer of items of `T`.
27pub struct Buffer<T> {
28    pub(crate) bytes: Bytes,
29    pub(crate) length: usize,
30    pub(crate) alignment: Alignment,
31    pub(crate) _marker: PhantomData<T>,
32}
33
34impl<T> Clone for Buffer<T> {
35    #[inline]
36    fn clone(&self) -> Self {
37        Self {
38            bytes: self.bytes.clone(),
39            length: self.length,
40            alignment: self.alignment,
41            _marker: PhantomData,
42        }
43    }
44}
45
46impl<T> Default for Buffer<T> {
47    fn default() -> Self {
48        Self {
49            bytes: Default::default(),
50            length: 0,
51            alignment: Alignment::of::<T>(),
52            _marker: PhantomData,
53        }
54    }
55}
56
57impl<T> PartialEq for Buffer<T> {
58    #[inline]
59    fn eq(&self, other: &Self) -> bool {
60        self.bytes == other.bytes
61    }
62}
63
64impl<T: PartialEq> PartialEq<Vec<T>> for Buffer<T> {
65    fn eq(&self, other: &Vec<T>) -> bool {
66        self.as_ref() == other.as_slice()
67    }
68}
69
70impl<T: PartialEq> PartialEq<Buffer<T>> for Vec<T> {
71    fn eq(&self, other: &Buffer<T>) -> bool {
72        self.as_slice() == other.as_ref()
73    }
74}
75
76impl<T> Eq for Buffer<T> {}
77
78impl<T> Ord for Buffer<T> {
79    #[inline]
80    fn cmp(&self, other: &Self) -> Ordering {
81        self.bytes.cmp(&other.bytes)
82    }
83}
84
85impl<T> PartialOrd for Buffer<T> {
86    #[inline]
87    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
88        Some(self.cmp(other))
89    }
90}
91
92impl<T> Hash for Buffer<T> {
93    #[inline]
94    fn hash<H: Hasher>(&self, state: &mut H) {
95        self.bytes.as_ref().hash(state)
96    }
97}
98
99impl<T> Buffer<T> {
100    /// Returns a new `Buffer<T>` copied from the provided `Vec<T>`, `&[T]`, etc.
101    ///
102    /// Due to our underlying usage of `bytes::Bytes`, we are unable to take zero-copy ownership
103    /// of the provided `Vec<T>` while maintaining the ability to convert it back into a mutable
104    /// buffer. We could fix this by forking `Bytes`, or in many other complex ways, but for now
105    /// callers should prefer to construct `Buffer<T>` from a `BufferMut<T>`.
106    pub fn copy_from(values: impl AsRef<[T]>) -> Self {
107        BufferMut::copy_from(values).freeze()
108    }
109
110    /// Returns a new `Buffer<T>` copied from the provided slice and with the requested alignment.
111    pub fn copy_from_aligned(values: impl AsRef<[T]>, alignment: Alignment) -> Self {
112        BufferMut::copy_from_aligned(values, alignment).freeze()
113    }
114
115    /// Create a new zeroed `Buffer` with the given value.
116    pub fn zeroed(len: usize) -> Self {
117        Self::zeroed_aligned(len, Alignment::of::<T>())
118    }
119
120    /// Create a new zeroed `Buffer` with the given value.
121    pub fn zeroed_aligned(len: usize, alignment: Alignment) -> Self {
122        BufferMut::zeroed_aligned(len, alignment).freeze()
123    }
124
125    /// Create a new empty `ByteBuffer` with the provided alignment.
126    pub fn empty() -> Self {
127        BufferMut::empty().freeze()
128    }
129
130    /// Create a new empty `ByteBuffer` with the provided alignment.
131    pub fn empty_aligned(alignment: Alignment) -> Self {
132        BufferMut::empty_aligned(alignment).freeze()
133    }
134
135    /// Create a new full `ByteBuffer` with the given value.
136    pub fn full(item: T, len: usize) -> Self
137    where
138        T: Copy,
139    {
140        BufferMut::full(item, len).freeze()
141    }
142
143    /// Create a `Buffer<T>` zero-copy from a `ByteBuffer`.
144    ///
145    /// ## Panics
146    ///
147    /// Panics if the buffer is not aligned to the size of `T`, or the length is not a multiple of
148    /// the size of `T`.
149    pub fn from_byte_buffer(buffer: ByteBuffer) -> Self {
150        // TODO(ngates): should this preserve the current alignment of the buffer?
151        Self::from_byte_buffer_aligned(buffer, Alignment::of::<T>())
152    }
153
154    /// Create a `Buffer<T>` zero-copy from a `ByteBuffer`.
155    ///
156    /// ## Panics
157    ///
158    /// Panics if the buffer is not aligned to the given alignment, if the length is not a multiple
159    /// of the size of `T`, or if the given alignment is not aligned to that of `T`.
160    pub fn from_byte_buffer_aligned(buffer: ByteBuffer, alignment: Alignment) -> Self {
161        Self::from_bytes_aligned(buffer.into_inner(), alignment)
162    }
163
164    /// Create a `Buffer<T>` zero-copy from a `Bytes`.
165    ///
166    /// ## Panics
167    ///
168    /// Panics if the buffer is not aligned to the size of `T`, or the length is not a multiple of
169    /// the size of `T`.
170    pub fn from_bytes_aligned(bytes: Bytes, alignment: Alignment) -> Self {
171        if !alignment.is_aligned_to(Alignment::of::<T>()) {
172            vortex_panic!(
173                "Alignment {} must be compatible with the scalar type's alignment {}",
174                alignment,
175                Alignment::of::<T>(),
176            );
177        }
178        if bytes.as_ptr().align_offset(*alignment) != 0 {
179            vortex_panic!(
180                "Bytes alignment must align to the requested alignment {}",
181                alignment,
182            );
183        }
184        if bytes.len() % size_of::<T>() != 0 {
185            vortex_panic!(
186                "Bytes length {} must be a multiple of the scalar type's size {}",
187                bytes.len(),
188                size_of::<T>()
189            );
190        }
191        let length = bytes.len() / size_of::<T>();
192        Self {
193            bytes,
194            length,
195            alignment,
196            _marker: Default::default(),
197        }
198    }
199
200    /// Create a buffer with values from the TrustedLen iterator.
201    /// Should be preferred over `from_iter` when the iterator is known to be `TrustedLen`.
202    pub fn from_trusted_len_iter<I: TrustedLen<Item = T>>(iter: I) -> Self {
203        let (_, upper_bound) = iter.size_hint();
204        let mut buffer = BufferMut::with_capacity(
205            upper_bound.vortex_expect("TrustedLen iterator has no upper bound"),
206        );
207        buffer.extend_trusted(iter);
208        buffer.freeze()
209    }
210
211    /// Clear the buffer, preserving existing capacity.
212    pub fn clear(&mut self) {
213        self.bytes.clear();
214        self.length = 0;
215    }
216
217    /// Returns the length of the buffer in elements of type T.
218    #[inline(always)]
219    pub fn len(&self) -> usize {
220        self.length
221    }
222
223    /// Returns whether the buffer is empty.
224    #[inline(always)]
225    pub fn is_empty(&self) -> bool {
226        self.length == 0
227    }
228
229    /// Returns the alignment of the buffer.
230    #[inline(always)]
231    pub fn alignment(&self) -> Alignment {
232        self.alignment
233    }
234
235    /// Returns a slice over the buffer of elements of type T.
236    #[inline(always)]
237    pub fn as_slice(&self) -> &[T] {
238        // SAFETY: alignment of Buffer is checked on construction
239        unsafe { std::slice::from_raw_parts(self.bytes.as_ptr().cast(), self.length) }
240    }
241
242    /// Return a view over the buffer as an opaque byte slice.
243    #[inline(always)]
244    pub fn as_bytes(&self) -> &[u8] {
245        self.bytes.as_ref()
246    }
247
248    /// Returns an iterator over the buffer of elements of type T.
249    pub fn iter(&self) -> Iter<'_, T> {
250        Iter {
251            inner: self.as_slice().iter(),
252        }
253    }
254
255    /// Returns a slice of self for the provided range.
256    ///
257    /// # Panics
258    ///
259    /// Requires that `begin <= end` and `end <= self.len()`.
260    /// Also requires that both `begin` and `end` are aligned to the buffer's required alignment.
261    #[inline(always)]
262    pub fn slice(&self, range: impl RangeBounds<usize>) -> Self {
263        self.slice_with_alignment(range, self.alignment)
264    }
265
266    /// Returns a slice of self for the provided range, with no guarantees about the resulting
267    /// alignment.
268    ///
269    /// # Panics
270    ///
271    /// Requires that `begin <= end` and `end <= self.len()`.
272    #[inline(always)]
273    pub fn slice_unaligned(&self, range: impl RangeBounds<usize>) -> Self {
274        self.slice_with_alignment(range, Alignment::of::<u8>())
275    }
276
277    /// Returns a slice of self for the provided range, ensuring the resulting slice has the
278    /// given alignment.
279    ///
280    /// # Panics
281    ///
282    /// Requires that `begin <= end` and `end <= self.len()`.
283    /// Also requires that both `begin` and `end` are aligned to the given alignment.
284    pub fn slice_with_alignment(
285        &self,
286        range: impl RangeBounds<usize>,
287        alignment: Alignment,
288    ) -> Self {
289        let len = self.len();
290        let begin = match range.start_bound() {
291            Bound::Included(&n) => n,
292            Bound::Excluded(&n) => n.checked_add(1).vortex_expect("out of range"),
293            Bound::Unbounded => 0,
294        };
295        let end = match range.end_bound() {
296            Bound::Included(&n) => n.checked_add(1).vortex_expect("out of range"),
297            Bound::Excluded(&n) => n,
298            Bound::Unbounded => len,
299        };
300
301        if begin > end {
302            vortex_panic!(
303                "range start must not be greater than end: {:?} <= {:?}",
304                begin,
305                end
306            );
307        }
308        if end > len {
309            vortex_panic!("range end out of bounds: {:?} > {:?}", end, len);
310        }
311
312        if end == begin {
313            // We prefer to return a new empty buffer instead of sharing this one and creating a
314            // strong reference just to hold an empty slice.
315            return Self::empty_aligned(alignment);
316        }
317
318        let begin_byte = begin * size_of::<T>();
319        let end_byte = end * size_of::<T>();
320
321        if !begin_byte.is_multiple_of(*alignment) {
322            vortex_panic!("range start must be aligned to {alignment:?}");
323        }
324        if !end_byte.is_multiple_of(*alignment) {
325            vortex_panic!("range end must be aligned to {alignment:?}");
326        }
327        if !alignment.is_aligned_to(Alignment::of::<T>()) {
328            vortex_panic!("Slice alignment must at least align to type T")
329        }
330
331        Self {
332            bytes: self.bytes.slice(begin_byte..end_byte),
333            length: end - begin,
334            alignment,
335            _marker: Default::default(),
336        }
337    }
338
339    /// Returns a slice of self that is equivalent to the given subset.
340    ///
341    /// When processing the buffer you will often end up with `&[T]` that is a subset
342    /// of the underlying buffer. This function turns the slice into a slice of the buffer
343    /// it has been taken from.
344    ///
345    /// # Panics:
346    /// Requires that the given sub slice is in fact contained within the Bytes buffer; otherwise this function will panic.
347    #[inline(always)]
348    pub fn slice_ref(&self, subset: &[T]) -> Self {
349        self.slice_ref_with_alignment(subset, Alignment::of::<T>())
350    }
351
352    /// Returns a slice of self that is equivalent to the given subset.
353    ///
354    /// When processing the buffer you will often end up with `&[T]` that is a subset
355    /// of the underlying buffer. This function turns the slice into a slice of the buffer
356    /// it has been taken from.
357    ///
358    /// # Panics:
359    /// Requires that the given sub slice is in fact contained within the Bytes buffer; otherwise this function will panic.
360    /// Also requires that the given alignment aligns to the type of slice and is smaller or equal to the buffers alignment
361    pub fn slice_ref_with_alignment(&self, subset: &[T], alignment: Alignment) -> Self {
362        if !alignment.is_aligned_to(Alignment::of::<T>()) {
363            vortex_panic!("slice_ref alignment must at least align to type T")
364        }
365
366        if !self.alignment.is_aligned_to(alignment) {
367            vortex_panic!("slice_ref subset alignment must at least align to the buffer alignment")
368        }
369
370        if subset.as_ptr().align_offset(*alignment) != 0 {
371            vortex_panic!("slice_ref subset must be aligned to {:?}", alignment);
372        }
373
374        let subset_u8 =
375            unsafe { std::slice::from_raw_parts(subset.as_ptr().cast(), size_of_val(subset)) };
376
377        Self {
378            bytes: self.bytes.slice_ref(subset_u8),
379            length: subset.len(),
380            alignment,
381            _marker: Default::default(),
382        }
383    }
384
385    /// Returns the underlying aligned buffer.
386    pub fn inner(&self) -> &Bytes {
387        debug_assert_eq!(
388            self.length * size_of::<T>(),
389            self.bytes.len(),
390            "Own length has to be the same as the underlying bytes length"
391        );
392        &self.bytes
393    }
394
395    /// Returns the underlying aligned buffer.
396    pub fn into_inner(self) -> Bytes {
397        debug_assert_eq!(
398            self.length * size_of::<T>(),
399            self.bytes.len(),
400            "Own length has to be the same as the underlying bytes length"
401        );
402        self.bytes
403    }
404
405    /// Return the ByteBuffer for this `Buffer<T>`.
406    pub fn into_byte_buffer(self) -> ByteBuffer {
407        ByteBuffer {
408            bytes: self.bytes,
409            length: self.length * size_of::<T>(),
410            alignment: self.alignment,
411            _marker: Default::default(),
412        }
413    }
414
415    /// Cast a `Buffer<T>` into a `Buffer<U>`.
416    ///
417    /// # Panics
418    ///
419    /// Panics if the type `U` does not have the same size and alignment as `T`.
420    pub fn cast_into<U>(self) -> Buffer<U> {
421        assert_eq!(size_of::<T>(), size_of::<U>(), "Buffer type size mismatch");
422        assert_eq!(
423            align_of::<T>(),
424            align_of::<U>(),
425            "Buffer type alignment mismatch"
426        );
427
428        Buffer {
429            bytes: self.bytes,
430            length: self.length,
431            alignment: self.alignment,
432            _marker: PhantomData,
433        }
434    }
435
436    /// Try to convert self into `BufferMut<T>` if there is only a single strong reference.
437    pub fn try_into_mut(self) -> Result<BufferMut<T>, Self> {
438        self.bytes
439            .try_into_mut()
440            .map(|bytes| BufferMut {
441                bytes,
442                length: self.length,
443                alignment: self.alignment,
444                _marker: Default::default(),
445            })
446            .map_err(|bytes| Self {
447                bytes,
448                length: self.length,
449                alignment: self.alignment,
450                _marker: Default::default(),
451            })
452    }
453
454    /// Convert self into `BufferMut<T>`, cloning the data if there are multiple strong references.
455    pub fn into_mut(self) -> BufferMut<T> {
456        self.try_into_mut()
457            .unwrap_or_else(|buffer| BufferMut::<T>::copy_from(&buffer))
458    }
459
460    /// Returns whether a `Buffer<T>` is aligned to the given alignment.
461    pub fn is_aligned(&self, alignment: Alignment) -> bool {
462        self.bytes.as_ptr().align_offset(*alignment) == 0
463    }
464
465    /// Return a `Buffer<T>` with the given alignment. Where possible, this will be zero-copy.
466    pub fn aligned(mut self, alignment: Alignment) -> Self {
467        if self.as_ptr().align_offset(*alignment) == 0 {
468            self.alignment = alignment;
469            self
470        } else {
471            #[cfg(feature = "warn-copy")]
472            {
473                let bt = std::backtrace::Backtrace::capture();
474                log::warn!(
475                    "Buffer is not aligned to requested alignment {alignment}, copying: {bt}"
476                )
477            }
478            Self::copy_from_aligned(self, alignment)
479        }
480    }
481
482    /// Return a `Buffer<T>` with the given alignment. Panics if the buffer is not aligned.
483    pub fn ensure_aligned(mut self, alignment: Alignment) -> Self {
484        if self.as_ptr().align_offset(*alignment) == 0 {
485            self.alignment = alignment;
486            self
487        } else {
488            vortex_panic!("Buffer is not aligned to requested alignment {}", alignment)
489        }
490    }
491}
492
493/// An iterator over Buffer elements.
494///
495/// This is an analog to the `std::slice::Iter` type.
496pub struct Iter<'a, T> {
497    inner: std::slice::Iter<'a, T>,
498}
499
500impl<'a, T> Iterator for Iter<'a, T> {
501    type Item = &'a T;
502
503    #[inline]
504    fn next(&mut self) -> Option<Self::Item> {
505        self.inner.next()
506    }
507
508    #[inline]
509    fn size_hint(&self) -> (usize, Option<usize>) {
510        self.inner.size_hint()
511    }
512
513    #[inline]
514    fn count(self) -> usize {
515        self.inner.count()
516    }
517
518    #[inline]
519    fn last(self) -> Option<Self::Item> {
520        self.inner.last()
521    }
522
523    #[inline]
524    fn nth(&mut self, n: usize) -> Option<Self::Item> {
525        self.inner.nth(n)
526    }
527}
528
529impl<T> ExactSizeIterator for Iter<'_, T> {
530    #[inline]
531    fn len(&self) -> usize {
532        self.inner.len()
533    }
534}
535
536impl<T: Debug> Debug for Buffer<T> {
537    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
538        f.debug_struct(&format!("Buffer<{}>", type_name::<T>()))
539            .field("length", &self.length)
540            .field("alignment", &self.alignment)
541            .field("as_slice", &TruncatedDebug(self.as_slice()))
542            .finish()
543    }
544}
545
546impl<T> Deref for Buffer<T> {
547    type Target = [T];
548
549    #[inline]
550    fn deref(&self) -> &Self::Target {
551        self.as_slice()
552    }
553}
554
555impl<T> AsRef<[T]> for Buffer<T> {
556    #[inline]
557    fn as_ref(&self) -> &[T] {
558        self.as_slice()
559    }
560}
561
562impl<T> FromIterator<T> for Buffer<T> {
563    #[inline]
564    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
565        BufferMut::from_iter(iter).freeze()
566    }
567}
568
569// Helper struct to allow us to zero-copy any vec into a buffer
570#[repr(transparent)]
571struct Wrapper<T>(Vec<T>);
572
573impl<T> AsRef<[u8]> for Wrapper<T> {
574    fn as_ref(&self) -> &[u8] {
575        let data = self.0.as_ptr().cast::<u8>();
576        let len = self.0.len() * size_of::<T>();
577        unsafe { std::slice::from_raw_parts(data, len) }
578    }
579}
580
581impl<T> From<Vec<T>> for Buffer<T>
582where
583    T: Send + 'static,
584{
585    fn from(value: Vec<T>) -> Self {
586        let original_len = value.len();
587        let wrapped_vec = Wrapper(value);
588
589        let bytes = Bytes::from_owner(wrapped_vec);
590
591        assert_eq!(bytes.as_ptr().align_offset(align_of::<T>()), 0);
592
593        Self {
594            bytes,
595            length: original_len,
596            alignment: Alignment::of::<T>(),
597            _marker: PhantomData,
598        }
599    }
600}
601
602impl From<Bytes> for ByteBuffer {
603    fn from(bytes: Bytes) -> Self {
604        let length = bytes.len();
605        Self {
606            bytes,
607            length,
608            alignment: Alignment::of::<u8>(),
609            _marker: Default::default(),
610        }
611    }
612}
613
614impl Buf for ByteBuffer {
615    #[inline]
616    fn remaining(&self) -> usize {
617        self.len()
618    }
619
620    #[inline]
621    fn chunk(&self) -> &[u8] {
622        self.as_slice()
623    }
624
625    #[inline]
626    fn advance(&mut self, cnt: usize) {
627        if !cnt.is_multiple_of(*self.alignment) {
628            vortex_panic!(
629                "Cannot advance buffer by {} items, resulting alignment is not {}",
630                cnt,
631                self.alignment
632            );
633        }
634        self.bytes.advance(cnt);
635        self.length -= cnt;
636    }
637}
638
639/// Owned iterator over a [`Buffer`].
640pub struct BufferIterator<T> {
641    buffer: Buffer<T>,
642    index: usize,
643}
644
645impl<T: Copy> Iterator for BufferIterator<T> {
646    type Item = T;
647
648    #[inline]
649    fn next(&mut self) -> Option<Self::Item> {
650        (self.index < self.buffer.len()).then(move || {
651            let value = self.buffer[self.index];
652            self.index += 1;
653            value
654        })
655    }
656
657    #[inline]
658    fn size_hint(&self) -> (usize, Option<usize>) {
659        let remaining = self.buffer.len() - self.index;
660        (remaining, Some(remaining))
661    }
662}
663
664impl<T: Copy> IntoIterator for Buffer<T> {
665    type Item = T;
666    type IntoIter = BufferIterator<T>;
667
668    #[inline]
669    fn into_iter(self) -> Self::IntoIter {
670        BufferIterator {
671            buffer: self,
672            index: 0,
673        }
674    }
675}
676
677impl<T> From<BufferMut<T>> for Buffer<T> {
678    #[inline]
679    fn from(value: BufferMut<T>) -> Self {
680        value.freeze()
681    }
682}
683
684#[cfg(test)]
685mod test {
686    use bytes::Buf;
687
688    use crate::Alignment;
689    use crate::Buffer;
690    use crate::ByteBuffer;
691    use crate::buffer;
692
693    #[test]
694    fn align() {
695        let buf = buffer![0u8, 1, 2];
696        let aligned = buf.aligned(Alignment::new(32));
697        assert_eq!(aligned.alignment(), Alignment::new(32));
698        assert_eq!(aligned.as_slice(), &[0, 1, 2]);
699    }
700
701    #[test]
702    fn slice() {
703        let buf = buffer![0, 1, 2, 3, 4];
704        assert_eq!(buf.slice(1..3).as_slice(), &[1, 2]);
705        assert_eq!(buf.slice(1..=3).as_slice(), &[1, 2, 3]);
706    }
707
708    #[test]
709    fn slice_unaligned() {
710        let buf = buffer![0i32, 1, 2, 3, 4].into_byte_buffer();
711        // With a regular slice, this would panic. See [`slice_bad_alignment`].
712        let sliced = buf.slice_unaligned(1..2);
713        // Verify the slice has the expected length (1 byte from index 1 to 2).
714        assert_eq!(sliced.len(), 1);
715        // The original buffer has i32 values [0, 1, 2, 3, 4].
716        // In little-endian bytes, 0i32 = [0, 0, 0, 0], so byte at index 1 is 0.
717        assert_eq!(sliced.as_slice(), &[0]);
718    }
719
720    #[test]
721    #[should_panic]
722    fn slice_bad_alignment() {
723        let buf = buffer![0i32, 1, 2, 3, 4].into_byte_buffer();
724        // We should only be able to slice this buffer on 4-byte (i32) boundaries.
725        buf.slice(1..2);
726    }
727
728    #[test]
729    fn bytes_buf() {
730        let mut buf = ByteBuffer::copy_from("helloworld".as_bytes());
731        assert_eq!(buf.remaining(), 10);
732        assert_eq!(buf.chunk(), b"helloworld");
733
734        Buf::advance(&mut buf, 5);
735        assert_eq!(buf.remaining(), 5);
736        assert_eq!(buf.as_slice(), b"world");
737        assert_eq!(buf.chunk(), b"world");
738    }
739
740    #[test]
741    fn from_vec() {
742        let vec = vec![1, 2, 3, 4, 5];
743        let buff = Buffer::from(vec.clone());
744        assert!(buff.is_aligned(Alignment::of::<i32>()));
745        assert_eq!(vec, buff);
746    }
747}