1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137
use arrow2::buffer::Buffer;
/// Convenience-wrapper around an arrow [`Buffer`] that is known to contain
/// a primitive type.
///
/// The arrow2 [`Buffer`] object is internally reference-counted and can be
/// easily converted back to a `&[T]` referencing the underlying storage.
/// This avoids some of the lifetime complexities that would otherwise
/// arise from returning a `&[T]` directly, but is significantly more
/// performant than doing the full allocation necessary to return a `Vec<T>`.
#[derive(Clone, Debug, Default, PartialEq)]
pub struct ArrowBuffer<T>(pub Buffer<T>);
impl<T: crate::SizeBytes> crate::SizeBytes for ArrowBuffer<T> {
    /// Size in bytes of the contiguous `[T]` slice exposed by the wrapped buffer.
    ///
    /// Only the slice itself is measured; the elements' own `heap_size_bytes`
    /// are not consulted.
    #[inline]
    fn heap_size_bytes(&self) -> u64 {
        let elements: &[T] = self.0.as_slice();
        std::mem::size_of_val(elements) as u64
    }
}
impl<T> ArrowBuffer<T> {
/// The number of instances of T stored in this buffer.
#[inline]
pub fn num_instances(&self) -> usize {
// WARNING: If you are touching this code, make sure you know what len() actually does.
//
// There is ambiguity in how arrow2 and arrow-rs talk about buffer lengths, including
// some incorrect documentation: https://github.com/jorgecarleitao/arrow2/issues/1430
//
// Arrow2 `Buffer<T>` is typed and `len()` is the number of units of `T`, but the documentation
// is currently incorrect.
// Arrow-rs `Buffer` is untyped and len() is in bytes, but `ScalarBuffer`s are in units of T.
self.0.len()
}
/// The number of bytes stored in this buffer
#[inline]
pub fn size_in_bytes(&self) -> usize {
self.0.len() * std::mem::size_of::<T>()
}
#[inline]
pub fn is_empty(&self) -> bool {
self.0.is_empty()
}
#[inline]
pub fn as_slice(&self) -> &[T] {
self.0.as_slice()
}
#[inline]
pub fn into_inner(self) -> Buffer<T> {
self.0
}
/// Returns a new [`Buffer`] that is a slice of this buffer starting at `offset`.
///
/// Doing so allows the same memory region to be shared between buffers.
///
/// # Panics
/// Panics iff `offset + length` is larger than `len`.
#[inline]
pub fn sliced(self, range: std::ops::Range<usize>) -> Self {
Self(self.0.sliced(range.start, range.len()))
}
}
impl<T: bytemuck::Pod> ArrowBuffer<T> {
    /// Reinterprets the stored POD (plain-old-data) values as another POD type.
    ///
    /// Example: turn a buffer of `u8` into a buffer of `f32`.
    ///
    /// The result is built from a copy of the reinterpreted slice (`to_vec`),
    /// so it does not share storage with `self`.
    ///
    /// # Errors
    /// Forwards the [`bytemuck::PodCastError`] reported by
    /// `bytemuck::try_cast_slice` when the contents cannot be viewed as `[Target]`.
    #[inline]
    pub fn cast_pod<Target: bytemuck::Pod>(
        &self,
    ) -> Result<ArrowBuffer<Target>, bytemuck::PodCastError> {
        // TODO(emilk): when we switch from arrow2, see if we can make this function zero-copy
        re_tracing::profile_function!();

        bytemuck::try_cast_slice::<T, Target>(self.as_slice())
            .map(|reinterpreted| ArrowBuffer::from(reinterpreted.to_vec()))
    }

    /// Reinterprets the stored POD (plain-old-data) values as raw `u8` bytes.
    ///
    /// The underlying cast is treated as infallible: an error from
    /// [`Self::cast_pod`] hits `unreachable!`.
    #[inline]
    pub fn cast_to_u8(&self) -> ArrowBuffer<u8> {
        self.cast_pod::<u8>()
            .unwrap_or_else(|_| unreachable!("We can always cast POD types to u8"))
    }
}
// Marker impl: `ArrowBuffer<T>` equality is a full equivalence relation whenever `T: Eq`.
// NOTE(review): written by hand instead of `#[derive(Eq)]` — presumably because the wrapped
// arrow2 `Buffer<T>` does not itself implement `Eq`; confirm before switching to a derive.
impl<T: Eq> Eq for ArrowBuffer<T> {}
impl<T: Clone> ArrowBuffer<T> {
    /// Clones every stored value into a freshly allocated `Vec<T>`.
    #[inline]
    pub fn to_vec(&self) -> Vec<T> {
        let elements: &[T] = self.0.as_slice();
        Vec::from(elements)
    }
}
impl<T> From<Buffer<T>> for ArrowBuffer<T> {
#[inline]
fn from(value: Buffer<T>) -> Self {
Self(value)
}
}
impl<T> From<Vec<T>> for ArrowBuffer<T> {
    /// Takes ownership of `values` and converts it into the wrapped buffer
    /// through the buffer's own `From<Vec<T>>` conversion.
    #[inline]
    fn from(values: Vec<T>) -> Self {
        let buffer = Into::into(values);
        Self(buffer)
    }
}
impl<T: Clone> From<&[T]> for ArrowBuffer<T> {
    /// Builds a buffer holding a clone of every element of `values`.
    #[inline]
    fn from(values: &[T]) -> Self {
        // TODO(emilk): avoid extra clones
        let owned: Vec<T> = values.to_vec();
        Self(owned.into())
    }
}
impl<T> FromIterator<T> for ArrowBuffer<T> {
fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
Self(Buffer::from_iter(iter))
}
}
impl<T> std::ops::Deref for ArrowBuffer<T> {
type Target = [T];
#[inline]
fn deref(&self) -> &[T] {
self.0.as_slice()
}
}