1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
use arrow2::buffer::Buffer;

/// Convenience-wrapper around an arrow [`Buffer`] that is known to contain
/// a primitive type.
///
/// The arrow2 [`Buffer`] object is internally reference-counted and can be
/// easily converted back to a `&[T]` referencing the underlying storage.
/// This avoids some of the lifetime complexities that would otherwise
/// arise from returning a `&[T]` directly, but is significantly more
/// performant than doing the full allocation necessary to return a `Vec<T>`.
///
/// The wrapped [`Buffer`] is a public field, so it can also be reached
/// directly as `.0`.
#[derive(Clone, Debug, Default, PartialEq)]
pub struct ArrowBuffer<T>(pub Buffer<T>);

impl<T: crate::SizeBytes> crate::SizeBytes for ArrowBuffer<T> {
    /// Reports the size in bytes of the slice backing this buffer.
    ///
    /// This is the flat `size_of_val` of the element slice; the elements'
    /// own `heap_size_bytes` are not consulted here.
    #[inline]
    fn heap_size_bytes(&self) -> u64 {
        let elements: &[T] = self.0.as_slice();
        std::mem::size_of_val(elements) as u64
    }
}

impl<T> ArrowBuffer<T> {
    /// The number of instances of T stored in this buffer.
    #[inline]
    pub fn num_instances(&self) -> usize {
        // WARNING: If you are touching this code, make sure you know what len() actually does.
        //
        // There is ambiguity in how arrow2 and arrow-rs talk about buffer lengths, including
        // some incorrect documentation: https://github.com/jorgecarleitao/arrow2/issues/1430
        //
        // Arrow2 `Buffer<T>` is typed and `len()` is the number of units of `T`, but the documentation
        // is currently incorrect.
        // Arrow-rs `Buffer` is untyped and len() is in bytes, but `ScalarBuffer`s are in units of T.
        self.0.len()
    }

    /// The number of bytes stored in this buffer
    #[inline]
    pub fn size_in_bytes(&self) -> usize {
        self.0.len() * std::mem::size_of::<T>()
    }

    #[inline]
    pub fn is_empty(&self) -> bool {
        self.0.is_empty()
    }

    #[inline]
    pub fn as_slice(&self) -> &[T] {
        self.0.as_slice()
    }

    #[inline]
    pub fn into_inner(self) -> Buffer<T> {
        self.0
    }

    /// Returns a new [`Buffer`] that is a slice of this buffer starting at `offset`.
    ///
    /// Doing so allows the same memory region to be shared between buffers.
    ///
    /// # Panics
    /// Panics iff `offset + length` is larger than `len`.
    #[inline]
    pub fn sliced(self, range: std::ops::Range<usize>) -> Self {
        Self(self.0.sliced(range.start, range.len()))
    }
}

impl<T: bytemuck::Pod> ArrowBuffer<T> {
    /// Reinterprets the contents as another POD (plain-old-data) type.
    ///
    /// Example: turn a buffer of `u8` into a buffer of `f32`.
    ///
    /// The reinterpreted data is copied into a freshly built buffer; the
    /// original buffer is left untouched.
    ///
    /// # Errors
    /// Forwards the [`bytemuck::PodCastError`] from `bytemuck::try_cast_slice`
    /// when the slice cannot be viewed as `[Target]`.
    #[inline]
    pub fn cast_pod<Target: bytemuck::Pod>(
        &self,
    ) -> Result<ArrowBuffer<Target>, bytemuck::PodCastError> {
        // TODO(emilk): when we switch from arrow2, see if we can make this function zero-copy
        re_tracing::profile_function!();
        bytemuck::try_cast_slice::<T, Target>(self.as_slice())
            .map(|reinterpreted| ArrowBuffer::from(reinterpreted.to_vec()))
    }

    /// Reinterprets the contents as raw `u8` bytes.
    ///
    /// Built on [`Self::cast_pod`]; a failure there is treated as impossible.
    #[inline]
    pub fn cast_to_u8(&self) -> ArrowBuffer<u8> {
        self.cast_pod::<u8>()
            .unwrap_or_else(|_| unreachable!("We can always cast POD types to u8"))
    }
}

// `PartialEq` is derived on the struct; this marker impl additionally
// provides `Eq` whenever the element type `T` is itself `Eq`.
impl<T: Eq> Eq for ArrowBuffer<T> {}

impl<T: Clone> ArrowBuffer<T> {
    #[inline]
    pub fn to_vec(&self) -> Vec<T> {
        self.0.as_slice().to_vec()
    }
}

impl<T> From<Buffer<T>> for ArrowBuffer<T> {
    #[inline]
    fn from(value: Buffer<T>) -> Self {
        Self(value)
    }
}

impl<T> From<Vec<T>> for ArrowBuffer<T> {
    #[inline]
    fn from(value: Vec<T>) -> Self {
        Self(value.into())
    }
}

impl<T: Clone> From<&[T]> for ArrowBuffer<T> {
    #[inline]
    fn from(value: &[T]) -> Self {
        Self(value.iter().cloned().collect()) // TODO(emilk): avoid extra clones
    }
}

impl<T> FromIterator<T> for ArrowBuffer<T> {
    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
        Self(Buffer::from_iter(iter))
    }
}

impl<T> std::ops::Deref for ArrowBuffer<T> {
    type Target = [T];

    #[inline]
    fn deref(&self) -> &[T] {
        self.0.as_slice()
    }
}