vortex_array/arrays/varbinview/
binary_view.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt;
5use std::hash::{Hash, Hasher};
6use std::ops::Range;
7
8use static_assertions::{assert_eq_align, assert_eq_size};
9use vortex_error::VortexUnwrap;
10
11#[derive(Clone, Copy, Debug, PartialEq, Eq)]
12#[repr(C, align(8))]
13pub struct Inlined {
14    pub(super) size: u32,
15    pub(super) data: [u8; BinaryView::MAX_INLINED_SIZE],
16}
17
18impl Inlined {
19    fn new<const N: usize>(value: &[u8]) -> Self {
20        let mut inlined = Self {
21            size: N.try_into().vortex_unwrap(),
22            data: [0u8; BinaryView::MAX_INLINED_SIZE],
23        };
24        inlined.data[..N].copy_from_slice(&value[..N]);
25        inlined
26    }
27
28    #[inline]
29    pub fn value(&self) -> &[u8] {
30        &self.data[0..(self.size as usize)]
31    }
32}
33
34#[derive(Clone, Copy, Debug)]
35#[repr(C, align(8))]
36pub struct Ref {
37    pub(super) size: u32,
38    pub(super) prefix: [u8; 4],
39    pub(super) buffer_index: u32,
40    pub(super) offset: u32,
41}
42
43impl Ref {
44    pub fn new(size: u32, prefix: [u8; 4], buffer_index: u32, offset: u32) -> Self {
45        Self {
46            size,
47            prefix,
48            buffer_index,
49            offset,
50        }
51    }
52
53    #[inline]
54    pub fn size(&self) -> u32 {
55        self.size
56    }
57
58    #[inline]
59    pub fn buffer_index(&self) -> u32 {
60        self.buffer_index
61    }
62
63    #[inline]
64    pub fn offset(&self) -> u32 {
65        self.offset
66    }
67
68    #[inline]
69    pub fn prefix(&self) -> &[u8; 4] {
70        &self.prefix
71    }
72
73    #[inline]
74    pub fn as_range(&self) -> Range<usize> {
75        self.offset as usize..(self.offset + self.size) as usize
76    }
77
78    #[inline]
79    pub fn with_buffer_and_offset(&self, buffer_index: u32, offset: u32) -> Ref {
80        Self::new(self.size, self.prefix, buffer_index, offset)
81    }
82}
83
84#[derive(Clone, Copy)]
85#[repr(C, align(16))]
86pub union BinaryView {
87    // Numeric representation. This is logically `u128`, but we split it into the high and low
88    // bits to preserve the alignment.
89    pub(super) le_bytes: [u8; 16],
90
91    // Inlined representation: strings <= 12 bytes
92    pub(super) inlined: Inlined,
93
94    // Reference type: strings > 12 bytes.
95    pub(super) _ref: Ref,
96}
97
98assert_eq_size!(BinaryView, [u8; 16]);
99assert_eq_size!(Inlined, [u8; 16]);
100assert_eq_size!(Ref, [u8; 16]);
101assert_eq_align!(BinaryView, u128);
102
103impl Hash for BinaryView {
104    fn hash<H: Hasher>(&self, state: &mut H) {
105        unsafe { std::mem::transmute::<&BinaryView, &[u8; 16]>(self) }.hash(state);
106    }
107}
108
109impl Default for BinaryView {
110    fn default() -> Self {
111        Self::make_view(&[], 0, 0)
112    }
113}
114
115impl BinaryView {
116    pub const MAX_INLINED_SIZE: usize = 12;
117
118    /// Create a view from a value, block and offset
119    ///
120    /// Depending on the length of the provided value either a new inlined
121    /// or a reference view will be constructed.
122    ///
123    /// Adapted from arrow-rs <https://github.com/apache/arrow-rs/blob/f4fde769ab6e1a9b75f890b7f8b47bc22800830b/arrow-array/src/builder/generic_bytes_view_builder.rs#L524>
124    /// Explicitly enumerating inlined view produces code that avoids calling generic `ptr::copy_non_interleave` that's slower than explicit stores
125    #[inline(never)]
126    pub fn make_view(value: &[u8], block: u32, offset: u32) -> Self {
127        match value.len() {
128            0 => Self {
129                inlined: Inlined::new::<0>(value),
130            },
131            1 => Self {
132                inlined: Inlined::new::<1>(value),
133            },
134            2 => Self {
135                inlined: Inlined::new::<2>(value),
136            },
137            3 => Self {
138                inlined: Inlined::new::<3>(value),
139            },
140            4 => Self {
141                inlined: Inlined::new::<4>(value),
142            },
143            5 => Self {
144                inlined: Inlined::new::<5>(value),
145            },
146            6 => Self {
147                inlined: Inlined::new::<6>(value),
148            },
149            7 => Self {
150                inlined: Inlined::new::<7>(value),
151            },
152            8 => Self {
153                inlined: Inlined::new::<8>(value),
154            },
155            9 => Self {
156                inlined: Inlined::new::<9>(value),
157            },
158            10 => Self {
159                inlined: Inlined::new::<10>(value),
160            },
161            11 => Self {
162                inlined: Inlined::new::<11>(value),
163            },
164            12 => Self {
165                inlined: Inlined::new::<12>(value),
166            },
167            _ => Self {
168                _ref: Ref::new(
169                    u32::try_from(value.len()).vortex_unwrap(),
170                    value[0..4].try_into().vortex_unwrap(),
171                    block,
172                    offset,
173                ),
174            },
175        }
176    }
177
178    /// Create a new empty view
179    #[inline]
180    pub fn empty_view() -> Self {
181        Self::new_inlined(&[])
182    }
183
184    /// Create a new inlined binary view
185    #[inline]
186    pub fn new_inlined(value: &[u8]) -> Self {
187        assert!(
188            value.len() <= Self::MAX_INLINED_SIZE,
189            "expected inlined value to be <= 12 bytes, was {}",
190            value.len()
191        );
192
193        Self::make_view(value, 0, 0)
194    }
195
196    #[inline]
197    pub fn len(&self) -> u32 {
198        unsafe { self.inlined.size }
199    }
200
201    #[inline]
202    pub fn is_empty(&self) -> bool {
203        self.len() == 0
204    }
205
206    #[inline]
207    #[allow(clippy::cast_possible_truncation)]
208    pub fn is_inlined(&self) -> bool {
209        self.len() <= (Self::MAX_INLINED_SIZE as u32)
210    }
211
212    pub fn as_inlined(&self) -> &Inlined {
213        unsafe { &self.inlined }
214    }
215
216    pub fn as_view(&self) -> &Ref {
217        unsafe { &self._ref }
218    }
219
220    pub fn as_u128(&self) -> u128 {
221        // SAFETY: binary view always safe to read as u128 LE bytes
222        unsafe { u128::from_le_bytes(self.le_bytes) }
223    }
224}
225
226impl From<u128> for BinaryView {
227    fn from(value: u128) -> Self {
228        BinaryView {
229            le_bytes: value.to_le_bytes(),
230        }
231    }
232}
233
234impl From<Ref> for BinaryView {
235    fn from(value: Ref) -> Self {
236        BinaryView { _ref: value }
237    }
238}
239
240impl fmt::Debug for BinaryView {
241    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
242        let mut s = f.debug_struct("BinaryView");
243        if self.is_inlined() {
244            s.field("inline", &self.as_inlined());
245        } else {
246            s.field("ref", &self.as_view());
247        }
248        s.finish()
249    }
250}