Skip to main content

vortex_array/arrays/varbinview/
view.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! The 16-byte view struct stored in variable-length binary vectors.
5
6use std::fmt;
7use std::hash::Hash;
8use std::hash::Hasher;
9use std::ops::Range;
10
11use static_assertions::assert_eq_align;
12use static_assertions::assert_eq_size;
13use vortex_error::VortexExpect;
14
15/// A view over a variable-length binary value.
16///
17/// Either an inlined representation (for values <= 12 bytes) or a reference
18/// to an external buffer (for values > 12 bytes).
19#[derive(Clone, Copy)]
20#[repr(C, align(16))]
21pub union BinaryView {
22    /// Numeric representation. This is logically `u128`, but we split it into the high and low
23    /// bits to preserve the alignment.
24    pub(crate) le_bytes: [u8; 16],
25
26    /// Inlined representation: strings <= 12 bytes
27    pub(crate) inlined: Inlined,
28
29    /// Reference type: strings > 12 bytes.
30    pub(crate) _ref: Ref,
31}
32
33assert_eq_align!(BinaryView, u128);
34assert_eq_size!(BinaryView, [u8; 16]);
35assert_eq_size!(Inlined, [u8; 16]);
36assert_eq_size!(Ref, [u8; 16]);
37
38/// Variant of a [`BinaryView`] that holds an inlined value.
39#[derive(Clone, Copy, Debug, PartialEq, Eq)]
40#[repr(C, align(8))]
41pub struct Inlined {
42    /// The size of the full value.
43    pub size: u32,
44    /// The full inlined value.
45    pub data: [u8; BinaryView::MAX_INLINED_SIZE],
46}
47
48impl Inlined {
49    /// Creates a new inlined representation from the provided value of constant size.
50    #[inline]
51    fn new<const N: usize>(value: &[u8]) -> Self {
52        debug_assert_eq!(value.len(), N);
53        let mut inlined = Self {
54            size: N.try_into().vortex_expect("inlined size must fit in u32"),
55            data: [0u8; BinaryView::MAX_INLINED_SIZE],
56        };
57        inlined.data[..N].copy_from_slice(&value[..N]);
58        inlined
59    }
60
61    /// Returns the full inlined value.
62    #[inline]
63    pub fn value(&self) -> &[u8] {
64        &self.data[0..(self.size as usize)]
65    }
66}
67
/// Variant of a [`BinaryView`] that holds a reference to an external buffer.
#[derive(Clone, Copy, Debug)]
#[repr(C, align(8))]
pub struct Ref {
    /// The size of the full value.
    pub size: u32,
    /// The prefix bytes of the value (first 4 bytes).
    pub prefix: [u8; 4],
    /// The index of the buffer where the full value is stored.
    pub buffer_index: u32,
    /// The offset within the buffer where the full value starts.
    pub offset: u32,
}

impl Ref {
    /// Returns the range within the buffer where the full value is stored.
    ///
    /// The range starts at `offset` and spans `size` elements. Both fields are widened to
    /// `usize` *before* they are added: `offset + size` may exceed `u32::MAX`, and adding in
    /// `u32` would panic in builds with overflow checks and silently wrap otherwise.
    #[inline]
    pub fn as_range(&self) -> Range<usize> {
        let start = self.offset as usize;
        start..start + self.size as usize
    }

    /// Replaces the buffer index and offset of the reference, returning a new `Ref`.
    ///
    /// `size` and `prefix` are carried over unchanged; `self` is not modified.
    #[inline]
    pub fn with_buffer_and_offset(&self, buffer_index: u32, offset: u32) -> Ref {
        Self {
            size: self.size,
            prefix: self.prefix,
            buffer_index,
            offset,
        }
    }
}
100
101impl BinaryView {
102    /// Maximum size of an inlined binary value.
103    pub const MAX_INLINED_SIZE: usize = 12;
104
105    /// Create a view from a value, block and offset
106    ///
107    /// Depending on the length of the provided value either a new inlined
108    /// or a reference view will be constructed.
109    ///
110    /// Adapted from arrow-rs <https://github.com/apache/arrow-rs/blob/f4fde769ab6e1a9b75f890b7f8b47bc22800830b/arrow-array/src/builder/generic_bytes_view_builder.rs#L524>
111    /// Explicitly enumerating inlined view produces code that avoids calling generic `ptr::copy_non_interleave` that's slower than explicit stores
112    #[inline(never)]
113    pub fn make_view(value: &[u8], block: u32, offset: u32) -> Self {
114        match value.len() {
115            0 => Self {
116                inlined: Inlined::new::<0>(value),
117            },
118            1 => Self {
119                inlined: Inlined::new::<1>(value),
120            },
121            2 => Self {
122                inlined: Inlined::new::<2>(value),
123            },
124            3 => Self {
125                inlined: Inlined::new::<3>(value),
126            },
127            4 => Self {
128                inlined: Inlined::new::<4>(value),
129            },
130            5 => Self {
131                inlined: Inlined::new::<5>(value),
132            },
133            6 => Self {
134                inlined: Inlined::new::<6>(value),
135            },
136            7 => Self {
137                inlined: Inlined::new::<7>(value),
138            },
139            8 => Self {
140                inlined: Inlined::new::<8>(value),
141            },
142            9 => Self {
143                inlined: Inlined::new::<9>(value),
144            },
145            10 => Self {
146                inlined: Inlined::new::<10>(value),
147            },
148            11 => Self {
149                inlined: Inlined::new::<11>(value),
150            },
151            12 => Self {
152                inlined: Inlined::new::<12>(value),
153            },
154            _ => Self {
155                _ref: Ref {
156                    size: u32::try_from(value.len()).vortex_expect("value length must fit in u32"),
157                    prefix: value[0..4]
158                        .try_into()
159                        .ok()
160                        .vortex_expect("prefix must be exactly 4 bytes"),
161                    buffer_index: block,
162                    offset,
163                },
164            },
165        }
166    }
167
168    /// Create a new empty view
169    #[inline]
170    pub fn empty_view() -> Self {
171        Self { le_bytes: [0; 16] }
172    }
173
174    /// Create a new inlined binary view
175    ///
176    /// # Panics
177    ///
178    /// Panics if the provided string is too long to inline.
179    #[inline]
180    pub fn new_inlined(value: &[u8]) -> Self {
181        assert!(
182            value.len() <= Self::MAX_INLINED_SIZE,
183            "expected inlined value to be <= 12 bytes, was {}",
184            value.len()
185        );
186
187        Self::make_view(value, 0, 0)
188    }
189
190    /// Returns the length of the binary value.
191    #[inline]
192    pub fn len(&self) -> u32 {
193        unsafe { self.inlined.size }
194    }
195
196    /// Returns true if the binary value is empty.
197    #[inline]
198    pub fn is_empty(&self) -> bool {
199        self.len() == 0
200    }
201
202    /// Returns true if the binary value is inlined.
203    #[inline]
204    #[expect(
205        clippy::cast_possible_truncation,
206        reason = "MAX_INLINED_SIZE is a small constant"
207    )]
208    pub fn is_inlined(&self) -> bool {
209        self.len() <= (Self::MAX_INLINED_SIZE as u32)
210    }
211
212    /// Returns the inlined representation of the binary value.
213    pub fn as_inlined(&self) -> &Inlined {
214        debug_assert!(self.is_inlined());
215        unsafe { &self.inlined }
216    }
217
218    /// Returns the reference representation of the binary value.
219    pub fn as_view(&self) -> &Ref {
220        debug_assert!(!self.is_inlined());
221        unsafe { &self._ref }
222    }
223
224    /// Returns a mutable reference to the reference representation of the binary value.
225    pub fn as_view_mut(&mut self) -> &mut Ref {
226        unsafe { &mut self._ref }
227    }
228
229    /// Returns the binary view as u128 representation.
230    pub fn as_u128(&self) -> u128 {
231        // SAFETY: binary view always safe to read as u128 LE bytes
232        unsafe { u128::from_le_bytes(self.le_bytes) }
233    }
234}
235
236impl From<u128> for BinaryView {
237    fn from(value: u128) -> Self {
238        BinaryView {
239            le_bytes: value.to_le_bytes(),
240        }
241    }
242}
243
244impl From<Ref> for BinaryView {
245    fn from(value: Ref) -> Self {
246        BinaryView { _ref: value }
247    }
248}
249
250impl PartialEq for BinaryView {
251    fn eq(&self, other: &Self) -> bool {
252        let a = unsafe { std::mem::transmute::<&BinaryView, &u128>(self) };
253        let b = unsafe { std::mem::transmute::<&BinaryView, &u128>(other) };
254        a == b
255    }
256}
257impl Eq for BinaryView {}
258
259impl Hash for BinaryView {
260    fn hash<H: Hasher>(&self, state: &mut H) {
261        unsafe { std::mem::transmute::<&BinaryView, &u128>(self) }.hash(state);
262    }
263}
264
265impl Default for BinaryView {
266    fn default() -> Self {
267        Self::make_view(&[], 0, 0)
268    }
269}
270
271impl fmt::Debug for BinaryView {
272    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
273        let mut s = f.debug_struct("BinaryView");
274        if self.is_inlined() {
275            s.field("inline", &self.as_inlined());
276        } else {
277            s.field("ref", &self.as_view());
278        }
279        s.finish()
280    }
281}